From 24a4d59aa7fdc337eef1c2b589478ea998e54373 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 3 Jul 2023 15:22:38 +0200 Subject: [PATCH 01/35] accel/tcg: Move HMP info jit and info opcount code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move all of it into accel/tcg/monitor.c. This puts everything about tcg that is only used by the monitor in the same place. Tested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 15 ---- accel/tcg/internal-common.h | 2 - accel/tcg/monitor.c | 154 ++++++++++++++++++++++++++++++++++++ accel/tcg/translate-all.c | 127 ----------------------------- include/exec/cputlb.h | 1 - include/tcg/tcg.h | 3 - tcg/tcg.c | 10 --- 7 files changed, 154 insertions(+), 158 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index b8c5e345b8..13986820fe 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -321,21 +321,6 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn, } } -void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) -{ - CPUState *cpu; - size_t full = 0, part = 0, elide = 0; - - CPU_FOREACH(cpu) { - full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); - part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); - elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); - } - *pfull = full; - *ppart = part; - *pelide = elide; -} - static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) { uint16_t asked = data.host_int; diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h index 3b2277e6e9..edefd0dcb7 100644 --- a/accel/tcg/internal-common.h +++ b/accel/tcg/internal-common.h @@ -14,8 +14,6 @@ extern int64_t max_delay; extern int64_t max_advance; -void dump_exec_info(GString *buf); - /* * Return true if CS is not running in parallel with other cpus, either * because there are no other cpus or we are within an exclusive context. diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index caf1189e0b..093efe9714 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "qemu/accel.h" +#include "qemu/qht.h" #include "qapi/error.h" #include "qapi/type-helpers.h" #include "qapi/qapi-commands-machine.h" @@ -17,6 +18,7 @@ #include "sysemu/tcg.h" #include "tcg/tcg.h" #include "internal-common.h" +#include "tb-context.h" static void dump_drift_info(GString *buf) @@ -50,6 +52,153 @@ static void dump_accel_info(GString *buf) one_insn_per_tb ? "on" : "off"); } +static void print_qht_statistics(struct qht_stats hst, GString *buf) +{ + uint32_t hgram_opts; + size_t hgram_bins; + char *hgram; + + if (!hst.head_buckets) { + return; + } + g_string_append_printf(buf, "TB hash buckets %zu/%zu " + "(%0.2f%% head buckets used)\n", + hst.used_head_buckets, hst.head_buckets, + (double)hst.used_head_buckets / + hst.head_buckets * 100); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; + if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { + hgram_opts |= QDIST_PR_NODECIMAL; + } + hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); + g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. 
" + "Histogram: %s\n", + qdist_avg(&hst.occupancy) * 100, hgram); + g_free(hgram); + + hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; + hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); + if (hgram_bins > 10) { + hgram_bins = 10; + } else { + hgram_bins = 0; + hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; + } + hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); + g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " + "Histogram: %s\n", + qdist_avg(&hst.chain), hgram); + g_free(hgram); +} + +struct tb_tree_stats { + size_t nb_tbs; + size_t host_size; + size_t target_size; + size_t max_target_size; + size_t direct_jmp_count; + size_t direct_jmp2_count; + size_t cross_page; +}; + +static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) +{ + const TranslationBlock *tb = value; + struct tb_tree_stats *tst = data; + + tst->nb_tbs++; + tst->host_size += tb->tc.size; + tst->target_size += tb->size; + if (tb->size > tst->max_target_size) { + tst->max_target_size = tb->size; + } + if (tb->page_addr[1] != -1) { + tst->cross_page++; + } + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp_count++; + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp2_count++; + } + } + return false; +} + +static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) +{ + CPUState *cpu; + size_t full = 0, part = 0, elide = 0; + + CPU_FOREACH(cpu) { + full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); + part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); + elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); + } + *pfull = full; + *ppart = part; + *pelide = elide; +} + +static void tcg_dump_info(GString *buf) +{ + g_string_append_printf(buf, "[TCG profiler not compiled]\n"); +} + +static void dump_exec_info(GString *buf) +{ + struct tb_tree_stats tst = {}; + struct qht_stats hst; + size_t nb_tbs, flush_full, flush_part, flush_elide; + + tcg_tb_foreach(tb_tree_stats_iter, &tst); + nb_tbs = tst.nb_tbs; + /* XXX: avoid using doubles ? */ + g_string_append_printf(buf, "Translation buffer state:\n"); + /* + * Report total code size including the padding and TB structs; + * otherwise users might think "-accel tcg,tb-size" is not honoured. + * For avg host size we use the precise numbers from tb_tree_stats though. + */ + g_string_append_printf(buf, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + g_string_append_printf(buf, "TB count %zu\n", nb_tbs); + g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? tst.target_size / nb_tbs : 0, + tst.max_target_size); + g_string_append_printf(buf, "TB avg host size %zu bytes " + "(expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? + (double)tst.host_size / tst.target_size : 0); + g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", + tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + g_string_append_printf(buf, "direct jump count %zu (%zu%%) " + "(2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? 
(tst.direct_jmp2_count * 100) / nb_tbs : 0); + + qht_statistics_init(&tb_ctx.htable, &hst); + print_qht_statistics(hst, buf); + qht_statistics_destroy(&hst); + + g_string_append_printf(buf, "\nStatistics:\n"); + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); + + tlb_flush_counts(&flush_full, &flush_part, &flush_elide); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); + tcg_dump_info(buf); +} + HumanReadableText *qmp_x_query_jit(Error **errp) { g_autoptr(GString) buf = g_string_new(""); @@ -66,6 +215,11 @@ HumanReadableText *qmp_x_query_jit(Error **errp) return human_readable_text_from_str(buf); } +static void tcg_dump_op_count(GString *buf) +{ + g_string_append_printf(buf, "[TCG profiler not compiled]\n"); +} + HumanReadableText *qmp_x_query_opcount(Error **errp) { g_autoptr(GString) buf = g_string_new(""); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 8cb6ad3511..e579b0891d 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -645,133 +645,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_loop_exit_noexc(cpu); } -static void print_qht_statistics(struct qht_stats hst, GString *buf) -{ - uint32_t hgram_opts; - size_t hgram_bins; - char *hgram; - - if (!hst.head_buckets) { - return; - } - g_string_append_printf(buf, "TB hash buckets %zu/%zu " - "(%0.2f%% head buckets used)\n", - hst.used_head_buckets, hst.head_buckets, - (double)hst.used_head_buckets / - hst.head_buckets * 100); - - hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; - hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; - if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { - hgram_opts |= QDIST_PR_NODECIMAL; - } - hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); - g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " - "Histogram: %s\n", - qdist_avg(&hst.occupancy) * 100, hgram); - g_free(hgram); - - hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; - hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); - if (hgram_bins > 10) { - hgram_bins = 10; - } else { - hgram_bins = 0; - hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; - } - hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); - g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. 
" - "Histogram: %s\n", - qdist_avg(&hst.chain), hgram); - g_free(hgram); -} - -struct tb_tree_stats { - size_t nb_tbs; - size_t host_size; - size_t target_size; - size_t max_target_size; - size_t direct_jmp_count; - size_t direct_jmp2_count; - size_t cross_page; -}; - -static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) -{ - const TranslationBlock *tb = value; - struct tb_tree_stats *tst = data; - - tst->nb_tbs++; - tst->host_size += tb->tc.size; - tst->target_size += tb->size; - if (tb->size > tst->max_target_size) { - tst->max_target_size = tb->size; - } - if (tb_page_addr1(tb) != -1) { - tst->cross_page++; - } - if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp2_count++; - } - } - return false; -} - -void dump_exec_info(GString *buf) -{ - struct tb_tree_stats tst = {}; - struct qht_stats hst; - size_t nb_tbs, flush_full, flush_part, flush_elide; - - tcg_tb_foreach(tb_tree_stats_iter, &tst); - nb_tbs = tst.nb_tbs; - /* XXX: avoid using doubles ? */ - g_string_append_printf(buf, "Translation buffer state:\n"); - /* - * Report total code size including the padding and TB structs; - * otherwise users might think "-accel tcg,tb-size" is not honoured. - * For avg host size we use the precise numbers from tb_tree_stats though. - */ - g_string_append_printf(buf, "gen code size %zu/%zu\n", - tcg_code_size(), tcg_code_capacity()); - g_string_append_printf(buf, "TB count %zu\n", nb_tbs); - g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", - nb_tbs ? tst.target_size / nb_tbs : 0, - tst.max_target_size); - g_string_append_printf(buf, "TB avg host size %zu bytes " - "(expansion ratio: %0.1f)\n", - nb_tbs ? tst.host_size / nb_tbs : 0, - tst.target_size ? - (double)tst.host_size / tst.target_size : 0); - g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", - tst.cross_page, - nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); - g_string_append_printf(buf, "direct jump count %zu (%zu%%) " - "(2 jumps=%zu %zu%%)\n", - tst.direct_jmp_count, - nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, - tst.direct_jmp2_count, - nb_tbs ? 
(tst.direct_jmp2_count * 100) / nb_tbs : 0); - - qht_statistics_init(&tb_ctx.htable, &hst); - print_qht_statistics(hst, buf); - qht_statistics_destroy(&hst); - - g_string_append_printf(buf, "\nStatistics:\n"); - g_string_append_printf(buf, "TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - g_string_append_printf(buf, "TB invalidate count %u\n", - qatomic_read(&tb_ctx.tb_phys_invalidate_count)); - - tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); - g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); - g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(buf); -} - #else /* CONFIG_USER_ONLY */ void cpu_interrupt(CPUState *cpu, int mask) diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index 19b16e58f8..6da1462c4f 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -26,6 +26,5 @@ /* cputlb.c */ void tlb_protect_code(ram_addr_t ram_addr); void tlb_unprotect_code(ram_addr_t ram_addr); -void tlb_flush_counts(size_t *full, size_t *part, size_t *elide); #endif #endif diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a9282cdcc6..3a4c0f124f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -846,9 +846,6 @@ static inline TCGv_ptr tcg_temp_new_ptr(void) return temp_tcgv_ptr(t); } -void tcg_dump_info(GString *buf); -void tcg_dump_op_count(GString *buf); - #define TCG_CT_CONST 1 /* any constant of register size */ typedef struct TCGArgConstraint { diff --git a/tcg/tcg.c b/tcg/tcg.c index 35158a0846..847d749a7e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -5927,11 +5927,6 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); } -void tcg_dump_op_count(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { int i, start_words, num_insns; @@ -6128,11 +6123,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return tcg_current_code_size(s); } -void tcg_dump_info(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - #ifdef ELF_HOST_MACHINE /* In order to use this feature, the backend needs to do three things: From fa645b48d3da093165406dce2c36f967fac6bd9d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:45 -0700 Subject: [PATCH 02/35] tcg: Add C_N2_I1 Constraint with two outputs, both in new registers. 
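As a minimal standalone sketch of what the new constraint set encodes (the names
DemoConstraintSet and DEMO_C_N2_I1 are made up so it compiles on its own; only the
string-building expansion below mirrors the real C_N2_I1 macro added to tcg/tcg.c):
C_N2_I1(r, r, r) yields the per-argument constraint strings "&r", "&r", "r", i.e.
two outputs that must each be allocated in a register not used by any input, plus
one plain register input.

    #include <stdio.h>

    typedef struct {
        const char *args_ct_str[3];
    } DemoConstraintSet;

    /* Mirrors the string-building expansion of C_N2_I1: a leading '&'
     * marks an output that needs a register distinct from all inputs. */
    #define DEMO_C_N2_I1(O1, O2, I1) \
        { .args_ct_str = { "&" #O1, "&" #O2, #I1 } }

    static const DemoConstraintSet demo = DEMO_C_N2_I1(r, r, r);

    int main(void)
    {
        /* Prints: &r &r r */
        printf("%s %s %s\n", demo.args_ct_str[0],
               demo.args_ct_str[1], demo.args_ct_str[2]);
        return 0;
    }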
Signed-off-by: Richard Henderson Reviewed-by: Jiajie Chen Message-Id: <20230916220151.526140-2-richard.henderson@linaro.org> --- tcg/tcg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tcg/tcg.c b/tcg/tcg.c index 847d749a7e..6766b60b8a 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -653,6 +653,7 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), +#define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), @@ -675,6 +676,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #undef C_O1_I3 #undef C_O1_I4 #undef C_N1_I2 +#undef C_N2_I1 #undef C_O2_I1 #undef C_O2_I2 #undef C_O2_I3 @@ -694,6 +696,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } }, #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } }, +#define C_N2_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, "&" #O2, #I1 } }, #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } }, #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, @@ -715,6 +718,7 @@ static const TCGTargetOpDef constraint_sets[] = { #undef C_O1_I3 #undef C_O1_I4 #undef C_N1_I2 +#undef C_N2_I1 #undef C_O2_I1 #undef C_O2_I2 #undef C_O2_I3 @@ -734,6 +738,7 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) +#define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1) #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) From 2b2ae0a42e67a85273c0976466f1a634ede084dc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:46 -0700 Subject: [PATCH 03/35] tcg/loongarch64: Use C_N2_I1 for INDEX_op_qemu_ld_a*_i128 Use new registers for the output, so that we never overlap the input address, which could happen for user-only. This avoids a "tmp = addr + 0" in that case. Signed-off-by: Richard Henderson Reviewed-by: Jiajie Chen Message-Id: <20230916220151.526140-3-richard.henderson@linaro.org> --- tcg/loongarch64/tcg-target-con-set.h | 2 +- tcg/loongarch64/tcg-target.c.inc | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 77d62e38e7..cae6c2aad6 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -38,4 +38,4 @@ C_O1_I2(w, w, wM) C_O1_I2(w, w, wA) C_O1_I3(w, w, w, w) C_O1_I4(r, rZ, rJ, rZ, rZ) -C_O2_I1(r, r, r) +C_N2_I1(r, r, r) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index ccf133db4b..a1a387df31 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1103,13 +1103,18 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi } } else { /* Otherwise use a pair of LD/ST. 
*/ - tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index); + TCGReg base = h.base; + if (h.index != TCG_REG_ZERO) { + base = TCG_REG_TMP0; + tcg_out_opc_add_d(s, base, h.base, h.index); + } if (is_ld) { - tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0); - tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8); + tcg_debug_assert(base != data_lo); + tcg_out_opc_ld_d(s, data_lo, base, 0); + tcg_out_opc_ld_d(s, data_hi, base, 8); } else { - tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0); - tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8); + tcg_out_opc_st_d(s, data_lo, base, 0); + tcg_out_opc_st_d(s, data_hi, base, 8); } } @@ -2049,7 +2054,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_a32_i128: case INDEX_op_qemu_ld_a64_i128: - return C_O2_I1(r, r, r); + return C_N2_I1(r, r, r); case INDEX_op_qemu_st_a32_i128: case INDEX_op_qemu_st_a64_i128: From 0885f1221e0add5529dada1e7948d2c00189cb8b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:47 -0700 Subject: [PATCH 04/35] util: Add cpuinfo for loongarch64 Signed-off-by: Richard Henderson Reviewed-by: Jiajie Chen Message-Id: <20230916220151.526140-4-richard.henderson@linaro.org> --- host/include/loongarch64/host/cpuinfo.h | 21 +++++++++++++++ util/cpuinfo-loongarch.c | 35 +++++++++++++++++++++++++ util/meson.build | 2 ++ 3 files changed, 58 insertions(+) create mode 100644 host/include/loongarch64/host/cpuinfo.h create mode 100644 util/cpuinfo-loongarch.c diff --git a/host/include/loongarch64/host/cpuinfo.h b/host/include/loongarch64/host/cpuinfo.h new file mode 100644 index 0000000000..fab664a10b --- /dev/null +++ b/host/include/loongarch64/host/cpuinfo.h @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * Host specific cpu identification for LoongArch + */ + +#ifndef HOST_CPUINFO_H +#define HOST_CPUINFO_H + +#define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */ +#define CPUINFO_LSX (1u << 1) + +/* Initialized with a constructor. */ +extern unsigned cpuinfo; + +/* + * We cannot rely on constructor ordering, so other constructors must + * use the function interface rather than the variable above. + */ +unsigned cpuinfo_init(void); + +#endif /* HOST_CPUINFO_H */ diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c new file mode 100644 index 0000000000..08b6d7460c --- /dev/null +++ b/util/cpuinfo-loongarch.c @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * Host specific cpu identification for LoongArch. + */ + +#include "qemu/osdep.h" +#include "host/cpuinfo.h" + +#ifdef CONFIG_GETAUXVAL +# include +#else +# include "elf.h" +#endif +#include + +unsigned cpuinfo; + +/* Called both as constructor and (possibly) via other constructors. */ +unsigned __attribute__((constructor)) cpuinfo_init(void) +{ + unsigned info = cpuinfo; + unsigned long hwcap; + + if (info) { + return info; + } + + hwcap = qemu_getauxval(AT_HWCAP); + + info = CPUINFO_ALWAYS; + info |= (hwcap & HWCAP_LOONGARCH_LSX ? 
CPUINFO_LSX : 0); + + cpuinfo = info; + return info; +} diff --git a/util/meson.build b/util/meson.build index 769b24f2e0..1bfff81087 100644 --- a/util/meson.build +++ b/util/meson.build @@ -113,6 +113,8 @@ if cpu == 'aarch64' util_ss.add(files('cpuinfo-aarch64.c')) elif cpu in ['x86', 'x86_64'] util_ss.add(files('cpuinfo-i386.c')) +elif cpu == 'loongarch64' + util_ss.add(files('cpuinfo-loongarch.c')) elif cpu in ['ppc', 'ppc64'] util_ss.add(files('cpuinfo-ppc.c')) endif From f2a553481e520e359f735ea6f4a9435e52b3b446 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:48 -0700 Subject: [PATCH 05/35] tcg/loongarch64: Use cpuinfo.h Signed-off-by: Richard Henderson Reviewed-by: Jiajie Chen Message-Id: <20230916220151.526140-5-richard.henderson@linaro.org> --- tcg/loongarch64/tcg-target.c.inc | 8 +------- tcg/loongarch64/tcg-target.h | 8 ++++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index a1a387df31..ae13c1f666 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -32,8 +32,6 @@ #include "../tcg-ldst.c.inc" #include -bool use_lsx_instructions; - #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -2314,10 +2312,6 @@ static void tcg_target_init(TCGContext *s) exit(EXIT_FAILURE); } - if (hwcap & HWCAP_LOONGARCH_LSX) { - use_lsx_instructions = 1; - } - tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; @@ -2333,7 +2327,7 @@ static void tcg_target_init(TCGContext *s) tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8); tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9); - if (use_lsx_instructions) { + if (cpuinfo & CPUINFO_LSX) { tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS; tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24); tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25); diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 03017672f6..1bea15b02e 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -29,6 +29,8 @@ #ifndef LOONGARCH_TCG_TARGET_H #define LOONGARCH_TCG_TARGET_H +#include "host/cpuinfo.h" + #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_NB_REGS 64 @@ -85,8 +87,6 @@ typedef enum { TCG_VEC_TMP0 = TCG_REG_V23, } TCGReg; -extern bool use_lsx_instructions; - /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 16 @@ -171,10 +171,10 @@ extern bool use_lsx_instructions; #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -#define TCG_TARGET_HAS_qemu_ldst_i128 use_lsx_instructions +#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) #define TCG_TARGET_HAS_v64 0 -#define TCG_TARGET_HAS_v128 use_lsx_instructions +#define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX) #define TCG_TARGET_HAS_v256 0 #define TCG_TARGET_HAS_not_vec 1 From adc8467e697db988878cec645ae52aa6b51ce4c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:49 -0700 Subject: [PATCH 06/35] host/include/loongarch64: Add atomic16 load and store While loongarch64 does not have a 128-bit cmpxchg, it does have 128-bit atomic load and store via the vector unit. 
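A usage sketch, not part of the change: callers are expected to test the
HAVE_ATOMIC128_RO/RW flags before relying on 16-byte single-copy atomicity and to
fall back otherwise, the same pattern the existing ldst_atomicity code uses. The
helpers and flags are the ones this patch adds; the include names are the usual
QEMU wrappers and are my assumption rather than quoted from the patch.

    #include "qemu/osdep.h"
    #include "qemu/int128.h"
    #include "qemu/atomic128.h"

    /*
     * Copy 16 bytes with single-copy atomicity when the host supports it;
     * return false so the caller can take a slower fallback path
     * (e.g. cpu_loop_exit_atomic) otherwise.
     */
    static inline bool copy16_atomic(Int128 *dst, const Int128 *src)
    {
        if (HAVE_ATOMIC128_RO && HAVE_ATOMIC128_RW) {
            atomic16_set(dst, atomic16_read_ro(src));
            return true;
        }
        return false;
    }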
Signed-off-by: Richard Henderson Message-Id: <20230916220151.526140-6-richard.henderson@linaro.org> --- .../include/loongarch64/host/atomic128-ldst.h | 52 +++++++++++++++++++ .../loongarch64/host/load-extract-al16-al8.h | 39 ++++++++++++++ .../loongarch64/host/store-insert-al16.h | 12 +++++ 3 files changed, 103 insertions(+) create mode 100644 host/include/loongarch64/host/atomic128-ldst.h create mode 100644 host/include/loongarch64/host/load-extract-al16-al8.h create mode 100644 host/include/loongarch64/host/store-insert-al16.h diff --git a/host/include/loongarch64/host/atomic128-ldst.h b/host/include/loongarch64/host/atomic128-ldst.h new file mode 100644 index 0000000000..9a4a8f8b9e --- /dev/null +++ b/host/include/loongarch64/host/atomic128-ldst.h @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * Load/store for 128-bit atomic operations, LoongArch version. + * + * See docs/devel/atomics.rst for discussion about the guarantees each + * atomic primitive is meant to provide. + */ + +#ifndef LOONGARCH_ATOMIC128_LDST_H +#define LOONGARCH_ATOMIC128_LDST_H + +#include "host/cpuinfo.h" +#include "tcg/debug-assert.h" + +#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_LSX) +#define HAVE_ATOMIC128_RW HAVE_ATOMIC128_RO + +/* + * As of gcc 13 and clang 16, there is no compiler support for LSX at all. + * Use inline assembly throughout. + */ + +static inline Int128 atomic16_read_ro(const Int128 *ptr) +{ + uint64_t l, h; + + tcg_debug_assert(HAVE_ATOMIC128_RO); + asm("vld $vr0, %2, 0\n\t" + "vpickve2gr.d %0, $vr0, 0\n\t" + "vpickve2gr.d %1, $vr0, 1" + : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0"); + + return int128_make128(l, h); +} + +static inline Int128 atomic16_read_rw(Int128 *ptr) +{ + return atomic16_read_ro(ptr); +} + +static inline void atomic16_set(Int128 *ptr, Int128 val) +{ + uint64_t l = int128_getlo(val), h = int128_gethi(val); + + tcg_debug_assert(HAVE_ATOMIC128_RW); + asm("vinsgr2vr.d $vr0, %1, 0\n\t" + "vinsgr2vr.d $vr0, %2, 1\n\t" + "vst $vr0, %3, 0" + : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0"); +} + +#endif /* LOONGARCH_ATOMIC128_LDST_H */ diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h b/host/include/loongarch64/host/load-extract-al16-al8.h new file mode 100644 index 0000000000..d1fb59d8af --- /dev/null +++ b/host/include/loongarch64/host/load-extract-al16-al8.h @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * Atomic extract 64 from 128-bit, LoongArch version. + * + * Copyright (C) 2023 Linaro, Ltd. + */ + +#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H +#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H + +#include "host/cpuinfo.h" +#include "tcg/debug-assert.h" + +/** + * load_atom_extract_al16_or_al8: + * @pv: host address + * @s: object size in bytes, @s <= 8. + * + * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not + * cross an 16-byte boundary then the access must be 16-byte atomic, + * otherwise the access must be 8-byte atomic. 
+ */ +static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s) +{ + uintptr_t pi = (uintptr_t)pv; + Int128 *ptr_align = (Int128 *)(pi & ~7); + int shr = (pi & 7) * 8; + uint64_t l, h; + + tcg_debug_assert(HAVE_ATOMIC128_RO); + asm("vld $vr0, %2, 0\n\t" + "vpickve2gr.d %0, $vr0, 0\n\t" + "vpickve2gr.d %1, $vr0, 1" + : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0"); + + return (l >> shr) | (h << (-shr & 63)); +} + +#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */ diff --git a/host/include/loongarch64/host/store-insert-al16.h b/host/include/loongarch64/host/store-insert-al16.h new file mode 100644 index 0000000000..919fd8d744 --- /dev/null +++ b/host/include/loongarch64/host/store-insert-al16.h @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * Atomic store insert into 128-bit, LoongArch version. + */ + +#ifndef LOONGARCH_STORE_INSERT_AL16_H +#define LOONGARCH_STORE_INSERT_AL16_H + +void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk) + QEMU_ERROR("unsupported atomic"); + +#endif /* LOONGARCH_STORE_INSERT_AL16_H */ From 8b1b3db71a1df58a6e28956b72f143e8cf38bdf6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:50 -0700 Subject: [PATCH 07/35] accel/tcg: Remove redundant case in store_atom_16 We handled the HAVE_ATOMIC128_RW case with atomic16_set at the top of the function; the only thing left for a host without that support is to fall through to cpu_loop_exit_atomic. Signed-off-by: Richard Henderson Message-Id: <20230916220151.526140-7-richard.henderson@linaro.org> --- accel/tcg/ldst_atomicity.c.inc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc index 1cf5b92166..e8f97506fa 100644 --- a/accel/tcg/ldst_atomicity.c.inc +++ b/accel/tcg/ldst_atomicity.c.inc @@ -1103,10 +1103,6 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra, } break; case MO_128: - if (HAVE_ATOMIC128_RW) { - atomic16_set(pv, val); - return; - } break; default: g_assert_not_reached(); From 6046f6e94d8d530ecc28176232479889abbee47e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 16 Sep 2023 15:01:51 -0700 Subject: [PATCH 08/35] accel/tcg: Fix condition for store_atom_insert_al16 Store bytes under a mask is fundamentally a cmpxchg, not a straight store. Use HAVE_CMPXCHG128 instead of HAVE_ATOMIC128_RW. Signed-off-by: Richard Henderson Message-Id: <20230916220151.526140-8-richard.henderson@linaro.org> --- accel/tcg/cputlb.c | 2 +- accel/tcg/ldst_atomicity.c.inc | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 13986820fe..f35c5f359b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2691,7 +2691,7 @@ static uint64_t do_st16_leN(CPUState *cpu, MMULookupPageData *p, case MO_ATOM_WITHIN16_PAIR: /* Since size > 8, this is the half that must be atomic. */ - if (!HAVE_ATOMIC128_RW) { + if (!HAVE_CMPXCHG128) { cpu_loop_exit_atomic(cpu, ra); } return store_whole_le16(p->haddr, p->size, val_le); diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc index e8f97506fa..33a04dec52 100644 --- a/accel/tcg/ldst_atomicity.c.inc +++ b/accel/tcg/ldst_atomicity.c.inc @@ -825,7 +825,7 @@ static uint64_t store_whole_le16(void *pv, int size, Int128 val_le) int sh = o * 8; Int128 m, v; - qemu_build_assert(HAVE_ATOMIC128_RW); + qemu_build_assert(HAVE_CMPXCHG128); /* Like MAKE_64BIT_MASK(0, sz), but larger. 
*/ if (sz <= 64) { @@ -887,7 +887,7 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra, return; } } else if ((pi & 15) == 7) { - if (HAVE_ATOMIC128_RW) { + if (HAVE_CMPXCHG128) { Int128 v = int128_lshift(int128_make64(val), 56); Int128 m = int128_lshift(int128_make64(0xffff), 56); store_atom_insert_al16(pv - 7, v, m); @@ -956,7 +956,7 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra, return; } } else { - if (HAVE_ATOMIC128_RW) { + if (HAVE_CMPXCHG128) { store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val))); return; } @@ -1021,7 +1021,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra, } break; case MO_64: - if (HAVE_ATOMIC128_RW) { + if (HAVE_CMPXCHG128) { store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val))); return; } @@ -1076,7 +1076,7 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra, } break; case -MO_64: - if (HAVE_ATOMIC128_RW) { + if (HAVE_CMPXCHG128) { uint64_t val_le; int s2 = pi & 15; int s1 = 16 - s2; From ecfa1877f7e9d300b57492552afed2932ed954d7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:38 -0700 Subject: [PATCH 09/35] tcg: Mark tcg_gen_op* as noinline Encourage the compiler to tail-call rather than inline across the dozens of opcode expanders. Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-2-richard.henderson@linaro.org> --- tcg/tcg-op.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 828eb9ee46..9aed19e957 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -31,20 +31,26 @@ #include "tcg-internal.h" -void tcg_gen_op1(TCGOpcode opc, TCGArg a1) +/* + * Encourage the compiler to tail-call to a function, rather than inlining. + * Minimizes code size across 99 bottles of beer on the wall. 
+ */ +#define NI __attribute__((noinline)) + +void NI tcg_gen_op1(TCGOpcode opc, TCGArg a1) { TCGOp *op = tcg_emit_op(opc, 1); op->args[0] = a1; } -void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) +void NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) { TCGOp *op = tcg_emit_op(opc, 2); op->args[0] = a1; op->args[1] = a2; } -void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) +void NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) { TCGOp *op = tcg_emit_op(opc, 3); op->args[0] = a1; @@ -52,7 +58,7 @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) op->args[2] = a3; } -void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) +void NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { TCGOp *op = tcg_emit_op(opc, 4); op->args[0] = a1; @@ -61,8 +67,8 @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) op->args[3] = a4; } -void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, - TCGArg a4, TCGArg a5) +void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, + TCGArg a4, TCGArg a5) { TCGOp *op = tcg_emit_op(opc, 5); op->args[0] = a1; @@ -72,8 +78,8 @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, op->args[4] = a5; } -void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, - TCGArg a4, TCGArg a5, TCGArg a6) +void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, + TCGArg a4, TCGArg a5, TCGArg a6) { TCGOp *op = tcg_emit_op(opc, 6); op->args[0] = a1; From 6fc75d50a58bdf15fcc09e89880ba5125d572a6d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:39 -0700 Subject: [PATCH 10/35] tcg: Move tcg_gen_op* out of line In addition to moving out of line, with CONFIG_DEBUG_TCG mark them all noinline. 
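The pattern, as a standalone sketch with generic names (the real NI and DNI macros
are in the tcg/tcg-op.c hunks of this patch and the previous one): the
argument-marshalling core is always out of line, and the thin typed wrappers are
forced out of line only in the expensive debug configuration, so each of the many
expanders compiles to little more than a tail call instead of an inlined copy of
the checks.

    #include <stdio.h>

    #define NOINLINE __attribute__((noinline))

    #ifdef DEBUG_CHECKS                /* stand-in for CONFIG_DEBUG_TCG */
    # define MAYBE_NOINLINE NOINLINE   /* keep per-expander code size small */
    #else
    # define MAYBE_NOINLINE            /* let the compiler decide */
    #endif

    /* The argument-marshalling core, always out of line (cf. NI). */
    static void NOINLINE emit_op3(int opc, int a1, int a2, int a3)
    {
        printf("op%d %d,%d,%d\n", opc, a1, a2, a3);
    }

    /* Dozens of expanders reduce to one-call wrappers like this (cf. DNI). */
    static void MAYBE_NOINLINE gen_add(int d, int a, int b)
    {
        emit_op3(1, d, a, b);
    }

    int main(void)
    {
        gen_add(0, 1, 2);
        return 0;
    }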
Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-3-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 252 +++++++----------------------------- tcg/tcg-op.c | 208 +++++++++++++++++++++++++++++ 2 files changed, 252 insertions(+), 208 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 677aea6dd1..e093f541a7 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -25,214 +25,50 @@ void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); -static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) -{ - tcg_gen_op1(opc, tcgv_i32_arg(a1)); -} - -static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) -{ - tcg_gen_op1(opc, tcgv_i64_arg(a1)); -} - -static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1) -{ - tcg_gen_op1(opc, a1); -} - -static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) -{ - tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); -} - -static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) -{ - tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); -} - -static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) -{ - tcg_gen_op2(opc, tcgv_i32_arg(a1), a2); -} - -static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) -{ - tcg_gen_op2(opc, tcgv_i64_arg(a1), a2); -} - -static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) -{ - tcg_gen_op2(opc, a1, a2); -} - -static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, - TCGv_i32 a2, TCGv_i32 a3) -{ - tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); -} - -static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, - TCGv_i64 a2, TCGv_i64 a3) -{ - tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); -} - -static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, - TCGv_i32 a2, TCGArg a3) -{ - tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); -} - -static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, - TCGv_i64 a2, TCGArg a3) -{ - tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); -} - -static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, - TCGv_ptr base, TCGArg offset) -{ - tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); -} - -static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, - TCGv_ptr base, TCGArg offset) -{ - tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); -} - -static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4) -{ - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4)); -} - -static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4) -{ - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4)); -} - -static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGArg a4) -{ - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), a4); -} - -static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGArg a4) -{ - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), a4); -} - -static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGArg a3, TCGArg a4) 
-{ - tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); -} - -static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGArg a3, TCGArg a4) -{ - tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); -} - -static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) -{ - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5)); -} - -static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) -{ - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5)); -} - -static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5); -} - -static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5); -} - -static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGArg a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), a4, a5); -} - -static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGArg a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), a4, a5); -} - -static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, - TCGv_i32 a5, TCGv_i32 a6) -{ - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), - tcgv_i32_arg(a6)); -} - -static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, - TCGv_i64 a5, TCGv_i64 a6) -{ - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), - tcgv_i64_arg(a6)); -} - -static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, - TCGv_i32 a5, TCGArg a6) -{ - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6); -} - -static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, - TCGv_i64 a5, TCGArg a6) -{ - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6); -} - -static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, - TCGArg a5, TCGArg a6) -{ - tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6); -} - -static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, - TCGArg a5, TCGArg a6) -{ - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6); -} - +void tcg_gen_op1_i32(TCGOpcode, TCGv_i32); +void tcg_gen_op1_i64(TCGOpcode, TCGv_i64); +void tcg_gen_op1i(TCGOpcode, TCGArg); +void tcg_gen_op2_i32(TCGOpcode, TCGv_i32, TCGv_i32); +void tcg_gen_op2_i64(TCGOpcode, TCGv_i64, TCGv_i64); +void tcg_gen_op2i_i32(TCGOpcode, TCGv_i32, TCGArg); +void tcg_gen_op2i_i64(TCGOpcode, TCGv_i64, TCGArg); +void tcg_gen_op2ii(TCGOpcode, TCGArg, TCGArg); +void tcg_gen_op3_i32(TCGOpcode, 
TCGv_i32, TCGv_i32, TCGv_i32); +void tcg_gen_op3_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64); +void tcg_gen_op3i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGArg); +void tcg_gen_op3i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGArg); +void tcg_gen_ldst_op_i32(TCGOpcode, TCGv_i32, TCGv_ptr, TCGArg); +void tcg_gen_ldst_op_i64(TCGOpcode, TCGv_i64, TCGv_ptr, TCGArg); +void tcg_gen_op4_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); +void tcg_gen_op4_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); +void tcg_gen_op4i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, TCGArg); +void tcg_gen_op4i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg); +void tcg_gen_op4ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGArg, TCGArg); +void tcg_gen_op4ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGArg, TCGArg); +void tcg_gen_op5_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGv_i32, TCGv_i32); +void tcg_gen_op5_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64); +void tcg_gen_op5i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGv_i32, TCGArg); +void tcg_gen_op5i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGArg); +void tcg_gen_op5ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGArg, TCGArg); +void tcg_gen_op5ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGArg, TCGArg); +void tcg_gen_op6_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGv_i32, TCGv_i32, TCGv_i32); +void tcg_gen_op6_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64, TCGv_i64); +void tcg_gen_op6i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGv_i32, TCGv_i32, TCGArg); +void tcg_gen_op6i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64, TCGArg); +void tcg_gen_op6ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, + TCGv_i32, TCGArg, TCGArg); +void tcg_gen_op6ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGArg, TCGArg); /* Generic ops. */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 9aed19e957..6c826b46b0 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -90,6 +90,214 @@ void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, op->args[5] = a6; } +/* + * With CONFIG_DEBUG_TCG, tcgv_*_tmp via tcgv_*_arg, is an out-of-line + * assertion check. Force tail calls to avoid too much code expansion. 
+ */ +#ifdef CONFIG_DEBUG_TCG +# define DNI NI +#else +# define DNI +#endif + +void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) +{ + tcg_gen_op1(opc, tcgv_i32_arg(a1)); +} + +void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) +{ + tcg_gen_op1(opc, tcgv_i64_arg(a1)); +} + +void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1) +{ + tcg_gen_op1(opc, a1); +} + +void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) +{ + tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); +} + +void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) +{ + tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); +} + +void DNI tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) +{ + tcg_gen_op2(opc, tcgv_i32_arg(a1), a2); +} + +void DNI tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) +{ + tcg_gen_op2(opc, tcgv_i64_arg(a1), a2); +} + +void DNI tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) +{ + tcg_gen_op2(opc, a1, a2); +} + +void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3) +{ + tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); +} + +void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3) +{ + tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); +} + +void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3) +{ + tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); +} + +void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3) +{ + tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); +} + +void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) +{ + tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); +} + +void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) +{ + tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); +} + +void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4)); +} + +void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4)); +} + +void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), a4); +} + +void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), a4); +} + +void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGArg a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); +} + +void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGArg a3, TCGArg a4) +{ + tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); +} + +void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) +{ + tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5)); +} + +void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5)); +} + +void DNI tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) +{ 
+ tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5); +} + +void DNI tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5); +} + +void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), a4, a5); +} + +void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4, TCGArg a5) +{ + tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), a4, a5); +} + +void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, + TCGv_i32 a4, TCGv_i32 a5, TCGv_i32 a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), + tcgv_i32_arg(a6)); +} + +void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, + TCGv_i64 a4, TCGv_i64 a5, TCGv_i64 a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), + tcgv_i64_arg(a6)); +} + +void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, + TCGv_i32 a4, TCGv_i32 a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6); +} + +void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, + TCGv_i64 a4, TCGv_i64 a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6); +} + +void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGArg a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), + tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6); +} + +void DNI tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGArg a5, TCGArg a6) +{ + tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), + tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6); +} + /* Generic ops. */ static void add_last_as_label_use(TCGLabel *l) From 01bbb6e3eb3368b40384a7e044c9a2d342b8039b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:40 -0700 Subject: [PATCH 11/35] tcg: Move generic expanders out of line Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-4-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 19 +++---------------- tcg/tcg-op.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index e093f541a7..2134961a98 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -72,12 +72,7 @@ void tcg_gen_op6ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, /* Generic ops. 
*/ -static inline void gen_set_label(TCGLabel *l) -{ - l->present = 1; - tcg_gen_op1(INDEX_op_set_label, label_arg(l)); -} - +void gen_set_label(TCGLabel *l); void tcg_gen_br(TCGLabel *l); void tcg_gen_mb(TCGBar); @@ -121,16 +116,8 @@ void tcg_gen_goto_tb(unsigned idx); */ void tcg_gen_lookup_and_goto_ptr(void); -static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, - unsigned wr) -{ - tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr); -} - -static inline void tcg_gen_plugin_cb_end(void) -{ - tcg_emit_op(INDEX_op_plugin_cb_end, 0); -} +void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr); +void tcg_gen_plugin_cb_end(void); /* 32 bit ops */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6c826b46b0..a8cbad212d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -300,6 +300,12 @@ void DNI tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, /* Generic ops. */ +void gen_set_label(TCGLabel *l) +{ + l->present = 1; + tcg_gen_op1(INDEX_op_set_label, label_arg(l)); +} + static void add_last_as_label_use(TCGLabel *l) { TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse)); @@ -333,6 +339,16 @@ void tcg_gen_mb(TCGBar mb_type) } } +void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr) +{ + tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr); +} + +void tcg_gen_plugin_cb_end(void) +{ + tcg_emit_op(INDEX_op_plugin_cb_end, 0); +} + /* 32 bit ops */ void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) From 09607d35f5a9a6aa1c57302c1e27066ad2309e63 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:41 -0700 Subject: [PATCH 12/35] tcg: Move 32-bit expanders out of line Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-5-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 140 ++++++------------------------------ tcg/tcg-op.c | 116 ++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 119 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 2134961a98..cdaa1415d1 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -197,128 +197,30 @@ void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); /* Replicate a value of size @vece from @in to all the lanes in @out */ void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in); -static inline void tcg_gen_discard_i32(TCGv_i32 arg) -{ - tcg_gen_op1_i32(INDEX_op_discard, arg); -} +void tcg_gen_discard_i32(TCGv_i32 arg); +void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg); -static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) -{ - if (ret != arg) { - tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); - } -} +void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset); -static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); -} +void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset); -static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - 
tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); -} - -static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); -} - -static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); -} - -static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); -} - -static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) -{ - if (TCG_TARGET_HAS_neg_i32) { - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); - } else { - tcg_gen_subfi_i32(ret, 0, arg); - } -} - -static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) -{ - if (TCG_TARGET_HAS_not_i32) { - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); - } else { - tcg_gen_xori_i32(ret, arg, -1); - } -} +void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg); /* 64 bit ops */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index a8cbad212d..ab6281ebec 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -351,11 
+351,28 @@ void tcg_gen_plugin_cb_end(void) /* 32 bit ops */ +void tcg_gen_discard_i32(TCGv_i32 arg) +{ + tcg_gen_op1_i32(INDEX_op_discard, arg); +} + +void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (ret != arg) { + tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); + } +} + void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) { tcg_gen_mov_i32(ret, tcg_constant_i32(arg)); } +void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); +} + void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* some cases can be optimized here */ @@ -366,6 +383,11 @@ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); +} + void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) { if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) { @@ -386,6 +408,20 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (TCG_TARGET_HAS_neg_i32) { + tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); + } else { + tcg_gen_subfi_i32(ret, 0, arg); + } +} + +void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); +} + void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* Some cases can be optimized here. */ @@ -414,6 +450,11 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2)); } +void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); +} + void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* Some cases can be optimized here. */ @@ -426,6 +467,11 @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); +} + void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { /* Some cases can be optimized here. 
*/ @@ -439,6 +485,20 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (TCG_TARGET_HAS_not_i32) { + tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); + } else { + tcg_gen_xori_i32(ret, arg, -1); + } +} + +void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); +} + void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); @@ -449,6 +509,11 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); +} + void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); @@ -459,6 +524,11 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } +void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); +} + void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); @@ -527,6 +597,11 @@ void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret, tcg_gen_negsetcond_i32(cond, ret, arg1, tcg_constant_i32(arg2)); } +void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); +} + void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { if (arg2 == 0) { @@ -1385,6 +1460,47 @@ void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) tcg_temp_free_i32(t); } +void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); +} + +void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); +} + +void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); +} + +void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); +} + +void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); +} + +void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); +} + +void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); +} + +void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); +} + + /* 64-bit ops */ #if TCG_TARGET_REG_BITS == 32 From e0de2f558067400e32fc1082468c0664314f3b34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:42 -0700 Subject: [PATCH 13/35] tcg: Move 64-bit expanders out of line This one is more complicated, combining 32-bit and 64-bit expansion with C if instead of preprocessor #if. 
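To make the shape of that conversion concrete, here is the pattern for a single expander (xor), condensed from the hunks below; the patch applies the same rewrite to every 64-bit expander. Since TCG_TARGET_REG_BITS is a build-time constant, the compiler folds the test and discards the dead arm, so the result is equivalent to the old arrangement of an inline copy in the header for 64-bit hosts and a 32-bit-only copy in tcg-op.c.

void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (TCG_TARGET_REG_BITS == 64) {
        /* 64-bit host: emit the opcode directly, as the old inline did. */
        tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2);
    } else {
        /* 32-bit host: operate on the low and high halves separately. */
        tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
        tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
    }
}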
Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-6-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 144 +--------------------- tcg/tcg-op.c | 231 +++++++++++++++++++++++++----------- 2 files changed, 169 insertions(+), 206 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index cdaa1415d1..f5ec54f874 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -305,130 +305,6 @@ void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); /* Replicate a value of size @vece from @in to all the lanes in @out */ void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in); -#if TCG_TARGET_REG_BITS == 64 -static inline void tcg_gen_discard_i64(TCGv_i64 arg) -{ - tcg_gen_op1_i64(INDEX_op_discard, arg); -} - -static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) -{ - if (ret != arg) { - tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); - } -} - -static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); -} - -static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); -} - -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); -} - -static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); -} - 
-static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); -} - -static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); -} -#else /* TCG_TARGET_REG_BITS == 32 */ void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); @@ -453,16 +329,8 @@ void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -#endif /* TCG_TARGET_REG_BITS */ +void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg); -static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) -{ - if (TCG_TARGET_HAS_neg_i64) { - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); - } else { - tcg_gen_subfi_i64(ret, 0, arg); - } -} /* Size changing operations. */ @@ -473,19 +341,17 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); +void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi); -void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src); void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg); void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi); +/* 128 bit ops */ + +void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src); void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset); void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset); -static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) -{ - tcg_gen_deposit_i64(ret, lo, hi, 32, 32); -} - /* Local load/store bit ops */ void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ab6281ebec..579a2aab15 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1503,152 +1503,238 @@ void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) /* 64-bit ops */ -#if TCG_TARGET_REG_BITS == 32 -/* These are all inline for TCG_TARGET_REG_BITS == 64. */ - void tcg_gen_discard_i64(TCGv_i64 arg) { - tcg_gen_discard_i32(TCGV_LOW(arg)); - tcg_gen_discard_i32(TCGV_HIGH(arg)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op1_i64(INDEX_op_discard, arg); + } else { + tcg_gen_discard_i32(TCGV_LOW(arg)); + tcg_gen_discard_i32(TCGV_HIGH(arg)); + } } void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) { - TCGTemp *ts = tcgv_i64_temp(arg); - - /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */ - if (ts->kind == TEMP_CONST) { - tcg_gen_movi_i64(ret, ts->val); + if (ret == arg) { + return; + } + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); } else { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + TCGTemp *ts = tcgv_i64_temp(arg); + + /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. 
*/ + if (ts->kind == TEMP_CONST) { + tcg_gen_movi_i64(ret, ts->val); + } else { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + } } } void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) { - tcg_gen_movi_i32(TCGV_LOW(ret), arg); - tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_mov_i64(ret, tcg_constant_i64(arg)); + } else { + tcg_gen_movi_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); + } } void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); + } else { + tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); + } else { + tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); + } else { + tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); + } else { + tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } } void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { - /* Since arg2 and ret have different types, - they cannot be the same temporary */ -#if HOST_BIG_ENDIAN - tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); -#else - tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); -#endif + /* + * For 32-bit host, since arg2 and ret have different types, + * they cannot be the same temporary -- no chance of overlap. 
+ */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); + } else if (HOST_BIG_ENDIAN) { + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); + } else { + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); + } } void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); + } else { + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); + } else { + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); + } else { + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + } } void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { -#if HOST_BIG_ENDIAN - tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); -#else - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); - tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); -#endif + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); + } else if (HOST_BIG_ENDIAN) { + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); + } else { + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); + } } void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); + } else { + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + } } void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); + } else { + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); + } } void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); + } else { + tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + } } void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); + } else { + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 
TCGV_HIGH(arg2)); + } } void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); - tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); + } else { + tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); + } } void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_shl_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); + } else { + gen_helper_shl_i64(ret, arg1, arg2); + } } void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_shr_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); + } else { + gen_helper_shr_i64(ret, arg1, arg2); + } } void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - gen_helper_sar_i64(ret, arg1, arg2); + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); + } else { + gen_helper_sar_i64(ret, arg1, arg2); + } } void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) @@ -1656,6 +1742,12 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) TCGv_i64 t0; TCGv_i32 t1; + if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); + return; + } + + t0 = tcg_temp_ebb_new_i64(); t1 = tcg_temp_ebb_new_i32(); @@ -1672,15 +1764,6 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_temp_free_i32(t1); } -#else - -void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) -{ - tcg_gen_mov_i64(ret, tcg_constant_i64(arg)); -} - -#endif /* TCG_TARGET_REG_SIZE == 32 */ - void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { /* some cases can be optimized here */ @@ -1723,6 +1806,15 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) } } +void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_HAS_neg_i64) { + tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); + } else { + tcg_gen_subfi_i64(ret, 0, arg); + } +} + void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { if (TCG_TARGET_REG_BITS == 32) { @@ -3218,6 +3310,11 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) tcg_gen_shri_i64(hi, arg, 32); } +void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) +{ + tcg_gen_deposit_i64(ret, lo, hi, 32, 32); +} + void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg) { tcg_gen_mov_i64(lo, TCGV128_LOW(arg)); From 27c758fd22a825783228dbdd339fd55da08a5ff3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:43 -0700 Subject: [PATCH 14/35] tcg: Move vec_gen_* declarations to tcg-internal.h These are used within tcg-op-vec.c and tcg/host/tcg-target.c.inc. There are no uses outside tcg/. 
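For context, the sort of internal caller these declarations serve looks roughly like the following (a hypothetical sketch modelled on tcg-op-vec.c, not code added by this patch; vec_gen_3 and tcgv_vec_arg are the existing helpers, while the wrapper name is illustrative only):

/* Emit a three-operand vector opcode directly; only the generic vector
 * expanders and the per-host tcg-target.c.inc backends build ops at
 * this level. */
static void gen_vec_op3(TCGOpcode opc, TCGType type, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_3(opc, type, vece, tcgv_vec_arg(r),
              tcgv_vec_arg(a), tcgv_vec_arg(b));
}

Since nothing outside tcg/ constructs vector ops at this granularity, the prototypes can live in tcg-internal.h rather than the public header.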
Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-7-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 4 ---- tcg/tcg-internal.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index f5ec54f874..3f8b214376 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -21,10 +21,6 @@ void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); -void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); -void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); -void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); - void tcg_gen_op1_i32(TCGOpcode, TCGv_i32); void tcg_gen_op1_i64(TCGOpcode, TCGv_i64); void tcg_gen_op1i(TCGOpcode, TCGArg); diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 40a69e6e6e..f18d282abb 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -83,4 +83,8 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) bool tcg_target_has_memory_bswap(MemOp memop); +void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); +void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); +void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); + #endif /* TCG_INTERNAL_H */ From 1d67bf545fd6321d14150eac00851782073a17e9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:44 -0700 Subject: [PATCH 15/35] tcg: Move tcg_gen_opN declarations to tcg-internal.h These are used within tcg-op.c and tcg-op-ldst.c. There are no uses outside tcg/. Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-8-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 7 ------- tcg/tcg-internal.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 3f8b214376..b922545118 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -14,13 +14,6 @@ /* Basic output routines. Not for general consumption. 
*/ -void tcg_gen_op1(TCGOpcode, TCGArg); -void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg); -void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg); -void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); -void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); -void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); - void tcg_gen_op1_i32(TCGOpcode, TCGv_i32); void tcg_gen_op1_i64(TCGOpcode, TCGv_i64); void tcg_gen_op1i(TCGOpcode, TCGArg); diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index f18d282abb..c9ac34fc3d 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -83,6 +83,13 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) bool tcg_target_has_memory_bswap(MemOp memop); +void tcg_gen_op1(TCGOpcode, TCGArg); +void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg); +void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg); +void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); +void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); +void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg); + void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg); void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg); void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg); From 17b9fadb1d93edd79ac1aec7f48b66ceff325cbd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:45 -0700 Subject: [PATCH 16/35] tcg: Unexport tcg_gen_op*_{i32,i64} These functions are no longer used outside tcg-op.c. There are several that are completely unused, so remove them. Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-9-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 47 ------------- tcg/tcg-op.c | 131 ++++++++++++++---------------------- 2 files changed, 52 insertions(+), 126 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index b922545118..760c67683b 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -12,53 +12,6 @@ #include "exec/helper-proto-common.h" #include "exec/helper-gen-common.h" -/* Basic output routines. Not for general consumption. 
*/ - -void tcg_gen_op1_i32(TCGOpcode, TCGv_i32); -void tcg_gen_op1_i64(TCGOpcode, TCGv_i64); -void tcg_gen_op1i(TCGOpcode, TCGArg); -void tcg_gen_op2_i32(TCGOpcode, TCGv_i32, TCGv_i32); -void tcg_gen_op2_i64(TCGOpcode, TCGv_i64, TCGv_i64); -void tcg_gen_op2i_i32(TCGOpcode, TCGv_i32, TCGArg); -void tcg_gen_op2i_i64(TCGOpcode, TCGv_i64, TCGArg); -void tcg_gen_op2ii(TCGOpcode, TCGArg, TCGArg); -void tcg_gen_op3_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32); -void tcg_gen_op3_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64); -void tcg_gen_op3i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGArg); -void tcg_gen_op3i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGArg); -void tcg_gen_ldst_op_i32(TCGOpcode, TCGv_i32, TCGv_ptr, TCGArg); -void tcg_gen_ldst_op_i64(TCGOpcode, TCGv_i64, TCGv_ptr, TCGArg); -void tcg_gen_op4_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); -void tcg_gen_op4_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); -void tcg_gen_op4i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, TCGArg); -void tcg_gen_op4i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg); -void tcg_gen_op4ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGArg, TCGArg); -void tcg_gen_op4ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGArg, TCGArg); -void tcg_gen_op5_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGv_i32, TCGv_i32); -void tcg_gen_op5_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64); -void tcg_gen_op5i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGv_i32, TCGArg); -void tcg_gen_op5i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGArg); -void tcg_gen_op5ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGArg, TCGArg); -void tcg_gen_op5ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGArg, TCGArg); -void tcg_gen_op6_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGv_i32, TCGv_i32, TCGv_i32); -void tcg_gen_op6_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64, TCGv_i64); -void tcg_gen_op6i_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGv_i32, TCGv_i32, TCGArg); -void tcg_gen_op6i_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64, TCGArg); -void tcg_gen_op6ii_i32(TCGOpcode, TCGv_i32, TCGv_i32, TCGv_i32, - TCGv_i32, TCGArg, TCGArg); -void tcg_gen_op6ii_i64(TCGOpcode, TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGArg, TCGArg); - /* Generic ops. 
*/ void gen_set_label(TCGLabel *l); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 579a2aab15..9aba103590 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -100,204 +100,177 @@ void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, # define DNI #endif -void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) +static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) { tcg_gen_op1(opc, tcgv_i32_arg(a1)); } -void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) +static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) { tcg_gen_op1(opc, tcgv_i64_arg(a1)); } -void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1) +static void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1) { tcg_gen_op1(opc, a1); } -void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) +static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) { tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2)); } -void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) +static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) { tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2)); } -void DNI tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) -{ - tcg_gen_op2(opc, tcgv_i32_arg(a1), a2); -} - -void DNI tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) -{ - tcg_gen_op2(opc, tcgv_i64_arg(a1), a2); -} - -void DNI tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) -{ - tcg_gen_op2(opc, a1, a2); -} - -void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3) +static void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, + TCGv_i32 a2, TCGv_i32 a3) { tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3)); } -void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3) +static void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, + TCGv_i64 a2, TCGv_i64 a3) { tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3)); } -void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGArg a3) +static void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, + TCGv_i32 a2, TCGArg a3) { tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3); } -void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGArg a3) +static void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, + TCGv_i64 a2, TCGArg a3) { tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3); } -void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, - TCGv_ptr base, TCGArg offset) +static void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) { tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); } -void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, - TCGv_ptr base, TCGArg offset) +static void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) { tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset); } -void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4) +static void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4) { tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4)); } -void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4) +static void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4) { tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4)); } -void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - 
TCGv_i32 a3, TCGArg a4) +static void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4) { tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4); } -void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGArg a4) +static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4) { tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4); } -void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGArg a3, TCGArg a4) +static void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGArg a3, TCGArg a4) { tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4); } -void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGArg a3, TCGArg a4) +static void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGArg a3, TCGArg a4) { tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4); } -void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) +static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) { tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5)); } -void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) +static void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) { tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5)); } -void DNI tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5); -} - -void DNI tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) -{ - tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5); -} - -void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGArg a4, TCGArg a5) +static void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGArg a4, TCGArg a5) { tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), a4, a5); } -void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGArg a4, TCGArg a5) +static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGArg a4, TCGArg a5) { tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), a4, a5); } -void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, - TCGv_i32 a4, TCGv_i32 a5, TCGv_i32 a6) +static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGv_i32 a5, TCGv_i32 a6) { tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), tcgv_i32_arg(a6)); } -void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, - TCGv_i64 a4, TCGv_i64 a5, TCGv_i64 a6) +static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, + TCGv_i64 a5, TCGv_i64 a6) { tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), tcgv_i64_arg(a6)); } -void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, - TCGv_i32 
a4, TCGv_i32 a5, TCGArg a6) +static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGv_i32 a5, TCGArg a6) { tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6); } -void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, TCGv_i64 a3, - TCGv_i64 a4, TCGv_i64 a5, TCGArg a6) +static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, + TCGv_i64 a3, TCGv_i64 a4, + TCGv_i64 a5, TCGArg a6) { tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6); } -void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, TCGArg a5, TCGArg a6) +static void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, + TCGv_i32 a3, TCGv_i32 a4, + TCGArg a5, TCGArg a6) { tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6); } -void DNI tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, TCGArg a5, TCGArg a6) -{ - tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6); -} - /* Generic ops. */ void gen_set_label(TCGLabel *l) From 16edaee720139892dbff97cfcb89bf18eb70d227 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:46 -0700 Subject: [PATCH 17/35] tcg: Move tcg_constant_* out of line Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-10-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 8 ++++++++ include/tcg/tcg.h | 26 -------------------------- tcg/tcg-internal.h | 7 +++++++ tcg/tcg.c | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 760c67683b..dddf93067e 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -12,6 +12,11 @@ #include "exec/helper-proto-common.h" #include "exec/helper-gen-common.h" +TCGv_i32 tcg_constant_i32(int32_t val); +TCGv_i64 tcg_constant_i64(int64_t val); +TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val); +TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val); + /* Generic ops. */ void gen_set_label(TCGLabel *l); @@ -459,6 +464,9 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); # define NAT TCGv_i64 #endif +TCGv_ptr tcg_constant_ptr_int(intptr_t x); +#define tcg_constant_ptr(X) tcg_constant_ptr_int((intptr_t)(X)) + static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o) { glue(tcg_gen_ld_,PTR)((NAT)r, a, o); diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 3a4c0f124f..1ae131c242 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -934,32 +934,6 @@ void tcg_remove_ops_after(TCGOp *op); void tcg_optimize(TCGContext *s); -/* - * Locate or create a read-only temporary that is a constant. - * This kind of temporary need not be freed, but for convenience - * will be silently ignored by tcg_temp_free_*. 
- */ -TCGTemp *tcg_constant_internal(TCGType type, int64_t val); - -static inline TCGv_i32 tcg_constant_i32(int32_t val) -{ - return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); -} - -static inline TCGv_i64 tcg_constant_i64(int64_t val) -{ - return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); -} - -TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val); -TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val); - -#if UINTPTR_MAX == UINT32_MAX -# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i32((intptr_t)(x))) -#else -# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i64((intptr_t)(x))) -#endif - TCGLabel *gen_new_label(void); /** diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index c9ac34fc3d..6c9d9e48db 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -83,6 +83,13 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) bool tcg_target_has_memory_bswap(MemOp memop); +/* + * Locate or create a read-only temporary that is a constant. + * This kind of temporary need not be freed, but for convenience + * will be silently ignored by tcg_temp_free_*. + */ +TCGTemp *tcg_constant_internal(TCGType type, int64_t val); + void tcg_gen_op1(TCGOpcode, TCGArg); void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg); void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg); diff --git a/tcg/tcg.c b/tcg/tcg.c index 6766b60b8a..ab0d227c00 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1806,6 +1806,21 @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) return ts; } +TCGv_i32 tcg_constant_i32(int32_t val) +{ + return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); +} + +TCGv_i64 tcg_constant_i64(int64_t val) +{ + return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); +} + +TCGv_ptr tcg_constant_ptr_int(intptr_t val) +{ + return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); +} + TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) { val = dup_const(vece, val); From 4643f3e07edd82f55788d55d30f58239c5468e6f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:47 -0700 Subject: [PATCH 18/35] tcg: Move tcg_temp_new_*, tcg_global_mem_new_* out of line Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-11-richard.henderson@linaro.org> --- include/tcg/tcg-op-common.h | 11 ++++++ include/tcg/tcg-temp-internal.h | 27 +++----------- include/tcg/tcg.h | 51 -------------------------- tcg/tcg.c | 64 +++++++++++++++++++++++++++++++-- 4 files changed, 76 insertions(+), 77 deletions(-) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index dddf93067e..2d932a515e 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -17,6 +17,17 @@ TCGv_i64 tcg_constant_i64(int64_t val); TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val); TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val); +TCGv_i32 tcg_temp_new_i32(void); +TCGv_i64 tcg_temp_new_i64(void); +TCGv_ptr tcg_temp_new_ptr(void); +TCGv_i128 tcg_temp_new_i128(void); +TCGv_vec tcg_temp_new_vec(TCGType type); +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match); + +TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name); +TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name); +TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name); + /* Generic ops. 
*/ void gen_set_label(TCGLabel *l); diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h index dded2917e5..2d45cc45d2 100644 --- a/include/tcg/tcg-temp-internal.h +++ b/include/tcg/tcg-temp-internal.h @@ -56,28 +56,9 @@ static inline void tcg_temp_free_vec(TCGv_vec arg) tcg_temp_free_internal(tcgv_vec_temp(arg)); } -static inline TCGv_i32 tcg_temp_ebb_new_i32(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB); - return temp_tcgv_i32(t); -} - -static inline TCGv_i64 tcg_temp_ebb_new_i64(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB); - return temp_tcgv_i64(t); -} - -static inline TCGv_i128 tcg_temp_ebb_new_i128(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB); - return temp_tcgv_i128(t); -} - -static inline TCGv_ptr tcg_temp_ebb_new_ptr(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB); - return temp_tcgv_ptr(t); -} +TCGv_i32 tcg_temp_ebb_new_i32(void); +TCGv_i64 tcg_temp_ebb_new_i64(void); +TCGv_ptr tcg_temp_ebb_new_ptr(void); +TCGv_i128 tcg_temp_ebb_new_i128(void); #endif /* TCG_TEMP_FREE_H */ diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 1ae131c242..fc218fd381 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -795,57 +795,6 @@ void tb_target_set_jmp_target(const TranslationBlock *, int, void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); -TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, - intptr_t, const char *); -TCGTemp *tcg_temp_new_internal(TCGType, TCGTempKind); -TCGv_vec tcg_temp_new_vec(TCGType type); -TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match); - -static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset, - const char *name) -{ - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); - return temp_tcgv_i32(t); -} - -static inline TCGv_i32 tcg_temp_new_i32(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB); - return temp_tcgv_i32(t); -} - -static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset, - const char *name) -{ - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); - return temp_tcgv_i64(t); -} - -static inline TCGv_i64 tcg_temp_new_i64(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB); - return temp_tcgv_i64(t); -} - -static inline TCGv_i128 tcg_temp_new_i128(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB); - return temp_tcgv_i128(t); -} - -static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset, - const char *name) -{ - TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_PTR, reg, offset, name); - return temp_tcgv_ptr(t); -} - -static inline TCGv_ptr tcg_temp_new_ptr(void) -{ - TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB); - return temp_tcgv_ptr(t); -} - #define TCG_CT_CONST 1 /* any constant of register size */ typedef struct TCGArgConstraint { diff --git a/tcg/tcg.c b/tcg/tcg.c index ab0d227c00..ec358ce5c0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1572,8 +1572,8 @@ void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); } -TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, - intptr_t offset, const char *name) +static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, + const char *name, TCGType type) { TCGContext *s = tcg_ctx; TCGTemp *base_ts = tcgv_ptr_temp(base); @@ -1632,7 +1632,25 @@ TCGTemp 
*tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, return ts; } -TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) +TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) +{ + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); + return temp_tcgv_i32(ts); +} + +TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) +{ + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); + return temp_tcgv_i64(ts); +} + +TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) +{ + TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); + return temp_tcgv_ptr(ts); +} + +static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) { TCGContext *s = tcg_ctx; TCGTemp *ts; @@ -1696,6 +1714,46 @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) return ts; } +TCGv_i32 tcg_temp_new_i32(void) +{ + return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); +} + +TCGv_i32 tcg_temp_ebb_new_i32(void) +{ + return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); +} + +TCGv_i64 tcg_temp_new_i64(void) +{ + return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); +} + +TCGv_i64 tcg_temp_ebb_new_i64(void) +{ + return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); +} + +TCGv_ptr tcg_temp_new_ptr(void) +{ + return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); +} + +TCGv_ptr tcg_temp_ebb_new_ptr(void) +{ + return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); +} + +TCGv_i128 tcg_temp_new_i128(void) +{ + return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); +} + +TCGv_i128 tcg_temp_ebb_new_i128(void) +{ + return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); +} + TCGv_vec tcg_temp_new_vec(TCGType type) { TCGTemp *t; From 58b797130c659ca084ad298e92ad4ee114c37a06 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 29 Oct 2023 14:08:48 -0700 Subject: [PATCH 19/35] tcg: Move tcg_temp_free_* out of line Signed-off-by: Richard Henderson Message-Id: <20231029210848.78234-12-richard.henderson@linaro.org> --- include/tcg/tcg-temp-internal.h | 29 +++++------------------------ tcg/tcg.c | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h index 2d45cc45d2..44192c55a9 100644 --- a/include/tcg/tcg-temp-internal.h +++ b/include/tcg/tcg-temp-internal.h @@ -31,30 +31,11 @@ void tcg_temp_free_internal(TCGTemp *); -static inline void tcg_temp_free_i32(TCGv_i32 arg) -{ - tcg_temp_free_internal(tcgv_i32_temp(arg)); -} - -static inline void tcg_temp_free_i64(TCGv_i64 arg) -{ - tcg_temp_free_internal(tcgv_i64_temp(arg)); -} - -static inline void tcg_temp_free_i128(TCGv_i128 arg) -{ - tcg_temp_free_internal(tcgv_i128_temp(arg)); -} - -static inline void tcg_temp_free_ptr(TCGv_ptr arg) -{ - tcg_temp_free_internal(tcgv_ptr_temp(arg)); -} - -static inline void tcg_temp_free_vec(TCGv_vec arg) -{ - tcg_temp_free_internal(tcgv_vec_temp(arg)); -} +void tcg_temp_free_i32(TCGv_i32 arg); +void tcg_temp_free_i64(TCGv_i64 arg); +void tcg_temp_free_i128(TCGv_i128 arg); +void tcg_temp_free_ptr(TCGv_ptr arg); +void tcg_temp_free_vec(TCGv_vec arg); TCGv_i32 tcg_temp_ebb_new_i32(void); TCGv_i64 tcg_temp_ebb_new_i64(void); diff --git a/tcg/tcg.c b/tcg/tcg.c index ec358ce5c0..258bd1c10b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1809,6 +1809,31 @@ void 
tcg_temp_free_internal(TCGTemp *ts) } } +void tcg_temp_free_i32(TCGv_i32 arg) +{ + tcg_temp_free_internal(tcgv_i32_temp(arg)); +} + +void tcg_temp_free_i64(TCGv_i64 arg) +{ + tcg_temp_free_internal(tcgv_i64_temp(arg)); +} + +void tcg_temp_free_i128(TCGv_i128 arg) +{ + tcg_temp_free_internal(tcgv_i128_temp(arg)); +} + +void tcg_temp_free_ptr(TCGv_ptr arg) +{ + tcg_temp_free_internal(tcgv_ptr_temp(arg)); +} + +void tcg_temp_free_vec(TCGv_vec arg) +{ + tcg_temp_free_internal(tcgv_vec_temp(arg)); +} + TCGTemp *tcg_constant_internal(TCGType type, int64_t val) { TCGContext *s = tcg_ctx; From 42221a64da3ade66b01952a84307acc7061c1a05 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:13:59 -0700 Subject: [PATCH 20/35] tcg/mips: Split out tcg_out_setcond_int Return the temp and a set of flags, to be used as a primitive for setcond, brcond, movcond. Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-2-richard.henderson@linaro.org> --- tcg/mips/tcg-target.c.inc | 302 +++++++++++++++----------------------- 1 file changed, 118 insertions(+), 184 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 328984ccff..89681f00fe 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -871,71 +871,83 @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, } } -/* Bit 0 set if inversion required; bit 1 set if swapping required. */ -#define MIPS_CMP_INV 1 -#define MIPS_CMP_SWAP 2 +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) -static const uint8_t mips_cmp_map[16] = { - [TCG_COND_LT] = 0, - [TCG_COND_LTU] = 0, - [TCG_COND_GE] = MIPS_CMP_INV, - [TCG_COND_GEU] = MIPS_CMP_INV, - [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_GT] = MIPS_CMP_SWAP, - [TCG_COND_GTU] = MIPS_CMP_SWAP, -}; +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + int flags = 0; + + switch (cond) { + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_LE: /* -> GT */ + case TCG_COND_LEU: /* -> GTU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; + break; + default: + break; + } + + switch (cond) { + case TCG_COND_NE: + flags |= SETCOND_NEZ; + if (arg2 == 0) { + return arg1 | flags; + } + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + break; + default: + g_assert_not_reached(); + } + return ret | flags; +} + +static void tcg_out_setcond_end(TCGContext *s, TCGReg ret, int tmpflags) +{ + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. */ + tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. 
*/ + tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } +} static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg arg1, TCGReg arg2) { - MIPSInsn s_opc = OPC_SLTU; - int cmp_map; - - switch (cond) { - case TCG_COND_EQ: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - break; - - case TCG_COND_NE: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); - if (cmp_map & MIPS_CMP_INV) { - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - } - break; - - default: - g_assert_not_reached(); - break; - } + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2); + tcg_out_setcond_end(s, ret, tmpflags); } static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, @@ -948,9 +960,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, [TCG_COND_GE] = OPC_BGEZ, }; - MIPSInsn s_opc = OPC_SLTU; - MIPSInsn b_opc; - int cmp_map; + MIPSInsn b_opc = 0; switch (cond) { case TCG_COND_EQ: @@ -959,7 +969,6 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, case TCG_COND_NE: b_opc = OPC_BNE; break; - case TCG_COND_LT: case TCG_COND_GT: case TCG_COND_LE: @@ -968,133 +977,76 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, b_opc = b_zero[cond]; arg2 = arg1; arg1 = 0; - break; } - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GTU: - case TCG_COND_LEU: - case TCG_COND_GEU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); - b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); - arg1 = TCG_TMP0; - arg2 = TCG_REG_ZERO; break; - default: - g_assert_not_reached(); break; } + if (b_opc == 0) { + int tmpflags = tcg_out_setcond_int(s, cond, TCG_TMP0, arg1, arg2); + + arg2 = TCG_REG_ZERO; + arg1 = tmpflags & ~SETCOND_FLAGS; + b_opc = tmpflags & SETCOND_INV ? OPC_BEQ : OPC_BNE; + } + + tcg_out_reloc(s, s->code_ptr, R_MIPS_PC16, l, 0); tcg_out_opc_br(s, b_opc, arg1, arg2); - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); tcg_out_nop(s); } -static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, - TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh) +static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { - /* Merge highpart comparison into AH. 
*/ - if (bh != 0) { - if (ah != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); - ah = tmp0; - } else { - ah = bh; - } + int flags = 0; + + switch (cond) { + case TCG_COND_EQ: + flags |= SETCOND_INV; + /* fall through */ + case TCG_COND_NE: + flags |= SETCOND_NEZ; + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP1, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1); + break; + + default: + tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), TCG_TMP1, al, bl); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP0); + tcg_out_setcond(s, tcg_high_cond(cond), TCG_TMP0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1); + break; } - /* Merge lowpart comparison into AL. */ - if (bl != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); - al = tmp1; - } else { - al = bl; - } - } - /* Merge high and low part comparisons into AL. */ - if (ah != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); - al = tmp0; - } else { - al = ah; - } - } - return al; + return ret | flags; } static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { - TCGReg tmp0 = TCG_TMP0; - TCGReg tmp1 = ret; - - tcg_debug_assert(ret != TCG_TMP0); - if (ret == ah || ret == bh) { - tcg_debug_assert(ret != TCG_TMP1); - tmp1 = TCG_TMP1; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); - tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); - break; - - default: - tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); - tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); - tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); - tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); - tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); - break; - } + int tmpflags = tcg_out_setcond2_int(s, cond, ret, al, ah, bl, bh); + tcg_out_setcond_end(s, ret, tmpflags); } static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh, TCGLabel *l) { - TCGCond b_cond = TCG_COND_NE; - TCGReg tmp = TCG_TMP1; + int tmpflags = tcg_out_setcond2_int(s, cond, TCG_TMP0, al, ah, bl, bh); + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + MIPSInsn b_opc = tmpflags & SETCOND_INV ? OPC_BEQ : OPC_BNE; - /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. - With setcond, we emit between 3 and 10 insns and only 1 branch, - which ought to get better branch prediction. */ - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - b_cond = cond; - tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. 
*/ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - b_cond = TCG_COND_EQ; - } - tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); - break; - } - - tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); + tcg_out_reloc(s, s->code_ptr, R_MIPS_PC16, l, 0); + tcg_out_opc_br(s, b_opc, tmp, TCG_REG_ZERO); + tcg_out_nop(s); } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) { - bool eqz = false; + int tmpflags; + bool eqz; /* If one of the values is zero, put it last to match SEL*Z instructions */ if (use_mips32r6_instructions && v1 == 0) { @@ -1103,27 +1055,9 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, cond = tcg_invert_cond(cond); } - switch (cond) { - case TCG_COND_EQ: - eqz = true; - /* FALLTHRU */ - case TCG_COND_NE: - if (c2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - } - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. */ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - eqz = true; - } - tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - break; - } + tmpflags = tcg_out_setcond_int(s, cond, TCG_TMP0, c1, c2); + c1 = tmpflags & ~SETCOND_FLAGS; + eqz = tmpflags & SETCOND_INV; if (use_mips32r6_instructions) { MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; From 2cff741da81416ba7d4d8f2470e75d0e13bccae4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:14:00 -0700 Subject: [PATCH 21/35] tcg/mips: Always implement movcond Expand as branch over move if not supported in the ISA. Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-3-richard.henderson@linaro.org> --- tcg/mips/tcg-target.c.inc | 19 ++++++++++++++----- tcg/mips/tcg-target.h | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 89681f00fe..82b078b9c5 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1070,13 +1070,22 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, if (v2 != 0) { tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); } - } else { + return; + } + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + + if (use_movnz_instructions) { MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; - tcg_out_opc_reg(s, m_opc, ret, v1, c1); - - /* This should be guaranteed via constraints */ - tcg_debug_assert(v2 == ret); + } else { + /* Invert the condition in order to branch over the move. */ + MIPSInsn b_opc = eqz ? OPC_BNE : OPC_BEQ; + tcg_out_opc_imm(s, b_opc, c1, TCG_REG_ZERO, 2); + tcg_out_nop(s); + /* Open-code tcg_out_mov, without the nop-move check. 
*/ + tcg_out_opc_reg(s, OPC_OR, ret, v1, TCG_REG_ZERO); } } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c0576f66d7..0a4083f0d9 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -154,7 +154,7 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions detected at runtime */ -#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions @@ -169,7 +169,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions +#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions From 3871be753f3351c21c8e384432f7798c3eed9de9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:14:01 -0700 Subject: [PATCH 22/35] tcg: Remove TCG_TARGET_HAS_movcond_{i32,i64} The movcond opcode is now mandatory for backends to implement. Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-4-richard.henderson@linaro.org> --- include/tcg/tcg-opc.h | 4 +-- include/tcg/tcg.h | 1 - tcg/aarch64/tcg-target.h | 2 -- tcg/arm/tcg-target.h | 1 - tcg/i386/tcg-target.h | 2 -- tcg/loongarch64/tcg-target.h | 2 -- tcg/mips/tcg-target.h | 2 -- tcg/ppc/tcg-target.h | 2 -- tcg/riscv/tcg-target.h | 2 -- tcg/s390x/tcg-target.h | 2 -- tcg/sparc64/tcg-target.h | 2 -- tcg/tcg-op.c | 50 ++++++++---------------------------- tcg/tcg.c | 6 ++--- tcg/tci/tcg-target.h | 2 -- 14 files changed, 14 insertions(+), 66 deletions(-) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 6eff3d9106..ecd08db0de 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -47,7 +47,7 @@ DEF(mb, 0, 0, 1, 0) DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_negsetcond_i32)) -DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) +DEF(movcond_i32, 1, 4, 1, 0) /* load/store */ DEF(ld8u_i32, 1, 1, 1, 0) DEF(ld8s_i32, 1, 1, 1, 0) @@ -113,7 +113,7 @@ DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32)) DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) DEF(setcond_i64, 1, 2, 1, IMPL64) DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64)) -DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) +DEF(movcond_i64, 1, 4, 1, IMPL64) /* load/store */ DEF(ld8u_i64, 1, 1, 1, IMPL64) DEF(ld8s_i64, 1, 1, 1, IMPL64) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index fc218fd381..32a208a02e 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -96,7 +96,6 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 98727ea53b..352e19aba8 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -85,7 +85,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 -#define 
TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -122,7 +121,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 311a985209..439898efb3 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -115,7 +115,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions #define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 8417ea4899..7522ce7575 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -149,7 +149,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -186,7 +185,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 1bea15b02e..486898c2d3 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -97,7 +97,6 @@ typedef enum { #define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* optional instructions */ -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 @@ -134,7 +133,6 @@ typedef enum { #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 0a4083f0d9..5b3fdcc726 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -154,7 +154,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions detected at runtime */ -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions @@ -169,7 +168,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 8bfb14998e..a2856afd4d 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -96,7 +96,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_movcond_i32 1 #define 
TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 @@ -134,7 +133,6 @@ typedef enum { #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index c1132d178f..f3644a8bc1 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -87,7 +87,6 @@ extern bool have_zbb; #endif /* optional instructions */ -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 @@ -123,7 +122,6 @@ extern bool have_zbb; #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 50e12ef9d6..2c936c1bcb 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -95,7 +95,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -131,7 +130,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 5cfc4b4679..4c286c6006 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -105,7 +105,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -142,7 +141,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 9aba103590..26bcd090c1 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1142,17 +1142,8 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, tcg_gen_mov_i32(ret, v1); } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i32(ret, v2); - } else if (TCG_TARGET_HAS_movcond_i32) { - tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); } else { - TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - TCGv_i32 t1 = tcg_temp_ebb_new_i32(); - tcg_gen_negsetcond_i32(cond, t0, c1, c2); - tcg_gen_and_i32(t1, v1, t0); - tcg_gen_andc_i32(ret, v2, t0); - tcg_gen_or_i32(ret, ret, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); } } @@ -3011,43 +3002,22 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, tcg_gen_mov_i64(ret, v1); } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i64(ret, v2); - } else if 
(TCG_TARGET_REG_BITS == 32) { + } else if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); + } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0, TCGV_LOW(c1), TCGV_HIGH(c1), TCGV_LOW(c2), TCGV_HIGH(c2), cond); - if (TCG_TARGET_HAS_movcond_i32) { - tcg_gen_movi_i32(t1, 0); - tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1, - TCGV_LOW(v1), TCGV_LOW(v2)); - tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1, - TCGV_HIGH(v1), TCGV_HIGH(v2)); - } else { - tcg_gen_neg_i32(t0, t0); + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero, + TCGV_LOW(v1), TCGV_LOW(v2)); + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero, + TCGV_HIGH(v1), TCGV_HIGH(v2)); - tcg_gen_and_i32(t1, TCGV_LOW(v1), t0); - tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0); - tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1); - - tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0); - tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0); - tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1); - } tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); - } else if (TCG_TARGET_HAS_movcond_i64) { - tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); - } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_negsetcond_i64(cond, t0, c1, c2); - tcg_gen_and_i64(t1, v1, t0); - tcg_gen_andc_i64(ret, v2, t0); - tcg_gen_or_i64(ret, ret, t1); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index 258bd1c10b..d59ff14f0f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1977,6 +1977,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_mov_i32: case INDEX_op_setcond_i32: case INDEX_op_brcond_i32: + case INDEX_op_movcond_i32: case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -1998,8 +1999,6 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_movcond_i32: - return TCG_TARGET_HAS_movcond_i32; case INDEX_op_div_i32: case INDEX_op_divu_i32: return TCG_TARGET_HAS_div_i32; @@ -2072,6 +2071,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_mov_i64: case INDEX_op_setcond_i64: case INDEX_op_brcond_i64: + case INDEX_op_movcond_i64: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -2098,8 +2098,6 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_movcond_i64: - return TCG_TARGET_HAS_movcond_i64; case INDEX_op_div_i64: case INDEX_op_divu_i64: return TCG_TARGET_HAS_div_i64; diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 91ca33b616..3503fc4a4c 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -69,7 +69,6 @@ #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 @@ -104,7 +103,6 @@ #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_add2_i32 1 From e0448a8b71624c97af0422df085c66d147104061 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:14:02 -0700 Subject: [PATCH 23/35] tcg/mips: 
Implement neg opcodes Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-5-richard.henderson@linaro.org> --- tcg/mips/tcg-target.c.inc | 8 ++++++++ tcg/mips/tcg-target.h | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 82b078b9c5..8328dbdecc 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1920,6 +1920,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; + case INDEX_op_neg_i32: + i1 = OPC_SUBU; + goto do_unary; + case INDEX_op_neg_i64: + i1 = OPC_DSUBU; + goto do_unary; case INDEX_op_not_i32: case INDEX_op_not_i64: i1 = OPC_NOR; @@ -2144,6 +2150,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: + case INDEX_op_neg_i32: case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: @@ -2157,6 +2164,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: + case INDEX_op_neg_i64: case INDEX_op_not_i64: case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 5b3fdcc726..20c14224fb 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -184,12 +184,12 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ +#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_neg_i64 0 /* sub rd, zero, rt */ +#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ #endif From 0fbee2b76441db5452cde98148f600aba907f4f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:14:03 -0700 Subject: [PATCH 24/35] tcg/loongarch64: Implement neg opcodes Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-6-richard.henderson@linaro.org> --- tcg/loongarch64/tcg-target.c.inc | 9 +++++++++ tcg/loongarch64/tcg-target.h | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index ae13c1f666..a588fb3085 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1441,6 +1441,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_neg_i32: + tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1); + break; + case INDEX_op_neg_i64: + tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1); + break; + case INDEX_op_mul_i32: tcg_out_opc_mul_w(s, a0, a1, a2); break; @@ -2076,6 +2083,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: + case INDEX_op_neg_i32: + case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_extract_i32: diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 486898c2d3..189997644a 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -119,7 +119,7 @@ typedef enum { #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 0 
+#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 @@ -153,7 +153,7 @@ typedef enum { #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_neg_i64 0 +#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 From b701f195d3ed39429fab5787df7c6b1c9377ab94 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 21:14:04 -0700 Subject: [PATCH 25/35] tcg: Remove TCG_TARGET_HAS_neg_{i32,i64} The movcond opcode is now mandatory for backends to implement. Signed-off-by: Richard Henderson Message-Id: <20231026041404.1229328-7-richard.henderson@linaro.org> --- include/tcg/tcg-opc.h | 4 ++-- include/tcg/tcg.h | 1 - tcg/aarch64/tcg-target.h | 2 -- tcg/arm/tcg-target.h | 1 - tcg/i386/tcg-target.h | 2 -- tcg/loongarch64/tcg-target.h | 2 -- tcg/mips/tcg-target.h | 2 -- tcg/optimize.c | 4 ++-- tcg/ppc/tcg-target.h | 2 -- tcg/riscv/tcg-target.h | 2 -- tcg/s390x/tcg-target.h | 2 -- tcg/sparc64/tcg-target.h | 2 -- tcg/tcg-op.c | 22 +++++++++------------- tcg/tcg.c | 6 ++---- tcg/tci.c | 2 -- tcg/tci/tcg-target.h | 2 -- 16 files changed, 15 insertions(+), 43 deletions(-) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index ecd08db0de..b80227fa1c 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -100,7 +100,7 @@ DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) -DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) +DEF(neg_i32, 1, 1, 0, 0) DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32)) DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) @@ -171,7 +171,7 @@ DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) -DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) +DEF(neg_i64, 1, 1, 0, IMPL64) DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64)) DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 32a208a02e..daf2a5bf9e 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -82,7 +82,6 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_neg_i64 0 #define TCG_TARGET_HAS_not_i64 0 #define TCG_TARGET_HAS_andc_i64 0 #define TCG_TARGET_HAS_orc_i64 0 diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 352e19aba8..33f15a564a 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -71,7 +71,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 @@ -107,7 +106,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define 
TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 439898efb3..a712cc80ad 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -101,7 +101,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 0 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7522ce7575..fa34deec47 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -135,7 +135,6 @@ typedef enum { #define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_andc_i32 have_bmi1 #define TCG_TARGET_HAS_orc_i32 0 @@ -171,7 +170,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_andc_i64 have_bmi1 #define TCG_TARGET_HAS_orc_i64 0 diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 189997644a..9c70ebfefc 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -119,7 +119,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 @@ -153,7 +152,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 20c14224fb..b98ffae1d0 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -184,12 +184,10 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ #endif diff --git a/tcg/optimize.c b/tcg/optimize.c index 2db5177c32..6b072d4cdb 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2001,11 +2001,11 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) switch (ctx->type) { case TCG_TYPE_I32: neg_op = INDEX_op_neg_i32; - have_neg = TCG_TARGET_HAS_neg_i32; + have_neg = true; break; case TCG_TYPE_I64: neg_op = INDEX_op_neg_i64; - have_neg = TCG_TARGET_HAS_neg_i64; + have_neg = true; break; case TCG_TYPE_V64: case TCG_TYPE_V128: diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index a2856afd4d..5295e4f9ab 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -83,7 +83,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 1 @@ 
-120,7 +119,6 @@ typedef enum { #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index f3644a8bc1..a4edc3dc74 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -109,7 +109,6 @@ extern bool have_zbb; #define TCG_TARGET_HAS_bswap16_i32 have_zbb #define TCG_TARGET_HAS_bswap32_i32 have_zbb #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 have_zbb #define TCG_TARGET_HAS_orc_i32 have_zbb #define TCG_TARGET_HAS_eqv_i32 have_zbb @@ -142,7 +141,6 @@ extern bool have_zbb; #define TCG_TARGET_HAS_bswap32_i64 have_zbb #define TCG_TARGET_HAS_bswap64_i64 have_zbb #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 have_zbb #define TCG_TARGET_HAS_orc_i64 have_zbb #define TCG_TARGET_HAS_eqv_i64 have_zbb diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 2c936c1bcb..e69b0d2ddd 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -82,7 +82,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) @@ -117,7 +116,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 4c286c6006..f8cf145266 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -91,7 +91,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_ext16u_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 @@ -127,7 +126,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 26bcd090c1..de096a6f93 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -363,9 +363,8 @@ void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) { - if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) { - /* Don't recurse with tcg_gen_neg_i32. 
*/ - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2); + if (arg1 == 0) { + tcg_gen_neg_i32(ret, arg2); } else { tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2); } @@ -383,11 +382,7 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_neg_i32) { - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); - } else { - tcg_gen_subfi_i32(ret, 0, arg); - } + tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); } void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) @@ -1744,9 +1739,8 @@ void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) { - if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) { - /* Don't recurse with tcg_gen_neg_i64. */ - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2); + if (arg1 == 0) { + tcg_gen_neg_i64(ret, arg2); } else if (TCG_TARGET_REG_BITS == 64) { tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2); } else { @@ -1772,10 +1766,12 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_HAS_neg_i64) { + if (TCG_TARGET_REG_BITS == 64) { tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); } else { - tcg_gen_subfi_i64(ret, 0, arg); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), + zero, zero, TCGV_LOW(arg), TCGV_HIGH(arg)); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index d59ff14f0f..d2ea22b397 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1988,6 +1988,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_st_i32: case INDEX_op_add_i32: case INDEX_op_sub_i32: + case INDEX_op_neg_i32: case INDEX_op_mul_i32: case INDEX_op_and_i32: case INDEX_op_or_i32: @@ -2045,8 +2046,6 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_neg_i32: - return TCG_TARGET_HAS_neg_i32; case INDEX_op_andc_i32: return TCG_TARGET_HAS_andc_i32; case INDEX_op_orc_i32: @@ -2085,6 +2084,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_st_i64: case INDEX_op_add_i64: case INDEX_op_sub_i64: + case INDEX_op_neg_i64: case INDEX_op_mul_i64: case INDEX_op_and_i64: case INDEX_op_or_i64: @@ -2141,8 +2141,6 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_neg_i64: - return TCG_TARGET_HAS_neg_i64; case INDEX_op_andc_i64: return TCG_TARGET_HAS_andc_i64; case INDEX_op_orc_i64: diff --git a/tcg/tci.c b/tcg/tci.c index 4640902c88..3cc851b7bd 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -733,12 +733,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = ~regs[r1]; break; #endif -#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64 CASE_32_64(neg) tci_args_rr(insn, &r0, &r1); regs[r0] = -regs[r1]; break; -#endif #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). 
*/ diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 3503fc4a4c..2a13816c8e 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -65,7 +65,6 @@ #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 -#define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_rot_i32 1 @@ -99,7 +98,6 @@ #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 -#define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i64 1 From 9628d008bd2461e654ca47b7576002bd501d7a7d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Aug 2023 20:35:05 -0700 Subject: [PATCH 26/35] tcg: Don't free vector results Avoid reusing vector temporaries so that we may re-use them when propagating stores to loads. Reviewed-by: Song Gao Signed-off-by: Richard Henderson --- tcg/tcg-op-gvec.c | 112 ++++++++++++++++------------------------------ 1 file changed, 38 insertions(+), 74 deletions(-) diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index feb2d3686b..bb88943f79 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -561,7 +561,6 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, tcg_gen_dupi_vec(vece, t_vec, in_c); } do_dup_store(type, dofs, oprsz, maxsz, t_vec); - tcg_temp_free_vec(t_vec); return; } @@ -1024,11 +1023,10 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); if (load_dest) { tcg_gen_ld_vec(t1, tcg_env, dofs + i); @@ -1036,8 +1034,6 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, fni(vece, t1, t0); tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand @@ -1047,11 +1043,10 @@ static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t c, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); if (load_dest) { tcg_gen_ld_vec(t1, tcg_env, dofs + i); @@ -1059,8 +1054,6 @@ static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, fni(vece, t1, t0, c); tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs, @@ -1068,11 +1061,10 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_vec c, bool scalar_first, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); if (scalar_first) { 
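            /* scalar_first: the scalar operand c is passed as the first source to fni. */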
fni(vece, t1, c, t0); @@ -1081,8 +1073,6 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs, } tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); } /* Expand OPSZ bytes worth of three-operand operations using host vectors. */ @@ -1091,12 +1081,11 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t tysz, TCGType type, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); tcg_gen_ld_vec(t1, tcg_env, bofs + i); if (load_dest) { @@ -1105,9 +1094,6 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, fni(vece, t2, t0, t1); tcg_gen_st_vec(t2, tcg_env, dofs + i); } - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* @@ -1120,12 +1106,11 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); tcg_gen_ld_vec(t1, tcg_env, bofs + i); if (load_dest) { @@ -1134,9 +1119,6 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, fni(vece, t2, t0, t1, c); tcg_gen_st_vec(t2, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t2); } /* Expand OPSZ bytes worth of four-operand operations using host vectors. 
*/ @@ -1146,13 +1128,12 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - TCGv_vec t3 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t1, tcg_env, aofs + i); tcg_gen_ld_vec(t2, tcg_env, bofs + i); tcg_gen_ld_vec(t3, tcg_env, cofs + i); @@ -1162,10 +1143,6 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_gen_st_vec(t1, tcg_env, aofs + i); } } - tcg_temp_free_vec(t3); - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* @@ -1178,23 +1155,18 @@ static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - TCGv_vec t2 = tcg_temp_new_vec(type); - TCGv_vec t3 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t1, tcg_env, aofs + i); tcg_gen_ld_vec(t2, tcg_env, bofs + i); tcg_gen_ld_vec(t3, tcg_env, cofs + i); fni(vece, t0, t1, t2, t3, c); tcg_gen_st_vec(t0, tcg_env, dofs + i); } - tcg_temp_free_vec(t3); - tcg_temp_free_vec(t2); - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } /* Expand a vector two-operand operation. 
*/ @@ -1732,7 +1704,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_vec t_vec = tcg_temp_new_vec(type); tcg_gen_dup_mem_vec(vece, t_vec, tcg_env, aofs); do_dup_store(type, dofs, oprsz, maxsz, t_vec); - tcg_temp_free_vec(t_vec); } else if (vece <= MO_32) { TCGv_i32 in = tcg_temp_ebb_new_i32(); switch (vece) { @@ -1766,7 +1737,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, for (i = (aofs == dofs) * 16; i < oprsz; i += 16) { tcg_gen_st_vec(in, tcg_env, dofs + i); } - tcg_temp_free_vec(in); } else { TCGv_i64 in0 = tcg_temp_ebb_new_i64(); TCGv_i64 in1 = tcg_temp_ebb_new_i64(); @@ -1796,7 +1766,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, for (i = (aofs == dofs) * 32; i < oprsz; i += 32) { tcg_gen_st_vec(in, tcg_env, dofs + i); } - tcg_temp_free_vec(in); } else if (TCG_TARGET_HAS_v128) { TCGv_vec in0 = tcg_temp_new_vec(TCG_TYPE_V128); TCGv_vec in1 = tcg_temp_new_vec(TCG_TYPE_V128); @@ -1807,8 +1776,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_gen_st_vec(in0, tcg_env, dofs + i); tcg_gen_st_vec(in1, tcg_env, dofs + i + 16); } - tcg_temp_free_vec(in0); - tcg_temp_free_vec(in1); } else { TCGv_i64 in[4]; int j; @@ -3136,15 +3103,14 @@ static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32)) { - TCGv_vec t0 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); - fni(vece, t0, t0, shift); - tcg_gen_st_vec(t0, tcg_env, dofs + i); + fni(vece, t1, t0, shift); + tcg_gen_st_vec(t1, tcg_env, dofs + i); } - tcg_temp_free_vec(t0); } static void @@ -3720,18 +3686,16 @@ static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, TCGType type, TCGCond cond) { - TCGv_vec t0 = tcg_temp_new_vec(type); - TCGv_vec t1 = tcg_temp_new_vec(type); - uint32_t i; + for (uint32_t i = 0; i < oprsz; i += tysz) { + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); - for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(t0, tcg_env, aofs + i); tcg_gen_ld_vec(t1, tcg_env, bofs + i); - tcg_gen_cmp_vec(cond, vece, t0, t0, t1); - tcg_gen_st_vec(t0, tcg_env, dofs + i); + tcg_gen_cmp_vec(cond, vece, t2, t0, t1); + tcg_gen_st_vec(t2, tcg_env, dofs + i); } - tcg_temp_free_vec(t1); - tcg_temp_free_vec(t0); } void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, From 986cac1d2a773f6b2d8f1051504a8af512688cbd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Jan 2023 13:59:35 -0800 Subject: [PATCH 27/35] tcg/optimize: Pipe OptContext into reset_ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be needed in the next patch. Reviewed-by: Song Gao Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 6b072d4cdb..cbb095b241 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -123,7 +123,7 @@ static inline bool ts_is_copy(TCGTemp *ts) } /* Reset TEMP's state, possibly removing the temp for the list of copies. 
*/ -static void reset_ts(TCGTemp *ts) +static void reset_ts(OptContext *ctx, TCGTemp *ts) { TempOptInfo *ti = ts_info(ts); TempOptInfo *pi = ts_info(ti->prev_copy); @@ -138,9 +138,9 @@ static void reset_ts(TCGTemp *ts) ti->s_mask = 0; } -static void reset_temp(TCGArg arg) +static void reset_temp(OptContext *ctx, TCGArg arg) { - reset_ts(arg_temp(arg)); + reset_ts(ctx, arg_temp(arg)); } /* Initialize and activate a temporary. */ @@ -239,7 +239,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) return true; } - reset_ts(dst_ts); + reset_ts(ctx, dst_ts); di = ts_info(dst_ts); si = ts_info(src_ts); @@ -702,7 +702,7 @@ static void finish_folding(OptContext *ctx, TCGOp *op) nb_oargs = def->nb_oargs; for (i = 0; i < nb_oargs; i++) { TCGTemp *ts = arg_temp(op->args[i]); - reset_ts(ts); + reset_ts(ctx, ts); /* * Save the corresponding known-zero/sign bits mask for the * first output argument (only one supported so far). @@ -1215,14 +1215,14 @@ static bool fold_call(OptContext *ctx, TCGOp *op) for (i = 0; i < nb_globals; i++) { if (test_bit(i, ctx->temps_used.l)) { - reset_ts(&ctx->tcg->temps[i]); + reset_ts(ctx, &ctx->tcg->temps[i]); } } } /* Reset temp data for outputs. */ for (i = 0; i < nb_oargs; i++) { - reset_temp(op->args[i]); + reset_temp(ctx, op->args[i]); } /* Stop optimizing MB across calls. */ From 9f75e52828a967e6c95eb73e726ef1eff4c05496 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 2 Nov 2023 13:37:46 -0700 Subject: [PATCH 28/35] tcg/optimize: Split out cmp_better_copy Compare two temps for "better", split out from finding the best from a whole list. Use TCGKind, which already gives the proper priority. Signed-off-by: Richard Henderson --- tcg/optimize.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index cbb095b241..118561f56d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -122,6 +122,11 @@ static inline bool ts_is_copy(TCGTemp *ts) return ts_info(ts)->next_copy != ts; } +static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) +{ + return a->kind < b->kind ? b : a; +} + /* Reset TEMP's state, possibly removing the temp for the list of copies. */ static void reset_ts(OptContext *ctx, TCGTemp *ts) { @@ -174,30 +179,20 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) } } -static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts) +static TCGTemp *find_better_copy(TCGTemp *ts) { - TCGTemp *i, *g, *l; + TCGTemp *i, *ret; /* If this is already readonly, we can't do better. */ if (temp_readonly(ts)) { return ts; } - g = l = NULL; + ret = ts; for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { - if (temp_readonly(i)) { - return i; - } else if (i->kind > ts->kind) { - if (i->kind == TEMP_GLOBAL) { - g = i; - } else if (i->kind == TEMP_TB) { - l = i; - } - } + ret = cmp_better_copy(ret, i); } - - /* If we didn't find a better representation, return the same temp. */ - return g ? g : l ? 
l : ts; + return ret; } static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) @@ -672,12 +667,10 @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) static void copy_propagate(OptContext *ctx, TCGOp *op, int nb_oargs, int nb_iargs) { - TCGContext *s = ctx->tcg; - for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { TCGTemp *ts = arg_temp(op->args[i]); if (ts_is_copy(ts)) { - op->args[i] = temp_arg(find_better_copy(s, ts)); + op->args[i] = temp_arg(find_better_copy(ts)); } } } From ab84dc398b3b702b0c692538b947ef65dbbdf52f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Aug 2023 23:04:24 -0700 Subject: [PATCH 29/35] tcg/optimize: Optimize env memory operations Propagate stores to loads, loads to loads. Reviewed-by: Song Gao Signed-off-by: Richard Henderson --- tcg/optimize.c | 264 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 243 insertions(+), 21 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 118561f56d..b32ef0be0f 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu/int128.h" +#include "qemu/interval-tree.h" #include "tcg/tcg-op-common.h" #include "tcg-internal.h" @@ -37,10 +38,18 @@ glue(glue(case INDEX_op_, x), _i64): \ glue(glue(case INDEX_op_, x), _vec) +typedef struct MemCopyInfo { + IntervalTreeNode itree; + QSIMPLEQ_ENTRY (MemCopyInfo) next; + TCGTemp *ts; + TCGType type; +} MemCopyInfo; + typedef struct TempOptInfo { bool is_const; TCGTemp *prev_copy; TCGTemp *next_copy; + QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; uint64_t val; uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ @@ -51,6 +60,9 @@ typedef struct OptContext { TCGOp *prev_mb; TCGTempSet temps_used; + IntervalTreeRoot mem_copy; + QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; + /* In flight values from optimization. */ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ @@ -127,27 +139,6 @@ static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) return a->kind < b->kind ? b : a; } -/* Reset TEMP's state, possibly removing the temp for the list of copies. */ -static void reset_ts(OptContext *ctx, TCGTemp *ts) -{ - TempOptInfo *ti = ts_info(ts); - TempOptInfo *pi = ts_info(ti->prev_copy); - TempOptInfo *ni = ts_info(ti->next_copy); - - ni->prev_copy = ti->prev_copy; - pi->next_copy = ti->next_copy; - ti->next_copy = ts; - ti->prev_copy = ts; - ti->is_const = false; - ti->z_mask = -1; - ti->s_mask = 0; -} - -static void reset_temp(OptContext *ctx, TCGArg arg) -{ - reset_ts(ctx, arg_temp(arg)); -} - /* Initialize and activate a temporary. */ static void init_ts_info(OptContext *ctx, TCGTemp *ts) { @@ -167,6 +158,7 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) ti->next_copy = ts; ti->prev_copy = ts; + QSIMPLEQ_INIT(&ti->mem_copy); if (ts->kind == TEMP_CONST) { ti->is_const = true; ti->val = ts->val; @@ -179,6 +171,45 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) } } +static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) +{ + IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); + return r ? container_of(r, MemCopyInfo, itree) : NULL; +} + +static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) +{ + IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); + return r ? 
container_of(r, MemCopyInfo, itree) : NULL; +} + +static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) +{ + TCGTemp *ts = mc->ts; + TempOptInfo *ti = ts_info(ts); + + interval_tree_remove(&mc->itree, &ctx->mem_copy); + QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); + QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); +} + +static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) +{ + while (true) { + MemCopyInfo *mc = mem_copy_first(ctx, s, l); + if (!mc) { + break; + } + remove_mem_copy(ctx, mc); + } +} + +static void remove_mem_copy_all(OptContext *ctx) +{ + remove_mem_copy_in(ctx, 0, -1); + tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); +} + static TCGTemp *find_better_copy(TCGTemp *ts) { TCGTemp *i, *ret; @@ -195,6 +226,80 @@ static TCGTemp *find_better_copy(TCGTemp *ts) return ret; } +static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) +{ + TempOptInfo *si = ts_info(src_ts); + TempOptInfo *di = ts_info(dst_ts); + MemCopyInfo *mc; + + QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { + tcg_debug_assert(mc->ts == src_ts); + mc->ts = dst_ts; + } + QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); +} + +/* Reset TEMP's state, possibly removing the temp for the list of copies. */ +static void reset_ts(OptContext *ctx, TCGTemp *ts) +{ + TempOptInfo *ti = ts_info(ts); + TCGTemp *pts = ti->prev_copy; + TCGTemp *nts = ti->next_copy; + TempOptInfo *pi = ts_info(pts); + TempOptInfo *ni = ts_info(nts); + + ni->prev_copy = ti->prev_copy; + pi->next_copy = ti->next_copy; + ti->next_copy = ts; + ti->prev_copy = ts; + ti->is_const = false; + ti->z_mask = -1; + ti->s_mask = 0; + + if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { + if (ts == nts) { + /* Last temp copy being removed, the mem copies die. */ + MemCopyInfo *mc; + QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { + interval_tree_remove(&mc->itree, &ctx->mem_copy); + } + QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); + } else { + move_mem_copies(find_better_copy(nts), ts); + } + } +} + +static void reset_temp(OptContext *ctx, TCGArg arg) +{ + reset_ts(ctx, arg_temp(arg)); +} + +static void record_mem_copy(OptContext *ctx, TCGType type, + TCGTemp *ts, intptr_t start, intptr_t last) +{ + MemCopyInfo *mc; + TempOptInfo *ti; + + mc = QSIMPLEQ_FIRST(&ctx->mem_free); + if (mc) { + QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); + } else { + mc = tcg_malloc(sizeof(*mc)); + } + + memset(mc, 0, sizeof(*mc)); + mc->itree.start = start; + mc->itree.last = last; + mc->type = type; + interval_tree_insert(&mc->itree, &ctx->mem_copy); + + ts = find_better_copy(ts); + ti = ts_info(ts); + mc->ts = ts; + QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); +} + static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) { TCGTemp *i; @@ -221,6 +326,18 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2) return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); } +static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) +{ + MemCopyInfo *mc; + + for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { + if (mc->itree.start == s && mc->type == type) { + return find_better_copy(mc->ts); + } + } + return NULL; +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -270,6 +387,11 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) si->next_copy = dst_ts; di->is_const = si->is_const; di->val = si->val; + + if (!QSIMPLEQ_EMPTY(&si->mem_copy) + && cmp_better_copy(src_ts, dst_ts) == dst_ts) { + move_mem_copies(dst_ts, src_ts); 
+ } } return true; } @@ -688,6 +810,7 @@ static void finish_folding(OptContext *ctx, TCGOp *op) ctx->prev_mb = NULL; if (!(def->flags & TCG_OPF_COND_BRANCH)) { memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); + remove_mem_copy_all(ctx); } return; } @@ -1213,6 +1336,11 @@ static bool fold_call(OptContext *ctx, TCGOp *op) } } + /* If the function has side effects, reset mem data. */ + if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { + remove_mem_copy_all(ctx); + } + /* Reset temp data for outputs. */ for (i = 0; i < nb_oargs; i++) { reset_temp(ctx, op->args[i]); @@ -2070,6 +2198,83 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) return false; } +static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) +{ + TCGTemp *dst, *src; + intptr_t ofs; + TCGType type; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + return false; + } + + type = ctx->type; + ofs = op->args[2]; + dst = arg_temp(op->args[0]); + src = find_mem_copy_for(ctx, type, ofs); + if (src && src->base_type == type) { + return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src)); + } + + reset_ts(ctx, dst); + record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1); + return true; +} + +static bool fold_tcg_st(OptContext *ctx, TCGOp *op) +{ + intptr_t ofs = op->args[2]; + intptr_t lm1; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + remove_mem_copy_all(ctx); + return false; + } + + switch (op->opc) { + CASE_OP_32_64(st8): + lm1 = 0; + break; + CASE_OP_32_64(st16): + lm1 = 1; + break; + case INDEX_op_st32_i64: + case INDEX_op_st_i32: + lm1 = 3; + break; + case INDEX_op_st_i64: + lm1 = 7; + break; + case INDEX_op_st_vec: + lm1 = tcg_type_size(ctx->type) - 1; + break; + default: + g_assert_not_reached(); + } + remove_mem_copy_in(ctx, ofs, ofs + lm1); + return false; +} + +static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) +{ + TCGTemp *src; + intptr_t ofs, last; + TCGType type; + + if (op->args[1] != tcgv_ptr_arg(tcg_env)) { + fold_tcg_st(ctx, op); + return false; + } + + src = arg_temp(op->args[0]); + ofs = op->args[2]; + type = ctx->type; + last = ofs + tcg_type_size(type) - 1; + remove_mem_copy_in(ctx, ofs, last); + record_mem_copy(ctx, type, src, ofs, last); + return false; +} + static bool fold_xor(OptContext *ctx, TCGOp *op) { if (fold_const2_commutative(ctx, op) || @@ -2093,6 +2298,8 @@ void tcg_optimize(TCGContext *s) TCGOp *op, *op_next; OptContext ctx = { .tcg = s }; + QSIMPLEQ_INIT(&ctx.mem_free); + /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. If this temp is a copy of other ones then the other copies are @@ -2214,6 +2421,21 @@ void tcg_optimize(TCGContext *s) case INDEX_op_ld32u_i64: done = fold_tcg_ld(&ctx, op); break; + case INDEX_op_ld_i32: + case INDEX_op_ld_i64: + case INDEX_op_ld_vec: + done = fold_tcg_ld_memcopy(&ctx, op); + break; + CASE_OP_32_64(st8): + CASE_OP_32_64(st16): + case INDEX_op_st32_i64: + done = fold_tcg_st(&ctx, op); + break; + case INDEX_op_st_i32: + case INDEX_op_st_i64: + case INDEX_op_st_vec: + done = fold_tcg_st_memcopy(&ctx, op); + break; case INDEX_op_mb: done = fold_mb(&ctx, op); break; From 3eaadaeb4ee9cc6a1267882b3e31c893cd99bb9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Aug 2023 23:13:06 -0700 Subject: [PATCH 30/35] tcg: Eliminate duplicate env store operations Notice when a constant is stored to the same location twice. 
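For illustration (editorial sketch, not part of the original patch): stripped of TCG details, the check amounts to remembering the last constant known to live in each env slot and dropping a store that would write the same value back. A minimal standalone C model of that idea, with invented names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define N_SLOTS 64

    typedef struct {
        bool known;       /* slot is known to hold a constant */
        uint64_t val;     /* the constant last stored there */
    } SlotInfo;

    static SlotInfo slots[N_SLOTS];

    /* Return true if the store may be dropped: the slot already holds val. */
    static bool store_const(unsigned slot, uint64_t val)
    {
        if (slots[slot].known && slots[slot].val == val) {
            return true;
        }
        slots[slot].known = true;
        slots[slot].val = val;
        return false;
    }

    int main(void)
    {
        /* A zero-extending target often re-stores 0 to the same slot. */
        printf("%d\n", store_const(3, 0));  /* 0: first store kept */
        printf("%d\n", store_const(3, 0));  /* 1: duplicate dropped */
        printf("%d\n", store_const(3, 5));  /* 0: new value, kept */
        return 0;
    }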
Reviewed-by: Song Gao Signed-off-by: Richard Henderson --- tcg/optimize.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index b32ef0be0f..a4fe9ee9bb 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2269,6 +2269,19 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) src = arg_temp(op->args[0]); ofs = op->args[2]; type = ctx->type; + + /* + * Eliminate duplicate stores of a constant. + * This happens frequently when the target ISA zero-extends. + */ + if (ts_is_const(src)) { + TCGTemp *prev = find_mem_copy_for(ctx, type, ofs); + if (src == prev) { + tcg_op_remove(ctx->tcg, op); + return true; + } + } + last = ofs + tcg_type_size(type) - 1; remove_mem_copy_in(ctx, ofs, last); record_mem_copy(ctx, type, src, ofs, last); From 26aac97c849f28e23bc818035bac38be34e62983 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 23 Oct 2023 12:31:57 -0700 Subject: [PATCH 31/35] tcg/optimize: Split out arg_new_constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a bug wherein raw uses of tcg_constant_internal do not have their TempOptInfo initialized. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index a4fe9ee9bb..d8e437c826 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -338,6 +338,21 @@ static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) return NULL; } +static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) +{ + TCGType type = ctx->type; + TCGTemp *ts; + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + ts = tcg_constant_internal(type, val); + init_ts_info(ctx, ts); + + return temp_arg(ts); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -399,16 +414,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, TCGArg dst, uint64_t val) { - TCGTemp *tv; - - if (ctx->type == TCG_TYPE_I32) { - val = (int32_t)val; - } - /* Convert movi to mov with constant temp. 
*/ - tv = tcg_constant_internal(ctx->type, val); - init_ts_info(ctx, tv); - return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); + return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) @@ -1431,7 +1438,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) op->opc = and_opc; op->args[1] = op->args[2]; - op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); + op->args[2] = arg_new_constant(ctx, mask); ctx->z_mask = mask & arg_info(op->args[1])->z_mask; return false; } @@ -1442,7 +1449,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); op->opc = and_opc; - op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask)); + op->args[2] = arg_new_constant(ctx, mask); ctx->z_mask = mask & arg_info(op->args[1])->z_mask; return false; } From 1551004eeb4436a163472c3b5b108a60766e74cb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 18:39:42 -0700 Subject: [PATCH 32/35] tcg: Canonicalize subi to addi during opcode generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Paolo Bonzini Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20231026013945.1152174-2-richard.henderson@linaro.org> --- tcg/tcg-op.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index de096a6f93..aa6bc6f57d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -372,12 +372,7 @@ void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i32(ret, arg1); - } else { - tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2)); - } + tcg_gen_addi_i32(ret, arg1, -arg2); } void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) @@ -1752,16 +1747,7 @@ void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i64(ret, arg1); - } else if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2)); - } else { - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), - TCGV_LOW(arg1), TCGV_HIGH(arg1), - tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32)); - } + tcg_gen_addi_i64(ret, arg1, -arg2); } void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) From 6334a968eec3f2498a992a7b96c1bfcaf100066f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 18:39:43 -0700 Subject: [PATCH 33/35] tcg/optimize: Canonicalize subi to addi during optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20231026013945.1152174-3-richard.henderson@linaro.org> --- tcg/optimize.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index d8e437c826..468f827399 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2166,7 +2166,19 @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op) static bool fold_sub(OptContext *ctx, TCGOp *op) { - return fold_const2(ctx, op) || fold_sub_vec(ctx, op); + if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) { + return true; + } + + /* Fold sub r,x,i to add r,x,-i */ + if (arg_is_const(op->args[2])) { + 
uint64_t val = arg_info(op->args[2])->val; + + op->opc = (ctx->type == TCG_TYPE_I32 + ? INDEX_op_add_i32 : INDEX_op_add_i64); + op->args[2] = arg_new_constant(ctx, -val); + } + return false; } static bool fold_sub2(OptContext *ctx, TCGOp *op) From f245757701ccd2d4f1c9ee0ed349e3a1d8049996 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 25 Oct 2023 18:39:44 -0700 Subject: [PATCH 34/35] tcg/optimize: Canonicalize sub2 with constants to add2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20231026013945.1152174-4-richard.henderson@linaro.org> --- tcg/optimize.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 468f827399..f2d01654c5 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1044,8 +1044,10 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) { - if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && - arg_is_const(op->args[4]) && arg_is_const(op->args[5])) { + bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); + bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); + + if (a_const && b_const) { uint64_t al = arg_info(op->args[2])->val; uint64_t ah = arg_info(op->args[3])->val; uint64_t bl = arg_info(op->args[4])->val; @@ -1089,6 +1091,21 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) tcg_opt_gen_movi(ctx, op2, rh, ah); return true; } + + /* Fold sub2 r,x,i to add2 r,x,-i */ + if (!add && b_const) { + uint64_t bl = arg_info(op->args[4])->val; + uint64_t bh = arg_info(op->args[5])->val; + + /* Negate the two parts without assembling and disassembling. */ + bl = -bl; + bh = ~bh + !bl; + + op->opc = (ctx->type == TCG_TYPE_I32 + ? INDEX_op_add2_i32 : INDEX_op_add2_i64); + op->args[4] = arg_new_constant(ctx, bl); + op->args[5] = arg_new_constant(ctx, bh); + } return false; } From d36ce28be424385fc9f7273bf5c15ce815b5cf4e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 24 Oct 2023 14:17:07 -0700 Subject: [PATCH 35/35] tcg/sparc64: Implement tcg_out_extrl_i64_i32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build fix for missing symbol. Cc: qemu-stable@nongnu.org Fixes: dad2f2f5af ("tcg/sparc64: Disable TCG_TARGET_HAS_extr_i64_i32") Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 19d9df4a09..a91defd0ac 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -529,6 +529,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) tcg_out_ext32u(s, rd, rs); } +static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_ext32u(s, rd, rs); +} + static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) { return false;