diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc index 95a5c5ff12..6056598c23 100644 --- a/accel/tcg/atomic_common.c.inc +++ b/accel/tcg/atomic_common.c.inc @@ -14,9 +14,20 @@ */ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr, + uint64_t read_value_low, + uint64_t read_value_high, + uint64_t write_value_low, + uint64_t write_value_high, MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW); + if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + read_value_low, read_value_high, + oi, QEMU_PLUGIN_MEM_R); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + write_value_low, write_value_high, + oi, QEMU_PLUGIN_MEM_W); + } } /* diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 1dc2151daf..89593b2502 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -53,6 +53,14 @@ # error unsupported data size #endif +#if DATA_SIZE == 16 +# define VALUE_LOW(val) int128_getlo(val) +# define VALUE_HIGH(val) int128_gethi(val) +#else +# define VALUE_LOW(val) val +# define VALUE_HIGH(val) 0 +#endif + #if DATA_SIZE >= 4 # define ABI_TYPE DATA_TYPE #else @@ -83,7 +91,12 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(newv), + VALUE_HIGH(newv), + oi); return ret; } @@ -97,7 +110,12 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, ret = qatomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(val), + VALUE_HIGH(val), + oi); return ret; } @@ -109,7 +127,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr); \ ret = qatomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(ret), \ + VALUE_HIGH(ret), \ + VALUE_LOW(val), \ + VALUE_HIGH(val), \ + oi); \ return ret; \ } @@ -145,7 +168,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ cmp = qatomic_cmpxchg__nocheck(haddr, old, new); \ } while (cmp != old); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(old), \ + VALUE_HIGH(old), \ + VALUE_LOW(xval), \ + VALUE_HIGH(xval), \ + oi); \ return RET; \ } @@ -188,7 +216,12 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr, ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(newv), + VALUE_HIGH(newv), + oi); return BSWAP(ret); } @@ -202,7 +235,12 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val, ret = qatomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, oi); + atomic_trace_rmw_post(env, addr, + VALUE_LOW(ret), + VALUE_HIGH(ret), + VALUE_LOW(val), + VALUE_HIGH(val), + oi); return BSWAP(ret); } @@ -214,7 +252,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr); \ ret = qatomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(ret), \ + VALUE_HIGH(ret), \ + VALUE_LOW(val), \ + VALUE_HIGH(val), \ + oi); \ return BSWAP(ret); \ } @@ -247,7 +290,12 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \ ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \ } while (ldo != ldn); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, oi); \ + atomic_trace_rmw_post(env, addr, \ + VALUE_LOW(old), \ + VALUE_HIGH(old), \ + VALUE_LOW(xval), \ + VALUE_HIGH(xval), \ + oi); \ return RET; \ } @@ -281,3 +329,5 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new) #undef SUFFIX #undef DATA_SIZE #undef SHIFT +#undef VALUE_LOW +#undef VALUE_HIGH diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc index 87ceb95487..ebbf380d76 100644 --- a/accel/tcg/ldst_common.c.inc +++ b/accel/tcg/ldst_common.c.inc @@ -123,10 +123,15 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi) * Load helpers for cpu_ldst.h */ -static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) +static void plugin_load_cb(CPUArchState *env, abi_ptr addr, + uint64_t value_low, + uint64_t value_high, + MemOpIdx oi) { if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + value_low, value_high, + oi, QEMU_PLUGIN_MEM_R); } } @@ -136,7 +141,7 @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB); ret = do_ld1_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } @@ -147,7 +152,7 @@ uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr, tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16); ret = do_ld2_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } @@ -158,7 +163,7 @@ uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr, tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32); ret = do_ld4_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } @@ -169,7 +174,7 @@ uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr, tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64); ret = do_ld8_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, ret, 0, oi); return ret; } @@ -180,7 +185,7 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128); ret = do_ld16_mmu(env_cpu(env), addr, oi, ra); - plugin_load_cb(env, addr, oi); + plugin_load_cb(env, addr, int128_getlo(ret), int128_gethi(ret), oi); return ret; } @@ -188,10 +193,15 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, * Store helpers for cpu_ldst.h */ -static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) +static void plugin_store_cb(CPUArchState *env, abi_ptr addr, + uint64_t value_low, + uint64_t value_high, + MemOpIdx oi) { if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, + value_low, value_high, + oi, QEMU_PLUGIN_MEM_W); } } @@ -199,7 +209,7 @@ void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, MemOpIdx oi, uintptr_t retaddr) { helper_stb_mmu(env, addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, @@ -207,7 +217,7 @@ void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16); do_st2_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, @@ -215,7 +225,7 @@ void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32); do_st4_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, @@ -223,7 +233,7 @@ void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64); do_st8_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, val, 0, oi); } void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val, @@ -231,7 +241,7 @@ void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val, { tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128); do_st16_mmu(env_cpu(env), addr, val, oi, retaddr); - plugin_store_cb(env, addr, oi); + plugin_store_cb(env, addr, int128_getlo(val), int128_gethi(val), oi); } /* diff --git a/configure b/configure index f3e7572afb..aa7aae70fa 100755 --- a/configure +++ b/configure @@ -516,6 +516,25 @@ case "$cpu" in ;; esac +# Now we have our CPU_CFLAGS we can check if we are targeting a 32 or +# 64 bit host. + +check_64bit_host() { +cat > $TMPC <<EOF +#if __SIZEOF_POINTER__ != 8 +#error not 64 bit system +#endif +int main(void) { return 0; } +EOF + compile_object "$1" +} + +if check_64bit_host "$CPU_CFLAGS"; then + host_bits=64 +else + host_bits=32 +fi + if test -n "$host_arch" && { ! test -d "$source_path/linux-user/include/host/$host_arch" || ! test -d "$source_path/common-user/host/$host_arch"; }; then @@ -610,6 +629,9 @@ meson_option_parse() { exit 1 fi } +has_meson_option() { + test "${meson_options#*"$1"}" != "$meson_options" +} meson_add_machine_file() { if test "$cross_compile" = "yes"; then @@ -1028,9 +1050,13 @@ if test "$static" = "yes" ; then fi plugins="no" fi -if test "$plugins" != "no"; then - plugins=yes - subdirs="$subdirs contrib/plugins" +if test "$plugins" != "no" && test $host_bits -eq 64; then + if has_meson_option "-Dtcg_interpreter=true"; then + plugins="no" + else + plugins=yes + subdirs="$subdirs contrib/plugins" + fi fi cat > $TMPC << EOF diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index 05a2a45c5c..bbddd4800f 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -13,6 +13,7 @@ TOP_SRC_PATH = $(SRC_PATH)/../.. VPATH += $(SRC_PATH) NAMES := +NAMES += bbv NAMES += execlog NAMES += hotblocks NAMES += hotpages @@ -29,6 +30,7 @@ NAMES += cache NAMES += drcov NAMES += ips NAMES += stoptrigger +NAMES += cflow ifeq ($(CONFIG_WIN32),y) SO_SUFFIX := .dll diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c new file mode 100644 index 0000000000..a5256517dd --- /dev/null +++ b/contrib/plugins/bbv.c @@ -0,0 +1,158 @@ +/* + * Generate basic block vectors for use with the SimPoint analysis tool. + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <glib.h> + +#include <qemu-plugin.h> + +typedef struct Bb { + uint64_t vaddr; + struct qemu_plugin_scoreboard *count; + unsigned int index; +} Bb; + +typedef struct Vcpu { + uint64_t count; + FILE *file; +} Vcpu; + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; +static GHashTable *bbs; +static GRWLock bbs_lock; +static char *filename; +static struct qemu_plugin_scoreboard *vcpus; +static uint64_t interval = 100000000; + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + for (int i = 0; i < qemu_plugin_num_vcpus(); i++) { + fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file); + } + + g_hash_table_unref(bbs); + g_free(filename); + qemu_plugin_scoreboard_free(vcpus); +} + +static void free_bb(void *data) +{ + qemu_plugin_scoreboard_free(((Bb *)data)->count); + g_free(data); +} + +static qemu_plugin_u64 count_u64(void) +{ + return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count); +} + +static qemu_plugin_u64 bb_count_u64(Bb *bb) +{ + return qemu_plugin_scoreboard_u64(bb->count); +} + +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index) +{ + g_autofree gchar *vcpu_filename = NULL; + Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index); + + vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index); + vcpu->file = fopen(vcpu_filename, "w"); +} + +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata) +{ + Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index); + GHashTableIter iter; + void *value; + + if (!vcpu->file) { + return; + } + + vcpu->count -= interval; + + fputc('T', vcpu->file); + + g_rw_lock_reader_lock(&bbs_lock); + g_hash_table_iter_init(&iter, bbs); + + while (g_hash_table_iter_next(&iter, NULL, &value)) { + Bb *bb = value; + uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb), vcpu_index); + + if (!bb_count) { + continue; + } + + fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count); + qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0); + } + + g_rw_lock_reader_unlock(&bbs_lock); + fputc('\n', vcpu->file); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + uint64_t n_insns = qemu_plugin_tb_n_insns(tb); + uint64_t vaddr = qemu_plugin_tb_vaddr(tb); + Bb *bb; + + g_rw_lock_writer_lock(&bbs_lock); + bb = g_hash_table_lookup(bbs, &vaddr); + if (!bb) { + bb = g_new(Bb, 1); + bb->vaddr = vaddr; + bb->count = qemu_plugin_scoreboard_new(sizeof(uint64_t)); + bb->index = g_hash_table_size(bbs); + g_hash_table_replace(bbs, &bb->vaddr, bb); + } + g_rw_lock_writer_unlock(&bbs_lock); + + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, count_u64(), n_insns); + + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count_u64(bb), n_insns); + + qemu_plugin_register_vcpu_tb_exec_cond_cb( + tb, vcpu_interval_exec, QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_COND_GE, count_u64(), interval, NULL); +} + +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, + int argc, char **argv) +{ + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "interval") == 0) { + interval = g_ascii_strtoull(tokens[1], NULL, 10); + } else if (g_strcmp0(tokens[0], "outfile") == 0) { + filename = tokens[1]; + tokens[1] = NULL; + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + if (!filename) { + fputs("outfile unspecified\n", stderr); + return -1; + } + + bbs = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, free_bb); + vcpus = qemu_plugin_scoreboard_new(sizeof(Vcpu)); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + qemu_plugin_register_vcpu_init_cb(id, vcpu_init); + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + + return 0; +} diff --git a/contrib/plugins/cflow.c b/contrib/plugins/cflow.c new file mode 100644 index 0000000000..6faa55d10d --- /dev/null +++ b/contrib/plugins/cflow.c @@ -0,0 +1,388 @@ +/* + * Control Flow plugin + * + * This plugin will track changes to control flow and detect where + * instructions fault. + * + * Copyright (c) 2024 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include <glib.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +typedef enum { + SORT_HOTTEST, /* hottest branch insn */ + SORT_EXCEPTION, /* most early exits */ + SORT_POPDEST, /* most destinations (usually ret's) */ +} ReportType; + +ReportType report = SORT_HOTTEST; +int topn = 10; + +typedef struct { + uint64_t daddr; + uint64_t dcount; +} DestData; + +/* A node is an address where we can go to multiple places */ +typedef struct { + GMutex lock; + /* address of the branch point */ + uint64_t addr; + /* array of DestData */ + GArray *dests; + /* early exit/fault count */ + uint64_t early_exit; + /* jump destination count */ + uint64_t dest_count; + /* instruction data */ + char *insn_disas; + /* symbol? */ + const char *symbol; + /* times translated as last in block? */ + int last_count; + /* times translated in the middle of block? */ + int mid_count; +} NodeData; + +typedef enum { + /* last insn in block, expected flow control */ + LAST_INSN = (1 << 0), + /* mid-block insn, can only be an exception */ + EXCP_INSN = (1 << 1), + /* multiple disassembly, may have changed */ + MULT_INSN = (1 << 2), +} InsnTypes; + +typedef struct { + /* address of the branch point */ + uint64_t addr; + /* disassembly */ + char *insn_disas; + /* symbol? */ + const char *symbol; + /* types */ + InsnTypes type_flag; +} InsnData; + +/* We use this to track the current execution state */ +typedef struct { + /* address of end of block */ + uint64_t end_block; + /* next pc after end of block */ + uint64_t pc_after_block; + /* address of last executed PC */ + uint64_t last_pc; +} VCPUScoreBoard; + +/* descriptors for accessing the above scoreboard */ +static qemu_plugin_u64 end_block; +static qemu_plugin_u64 pc_after_block; +static qemu_plugin_u64 last_pc; + + +static GMutex node_lock; +static GHashTable *nodes; +struct qemu_plugin_scoreboard *state; + +/* SORT_HOTTEST */ +static gint hottest(gconstpointer a, gconstpointer b) +{ + NodeData *na = (NodeData *) a; + NodeData *nb = (NodeData *) b; + + return na->dest_count > nb->dest_count ? -1 : + na->dest_count == nb->dest_count ? 0 : 1; +} + +static gint exception(gconstpointer a, gconstpointer b) +{ + NodeData *na = (NodeData *) a; + NodeData *nb = (NodeData *) b; + + return na->early_exit > nb->early_exit ? -1 : + na->early_exit == nb->early_exit ? 0 : 1; +} + +static gint popular(gconstpointer a, gconstpointer b) +{ + NodeData *na = (NodeData *) a; + NodeData *nb = (NodeData *) b; + + return na->dests->len > nb->dests->len ? -1 : + na->dests->len == nb->dests->len ? 0 : 1; +} + +/* Filter out non-branches - returns true to remove entry */ +static gboolean filter_non_branches(gpointer key, gpointer value, + gpointer user_data) +{ + NodeData *node = (NodeData *) value; + + return node->dest_count == 0; +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + g_autoptr(GString) result = g_string_new("collected "); + GList *data; + GCompareFunc sort = &hottest; + int n = 0; + + g_mutex_lock(&node_lock); + g_string_append_printf(result, "%d control flow nodes in the hash table\n", + g_hash_table_size(nodes)); + + /* remove all nodes that didn't branch */ + g_hash_table_foreach_remove(nodes, filter_non_branches, NULL); + + data = g_hash_table_get_values(nodes); + + switch (report) { + case SORT_HOTTEST: + sort = &hottest; + break; + case SORT_EXCEPTION: + sort = &exception; + break; + case SORT_POPDEST: + sort = &popular; + break; + } + + data = g_list_sort(data, sort); + + for (GList *l = data; + l != NULL && n < topn; + l = l->next, n++) { + NodeData *n = l->data; + const char *type = n->mid_count ? "sync fault" : "branch"; + g_string_append_printf(result, " addr: 0x%"PRIx64 " %s: %s (%s)\n", + n->addr, n->symbol, n->insn_disas, type); + if (n->early_exit) { + g_string_append_printf(result, " early exits %"PRId64"\n", + n->early_exit); + } + g_string_append_printf(result, " branches %"PRId64"\n", + n->dest_count); + for (int j = 0; j < n->dests->len; j++) { + DestData *dd = &g_array_index(n->dests, DestData, j); + g_string_append_printf(result, " to 0x%"PRIx64" (%"PRId64")\n", + dd->daddr, dd->dcount); + } + } + + qemu_plugin_outs(result->str); + + g_mutex_unlock(&node_lock); +} + +static void plugin_init(void) +{ + g_mutex_init(&node_lock); + nodes = g_hash_table_new(NULL, g_direct_equal); + state = qemu_plugin_scoreboard_new(sizeof(VCPUScoreBoard)); + + /* score board declarations */ + end_block = qemu_plugin_scoreboard_u64_in_struct(state, VCPUScoreBoard, + end_block); + pc_after_block = qemu_plugin_scoreboard_u64_in_struct(state, VCPUScoreBoard, + pc_after_block); + last_pc = qemu_plugin_scoreboard_u64_in_struct(state, VCPUScoreBoard, + last_pc); +} + +static NodeData *create_node(uint64_t addr) +{ + NodeData *node = g_new0(NodeData, 1); + g_mutex_init(&node->lock); + node->addr = addr; + node->dests = g_array_new(true, true, sizeof(DestData)); + return node; +} + +static NodeData *fetch_node(uint64_t addr, bool create_if_not_found) +{ + NodeData *node = NULL; + + g_mutex_lock(&node_lock); + node = (NodeData *) g_hash_table_lookup(nodes, (gconstpointer) addr); + if (!node && create_if_not_found) { + node = create_node(addr); + g_hash_table_insert(nodes, (gpointer) addr, (gpointer) node); + } + g_mutex_unlock(&node_lock); + return node; +} + +/* + * Called when we detect a non-linear execution (pc != + * pc_after_block). This could be due to a fault causing some sort of + * exit exception (if last_pc != block_end) or just a taken branch. + */ +static void vcpu_tb_branched_exec(unsigned int cpu_index, void *udata) +{ + uint64_t lpc = qemu_plugin_u64_get(last_pc, cpu_index); + uint64_t ebpc = qemu_plugin_u64_get(end_block, cpu_index); + uint64_t npc = qemu_plugin_u64_get(pc_after_block, cpu_index); + uint64_t pc = GPOINTER_TO_UINT(udata); + + /* return early for address 0 */ + if (!lpc) { + return; + } + + NodeData *node = fetch_node(lpc, true); + DestData *data = NULL; + bool early_exit = (lpc != ebpc); + GArray *dests; + + /* the condition should never hit */ + g_assert(pc != npc); + + g_mutex_lock(&node->lock); + + if (early_exit) { + fprintf(stderr, "%s: pc=%"PRIx64", epbc=%"PRIx64 + " npc=%"PRIx64", lpc=%"PRIx64"\n", + __func__, pc, ebpc, npc, lpc); + node->early_exit++; + if (!node->mid_count) { + /* count now as we've only just allocated */ + node->mid_count++; + } + } + + dests = node->dests; + for (int i = 0; i < dests->len; i++) { + if (g_array_index(dests, DestData, i).daddr == pc) { + data = &g_array_index(dests, DestData, i); + } + } + + /* we've never seen this before, allocate a new entry */ + if (!data) { + DestData new_entry = { .daddr = pc }; + g_array_append_val(dests, new_entry); + data = &g_array_index(dests, DestData, dests->len - 1); + g_assert(data->daddr == pc); + } + + data->dcount++; + node->dest_count++; + + g_mutex_unlock(&node->lock); +} + +/* + * At the start of each block we need to resolve two things: + * + * - is last_pc == block_end, if not we had an early exit + * - is start of block last_pc + insn width, if not we jumped + * + * Once those are dealt with we can instrument the rest of the + * instructions for their execution. + * + */ +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + uint64_t pc = qemu_plugin_tb_vaddr(tb); + size_t insns = qemu_plugin_tb_n_insns(tb); + struct qemu_plugin_insn *first_insn = qemu_plugin_tb_get_insn(tb, 0); + struct qemu_plugin_insn *last_insn = qemu_plugin_tb_get_insn(tb, insns - 1); + + /* + * check if we are executing linearly after the last block. We can + * handle both early block exits and normal branches in the + * callback if we hit it. + */ + gpointer udata = GUINT_TO_POINTER(pc); + qemu_plugin_register_vcpu_tb_exec_cond_cb( + tb, vcpu_tb_branched_exec, QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_COND_NE, pc_after_block, pc, udata); + + /* + * Now we can set start/end for this block so the next block can + * check where we are at. Do this on the first instruction and not + * the TB so we don't get mixed up with above. + */ + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(first_insn, + QEMU_PLUGIN_INLINE_STORE_U64, + end_block, qemu_plugin_insn_vaddr(last_insn)); + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(first_insn, + QEMU_PLUGIN_INLINE_STORE_U64, + pc_after_block, + qemu_plugin_insn_vaddr(last_insn) + + qemu_plugin_insn_size(last_insn)); + + for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx); + uint64_t ipc = qemu_plugin_insn_vaddr(insn); + /* + * If this is a potential branch point check if we could grab + * the disassembly for it. If it is the last instruction + * always create an entry. + */ + NodeData *node = fetch_node(ipc, last_insn); + if (node) { + g_mutex_lock(&node->lock); + if (!node->insn_disas) { + node->insn_disas = qemu_plugin_insn_disas(insn); + } + if (!node->symbol) { + node->symbol = qemu_plugin_insn_symbol(insn); + } + if (last_insn == insn) { + node->last_count++; + } else { + node->mid_count++; + } + g_mutex_unlock(&node->lock); + } + + /* Store the PC of what we are about to execute */ + qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(insn, + QEMU_PLUGIN_INLINE_STORE_U64, + last_pc, ipc); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "sort") == 0) { + if (g_strcmp0(tokens[1], "hottest") == 0) { + report = SORT_HOTTEST; + } else if (g_strcmp0(tokens[1], "early") == 0) { + report = SORT_EXCEPTION; + } else if (g_strcmp0(tokens[1], "exceptions") == 0) { + report = SORT_POPDEST; + } else { + fprintf(stderr, "failed to parse: %s\n", tokens[1]); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + plugin_init(); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + return 0; +} diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c index 29fa556d0f..e5297dbb01 100644 --- a/contrib/plugins/ips.c +++ b/contrib/plugins/ips.c @@ -152,6 +152,12 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime)); max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC; + if (max_insn_per_quantum == 0) { + fprintf(stderr, "minimum of %d instructions per second needed\n", + NUM_TIME_UPDATE_PER_SEC); + return -1; + } + time_handle = qemu_plugin_request_time_control(); g_assert(time_handle); diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index ed31d4b0b2..c0aa52def5 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -184,6 +184,25 @@ be an effective use of its limited resources, and thus intends to discontinue it. Since all recent x86 hardware from the past >10 years is capable of the 64-bit x86 extensions, a corresponding 64-bit OS should be used instead. +TCG Plugin support not enabled by default on 32-bit hosts (since 9.2) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +While it is still possible to enable TCG plugin support for 32-bit +hosts there are a number of potential pitfalls when instrumenting +64-bit guests. The plugin APIs typically pass most addresses as +uint64_t but practices like encoding that address in a host pointer +for passing as user-data will lose data. As most software analysis +benefits from having plenty of host memory it seems reasonable to +encourage users to use 64 bit builds of QEMU for analysis work +whatever targets they are instrumenting. + +TCG Plugin support not enabled by default with TCI (since 9.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +While the TCG interpreter can interpret the TCG ops used by plugins it +is going to be so much slower it wouldn't make sense for any serious +instrumentation. Due to implementation differences there will also be +anomalies in things like memory instrumentation. System emulator CPUs -------------------- diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst index 05f54d3f27..3028d5fff7 100644 --- a/docs/about/emulation.rst +++ b/docs/about/emulation.rst @@ -268,6 +268,36 @@ Behaviour can be tweaked with the following arguments: * - idle=true|false - Dump the current execution stats whenever the guest vCPU idles +Basic Block Vectors +................... + +``contrib/plugins/bbv.c`` + +The bbv plugin allows you to generate basic block vectors for use with the +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool. + +.. list-table:: Basic block vectors arguments + :widths: 20 80 + :header-rows: 1 + + * - Option + - Description + * - interval=N + - The interval to generate a basic block vector specified by the number of + instructions (Default: N = 100000000) + * - outfile=PATH + - The path to output files. + It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index. + +Example:: + + $ qemu-aarch64 \ + -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \ + tests/tcg/aarch64-linux-user/sha1 + SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6 + $ du sha1.0.bb + 23128 sha1.0.bb + Instruction ........... @@ -384,6 +414,19 @@ run:: 160 1 0 135 1 0 +Behaviour can be tweaked with the following arguments: + +.. list-table:: Syscall plugin arguments + :widths: 20 80 + :header-rows: 1 + + * - Option + - Description + * - print=true|false + - Print the number of times each syscall is called + * - log_writes=true|false + - Log the buffer of each write syscall in hexdump format + Test inline operations ...................... @@ -773,4 +816,3 @@ Other emulation features When running system emulation you can also enable deterministic execution which allows for repeatable record/replay debugging. See :ref:`Record/Replay<replay>` for more details. - diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 1c9c775df6..04e9ad4996 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -350,6 +350,8 @@ typedef union IcountDecr { * from CPUArchState, via small negative offsets. * @can_do_io: True if memory-mapped IO is allowed. * @plugin_mem_cbs: active plugin memory callbacks + * @plugin_mem_value_low: 64 lower bits of latest accessed mem value. + * @plugin_mem_value_high: 64 higher bits of latest accessed mem value. */ typedef struct CPUNegativeOffsetState { CPUTLB tlb; @@ -358,6 +360,8 @@ typedef struct CPUNegativeOffsetState { * The callback pointer are accessed via TCG (see gen_empty_mem_helper). */ GArray *plugin_mem_cbs; + uint64_t plugin_mem_value_low; + uint64_t plugin_mem_value_high; #endif IcountDecr icount_decr; bool can_do_io; diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h index af5f9db469..9726a9ebf3 100644 --- a/include/qemu/plugin.h +++ b/include/qemu/plugin.h @@ -167,6 +167,8 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret); void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + uint64_t value_low, + uint64_t value_high, MemOpIdx oi, enum qemu_plugin_mem_rw rw); void qemu_plugin_flush_cb(void); @@ -251,6 +253,8 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret) { } static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + uint64_t value_low, + uint64_t value_high, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { } diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index c71c705b69..622c9a0232 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -57,11 +57,19 @@ typedef uint64_t qemu_plugin_id_t; * - Remove qemu_plugin_register_vcpu_{tb, insn, mem}_exec_inline. * Those functions are replaced by *_per_vcpu variants, which guarantee * thread-safety for operations. + * + * version 3: + * - modified arguments and return value of qemu_plugin_insn_data to copy + * the data into a user-provided buffer instead of returning a pointer + * to the data. + * + * version 4: + * - added qemu_plugin_read_memory_vaddr */ extern QEMU_PLUGIN_EXPORT int qemu_plugin_version; -#define QEMU_PLUGIN_VERSION 3 +#define QEMU_PLUGIN_VERSION 4 /** * struct qemu_info_t - system information for plugins @@ -262,6 +270,29 @@ enum qemu_plugin_mem_rw { QEMU_PLUGIN_MEM_RW, }; +enum qemu_plugin_mem_value_type { + QEMU_PLUGIN_MEM_VALUE_U8, + QEMU_PLUGIN_MEM_VALUE_U16, + QEMU_PLUGIN_MEM_VALUE_U32, + QEMU_PLUGIN_MEM_VALUE_U64, + QEMU_PLUGIN_MEM_VALUE_U128, +}; + +/* typedef qemu_plugin_mem_value - value accessed during a load/store */ +typedef struct { + enum qemu_plugin_mem_value_type type; + union { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + struct { + uint64_t low; + uint64_t high; + } u128; + } data; +} qemu_plugin_mem_value; + /** * enum qemu_plugin_cond - condition to enable callback * @@ -551,6 +582,15 @@ bool qemu_plugin_mem_is_big_endian(qemu_plugin_meminfo_t info); QEMU_PLUGIN_API bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info); +/** + * qemu_plugin_mem_get_mem_value() - return last value loaded/stored + * @info: opaque memory transaction handle + * + * Returns: memory value + */ +QEMU_PLUGIN_API +qemu_plugin_mem_value qemu_plugin_mem_get_value(qemu_plugin_meminfo_t info); + /** * qemu_plugin_get_hwaddr() - return handle for memory operation * @info: opaque memory info structure @@ -852,6 +892,28 @@ typedef struct { QEMU_PLUGIN_API GArray *qemu_plugin_get_registers(void); +/** + * qemu_plugin_read_memory_vaddr() - read from memory using a virtual address + * + * @addr: A virtual address to read from + * @data: A byte array to store data into + * @len: The number of bytes to read, starting from @addr + * + * @len bytes of data is read starting at @addr and stored into @data. If @data + * is not large enough to hold @len bytes, it will be expanded to the necessary + * size, reallocating if necessary. @len must be greater than 0. + * + * This function does not ensure writes are flushed prior to reading, so + * callers should take care when calling this function in plugin callbacks to + * avoid attempting to read data which may not yet be written and should use + * the memory callback API instead. + * + * Returns true on success and false on failure. + */ +QEMU_PLUGIN_API +bool qemu_plugin_read_memory_vaddr(uint64_t addr, + GByteArray *data, size_t len); + /** * qemu_plugin_read_register() - read register for current vCPU * diff --git a/plugins/api.c b/plugins/api.c index 2ff13d09de..24ea64e2de 100644 --- a/plugins/api.c +++ b/plugins/api.c @@ -351,6 +351,39 @@ bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info) return get_plugin_meminfo_rw(info) & QEMU_PLUGIN_MEM_W; } +qemu_plugin_mem_value qemu_plugin_mem_get_value(qemu_plugin_meminfo_t info) +{ + uint64_t low = current_cpu->neg.plugin_mem_value_low; + qemu_plugin_mem_value value; + + switch (qemu_plugin_mem_size_shift(info)) { + case 0: + value.type = QEMU_PLUGIN_MEM_VALUE_U8; + value.data.u8 = (uint8_t)low; + break; + case 1: + value.type = QEMU_PLUGIN_MEM_VALUE_U16; + value.data.u16 = (uint16_t)low; + break; + case 2: + value.type = QEMU_PLUGIN_MEM_VALUE_U32; + value.data.u32 = (uint32_t)low; + break; + case 3: + value.type = QEMU_PLUGIN_MEM_VALUE_U64; + value.data.u64 = low; + break; + case 4: + value.type = QEMU_PLUGIN_MEM_VALUE_U128; + value.data.u128.low = low; + value.data.u128.high = current_cpu->neg.plugin_mem_value_high; + break; + default: + g_assert_not_reached(); + } + return value; +} + /* * Virtual Memory queries */ @@ -527,6 +560,26 @@ GArray *qemu_plugin_get_registers(void) return create_register_handles(regs); } +bool qemu_plugin_read_memory_vaddr(vaddr addr, GByteArray *data, size_t len) +{ + g_assert(current_cpu); + + if (len == 0) { + return false; + } + + g_byte_array_set_size(data, len); + + int result = cpu_memory_rw_debug(current_cpu, addr, data->data, + data->len, false); + + if (result < 0) { + return false; + } + + return true; +} + int qemu_plugin_read_register(struct qemu_plugin_register *reg, GByteArray *buf) { g_assert(current_cpu); diff --git a/plugins/core.c b/plugins/core.c index 2897453cac..bb105e8e68 100644 --- a/plugins/core.c +++ b/plugins/core.c @@ -602,6 +602,8 @@ void exec_inline_op(enum plugin_dyn_cb_type type, } void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + uint64_t value_low, + uint64_t value_high, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { GArray *arr = cpu->neg.plugin_mem_cbs; @@ -610,6 +612,10 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, if (arr == NULL) { return; } + + cpu->neg.plugin_mem_value_low = value_low; + cpu->neg.plugin_mem_value_high = value_high; + for (i = 0; i < arr->len; i++) { struct qemu_plugin_dyn_cb *cb = &g_array_index(arr, struct qemu_plugin_dyn_cb, i); diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols index ca773d8d9f..032661f9ea 100644 --- a/plugins/qemu-plugins.symbols +++ b/plugins/qemu-plugins.symbols @@ -13,6 +13,7 @@ qemu_plugin_insn_size; qemu_plugin_insn_symbol; qemu_plugin_insn_vaddr; + qemu_plugin_mem_get_value; qemu_plugin_mem_is_big_endian; qemu_plugin_mem_is_sign_extended; qemu_plugin_mem_is_store; @@ -20,6 +21,7 @@ qemu_plugin_num_vcpus; qemu_plugin_outs; qemu_plugin_path_to_binary; + qemu_plugin_read_memory_vaddr; qemu_plugin_read_register; qemu_plugin_register_atexit_cb; qemu_plugin_register_flush_cb; diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index 8510160258..23dc807f11 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -148,11 +148,11 @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr) return NULL; } +#ifdef CONFIG_PLUGIN static void plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, enum qemu_plugin_mem_rw rw) { -#ifdef CONFIG_PLUGIN if (tcg_ctx->plugin_insn != NULL) { qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw); @@ -172,6 +172,54 @@ plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi, } } } +} +#endif + +static void +plugin_gen_mem_callbacks_i32(TCGv_i32 val, + TCGv_i64 copy_addr, TCGTemp *orig_addr, + MemOpIdx oi, enum qemu_plugin_mem_rw rw) +{ +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i32(val, tcg_env, + offsetof(CPUState, neg.plugin_mem_value_low) - + sizeof(CPUState) + (HOST_BIG_ENDIAN * 4)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } +#endif +} + +static void +plugin_gen_mem_callbacks_i64(TCGv_i64 val, + TCGv_i64 copy_addr, TCGTemp *orig_addr, + MemOpIdx oi, enum qemu_plugin_mem_rw rw) +{ +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i64(val, tcg_env, + offsetof(CPUState, neg.plugin_mem_value_low) - + sizeof(CPUState)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } +#endif +} + +static void +plugin_gen_mem_callbacks_i128(TCGv_i128 val, + TCGv_i64 copy_addr, TCGTemp *orig_addr, + MemOpIdx oi, enum qemu_plugin_mem_rw rw) +{ +#ifdef CONFIG_PLUGIN + if (tcg_ctx->plugin_insn != NULL) { + tcg_gen_st_i64(TCGV128_LOW(val), tcg_env, + offsetof(CPUState, neg.plugin_mem_value_low) - + sizeof(CPUState)); + tcg_gen_st_i64(TCGV128_HIGH(val), tcg_env, + offsetof(CPUState, neg.plugin_mem_value_high) - + sizeof(CPUState)); + plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw); + } #endif } @@ -203,7 +251,8 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, opc = INDEX_op_qemu_ld_a64_i32; } gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi); - plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi, + QEMU_PLUGIN_MEM_R); if ((orig_memop ^ memop) & MO_BSWAP) { switch (orig_memop & MO_SIZE) { @@ -271,7 +320,7 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, } } gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi); - plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { tcg_temp_free_i32(swap); @@ -324,7 +373,8 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, opc = INDEX_op_qemu_ld_a64_i64; } gen_ldst_i64(opc, val, addr, oi); - plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi, + QEMU_PLUGIN_MEM_R); if ((orig_memop ^ memop) & MO_BSWAP) { int flags = (orig_memop & MO_SIGN @@ -396,7 +446,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, opc = INDEX_op_qemu_st_a64_i64; } gen_ldst_i64(opc, val, addr, oi); - plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); + plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { tcg_temp_free_i64(swap); @@ -606,7 +656,8 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, tcg_constant_i32(orig_oi)); } - plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); + plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi, + QEMU_PLUGIN_MEM_R); } void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, @@ -722,7 +773,8 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, tcg_constant_i32(orig_oi)); } - plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W); + plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi, + QEMU_PLUGIN_MEM_W); } void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx, diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target index 452a2cde65..2da70b2fcf 100644 --- a/tests/tcg/Makefile.target +++ b/tests/tcg/Makefile.target @@ -90,6 +90,7 @@ CFLAGS= LDFLAGS= QEMU_OPTS= +CHECK_PLUGIN_OUTPUT_COMMAND= # If TCG debugging, or TCI is enabled things are a lot slower @@ -152,10 +153,11 @@ PLUGINS=$(patsubst %.c, lib%.so, $(notdir $(wildcard $(PLUGIN_SRC)/*.c))) # only expand MULTIARCH_TESTS which are common on most of our targets # to avoid an exponential explosion as new tests are added. We also # add some special helpers the run-plugin- rules can use below. +# In more, extra tests can be added using ADDITIONAL_PLUGINS_TESTS variable. ifneq ($(MULTIARCH_TESTS),) $(foreach p,$(PLUGINS), \ - $(foreach t,$(MULTIARCH_TESTS),\ + $(foreach t,$(MULTIARCH_TESTS) $(ADDITIONAL_PLUGINS_TESTS),\ $(eval run-plugin-$(t)-with-$(p): $t $p) \ $(eval RUN_TESTS+=run-plugin-$(t)-with-$(p)))) endif # MULTIARCH_TESTS @@ -179,6 +181,10 @@ run-plugin-%: -plugin $(PLUGIN_LIB)/$(call extract-plugin,$@)$(PLUGIN_ARGS) \ -d plugin -D $*.pout \ $(call strip-plugin,$<)) + $(if $(CHECK_PLUGIN_OUTPUT_COMMAND), \ + $(call quiet-command, $(CHECK_PLUGIN_OUTPUT_COMMAND) $*.pout, \ + TEST, check plugin $(call extract-plugin,$@) output \ + with $(call strip-plugin,$<))) else run-%: % $(call run-test, $<, \ @@ -193,6 +199,10 @@ run-plugin-%: -plugin $(PLUGIN_LIB)/$(call extract-plugin,$@)$(PLUGIN_ARGS) \ -d plugin -D $*.pout \ $(QEMU_OPTS) $(call strip-plugin,$<)) + $(if $(CHECK_PLUGIN_OUTPUT_COMMAND), \ + $(call quiet-command, $(CHECK_PLUGIN_OUTPUT_COMMAND) $*.pout, \ + TEST, check plugin $(call extract-plugin,$@) output \ + with $(call strip-plugin,$<))) endif gdb-%: % diff --git a/tests/tcg/alpha/Makefile.softmmu-target b/tests/tcg/alpha/Makefile.softmmu-target index a0eca4d6ea..a944102a3c 100644 --- a/tests/tcg/alpha/Makefile.softmmu-target +++ b/tests/tcg/alpha/Makefile.softmmu-target @@ -28,7 +28,7 @@ LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc %: %.c $(LINK_SCRIPT) $(CRT_OBJS) $(MINILIB_OBJS) $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) -memory: CFLAGS+=-DCHECK_UNALIGNED=0 +memory: CFLAGS+=-DCHECK_UNALIGNED=0 -mbwx # Running QEMU_OPTS+=-serial chardev:output -kernel diff --git a/tests/tcg/alpha/Makefile.target b/tests/tcg/alpha/Makefile.target index fdd7ddf64e..36d8ed1eae 100644 --- a/tests/tcg/alpha/Makefile.target +++ b/tests/tcg/alpha/Makefile.target @@ -12,4 +12,7 @@ test-cmov: EXTRA_CFLAGS=-DTEST_CMOV test-cmov: test-cond.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) +# Force generation of byte read/write +test-plugin-mem-access: CFLAGS+=-mbwx + run-test-cmov: test-cmov diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 5e3391ec9d..78b83d5575 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -170,5 +170,16 @@ run-plugin-semiconsole-with-%: TESTS += semihosting semiconsole endif +# Test plugin memory access instrumentation +run-plugin-test-plugin-mem-access-with-libmem.so: \ + PLUGIN_ARGS=$(COMMA)print-accesses=true +run-plugin-test-plugin-mem-access-with-libmem.so: \ + CHECK_PLUGIN_OUTPUT_COMMAND= \ + $(SRC_PATH)/tests/tcg/multiarch/check-plugin-output.sh \ + $(QEMU) $< + +test-plugin-mem-access: CFLAGS+=-pthread -O0 +test-plugin-mem-access: LDFLAGS+=-pthread -O0 + # Update TESTS TESTS += $(MULTIARCH_TESTS) diff --git a/tests/tcg/multiarch/check-plugin-output.sh b/tests/tcg/multiarch/check-plugin-output.sh new file mode 100755 index 0000000000..80607f04b5 --- /dev/null +++ b/tests/tcg/multiarch/check-plugin-output.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# This script runs a given executable using qemu, and compare its standard +# output with an expected plugin output. +# Each line of output is searched (as a regexp) in the expected plugin output. + +set -euo pipefail + +die() +{ + echo "$@" 1>&2 + exit 1 +} + +check() +{ + file=$1 + pattern=$2 + grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file" +} + +[ $# -eq 3 ] || die "usage: qemu_bin exe plugin_out_file" + +qemu_bin=$1; shift +exe=$1;shift +plugin_out=$1; shift + +expected() +{ + $qemu_bin $exe || + die "running $exe failed" +} + +expected | while read line; do + check "$plugin_out" "$line" +done diff --git a/tests/tcg/multiarch/system/Makefile.softmmu-target b/tests/tcg/multiarch/system/Makefile.softmmu-target index 32dc0f9830..07be001102 100644 --- a/tests/tcg/multiarch/system/Makefile.softmmu-target +++ b/tests/tcg/multiarch/system/Makefile.softmmu-target @@ -65,3 +65,9 @@ endif MULTIARCH_RUNS += run-gdbstub-memory run-gdbstub-interrupt \ run-gdbstub-untimely-packet run-gdbstub-registers + +# Test plugin memory access instrumentation +run-plugin-memory-with-libmem.so: \ + PLUGIN_ARGS=$(COMMA)region-summary=true +run-plugin-memory-with-libmem.so: \ + CHECK_PLUGIN_OUTPUT_COMMAND=$(MULTIARCH_SYSTEM_SRC)/validate-memory-counts.py $@.out diff --git a/tests/tcg/multiarch/system/memory.c b/tests/tcg/multiarch/system/memory.c index 6eb2eb16f7..65a6038a24 100644 --- a/tests/tcg/multiarch/system/memory.c +++ b/tests/tcg/multiarch/system/memory.c @@ -14,26 +14,35 @@ #include <stdint.h> #include <stdbool.h> +#include <inttypes.h> #include <minilib.h> #ifndef CHECK_UNALIGNED # error "Target does not specify CHECK_UNALIGNED" #endif +uint32_t test_read_count; +uint32_t test_write_count; + #define MEM_PAGE_SIZE 4096 /* nominal 4k "pages" */ #define TEST_SIZE (MEM_PAGE_SIZE * 4) /* 4 pages */ #define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0]))) -__attribute__((aligned(MEM_PAGE_SIZE))) +__attribute__((aligned(TEST_SIZE))) static uint8_t test_data[TEST_SIZE]; typedef void (*init_ufn) (int offset); typedef bool (*read_ufn) (int offset); typedef bool (*read_sfn) (int offset, bool nf); -static void pdot(int count) +static void pdot(int count, bool write) { + if (write) { + test_write_count++; + } else { + test_read_count++; + } if (count % 128 == 0) { ml_printf("."); } @@ -63,12 +72,14 @@ static void init_test_data_u8(int unused_offset) int i; (void)(unused_offset); - ml_printf("Filling test area with u8:"); + ml_printf("Filling test area with u8 (%p):", ptr); + for (i = 0; i < TEST_SIZE; i++) { *ptr++ = BYTE_NEXT(count); - pdot(i); + pdot(i, true); } - ml_printf("done\n"); + + ml_printf("done %d @ %p\n", i, ptr); } /* @@ -91,10 +102,11 @@ static void init_test_data_s8(bool neg_first) neg_first ? "neg first" : "pos first"); for (i = 0; i < TEST_SIZE / 2; i++) { *ptr++ = get_byte(i, neg_first); + pdot(i, true); *ptr++ = get_byte(i, !neg_first); - pdot(i); + pdot(i, true); } - ml_printf("done\n"); + ml_printf("done %d @ %p\n", i * 2, ptr); } /* @@ -105,9 +117,19 @@ static void reset_start_data(int offset) { uint32_t *ptr = (uint32_t *) &test_data[0]; int i; + + if (!offset) { + return; + } + + ml_printf("Flushing %d bytes from %p: ", offset, ptr); + for (i = 0; i < offset; i++) { *ptr++ = 0; + pdot(i, true); } + + ml_printf("done %d @ %p\n", i, ptr); } static void init_test_data_u16(int offset) @@ -117,17 +139,17 @@ static void init_test_data_u16(int offset) const int max = (TEST_SIZE - offset) / sizeof(word); int i; - ml_printf("Filling test area with u16 (offset %d, %p):", offset, ptr); - reset_start_data(offset); + ml_printf("Filling test area with u16 (offset %d, %p):", offset, ptr); + for (i = 0; i < max; i++) { uint16_t low = BYTE_NEXT(count), high = BYTE_NEXT(count); word = BYTE_SHIFT(high, 1) | BYTE_SHIFT(low, 0); *ptr++ = word; - pdot(i); + pdot(i, true); } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); } static void init_test_data_u32(int offset) @@ -137,21 +159,22 @@ static void init_test_data_u32(int offset) const int max = (TEST_SIZE - offset) / sizeof(word); int i; - ml_printf("Filling test area with u32 (offset %d, %p):", offset, ptr); - reset_start_data(offset); + ml_printf("Filling test area with u32 (offset %d, %p):", offset, ptr); + for (i = 0; i < max; i++) { uint32_t b4 = BYTE_NEXT(count), b3 = BYTE_NEXT(count); uint32_t b2 = BYTE_NEXT(count), b1 = BYTE_NEXT(count); word = BYTE_SHIFT(b1, 3) | BYTE_SHIFT(b2, 2) | BYTE_SHIFT(b3, 1) | BYTE_SHIFT(b4, 0); *ptr++ = word; - pdot(i); + pdot(i, true); } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); } +#if __SIZEOF_POINTER__ >= 8 static void init_test_data_u64(int offset) { uint8_t count = 0; @@ -159,10 +182,10 @@ static void init_test_data_u64(int offset) const int max = (TEST_SIZE - offset) / sizeof(word); int i; - ml_printf("Filling test area with u64 (offset %d, %p):", offset, ptr); - reset_start_data(offset); + ml_printf("Filling test area with u64 (offset %d, %p):", offset, ptr); + for (i = 0; i < max; i++) { uint64_t b8 = BYTE_NEXT(count), b7 = BYTE_NEXT(count); uint64_t b6 = BYTE_NEXT(count), b5 = BYTE_NEXT(count); @@ -172,10 +195,11 @@ static void init_test_data_u64(int offset) BYTE_SHIFT(b4, 4) | BYTE_SHIFT(b5, 3) | BYTE_SHIFT(b6, 2) | BYTE_SHIFT(b7, 1) | BYTE_SHIFT(b8, 0); *ptr++ = word; - pdot(i); + pdot(i, true); } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); } +#endif static bool read_test_data_u16(int offset) { @@ -194,11 +218,11 @@ static bool read_test_data_u16(int offset) ml_printf("Error %d < %d\n", high, low); return false; } else { - pdot(i); + pdot(i, false); } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); return true; } @@ -236,13 +260,14 @@ static bool read_test_data_u32(int offset) ml_printf("Error %d, %d, %d, %d", b1, b2, b3, b4); return false; } else { - pdot(i); + pdot(i, false); } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); return true; } +#if __SIZEOF_POINTER__ >= 8 static bool read_test_data_u64(int offset) { uint64_t word, *ptr = (uint64_t *)&test_data[offset]; @@ -290,17 +315,22 @@ static bool read_test_data_u64(int offset) b1, b2, b3, b4, b5, b6, b7, b8); return false; } else { - pdot(i); + pdot(i, false); } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); return true; } +#endif /* Read the test data and verify at various offsets */ -read_ufn read_ufns[] = { read_test_data_u16, - read_test_data_u32, - read_test_data_u64 }; +read_ufn read_ufns[] = { + read_test_data_u16, + read_test_data_u32, +#if __SIZEOF_POINTER__ >= 8 + read_test_data_u64 +#endif +}; bool do_unsigned_reads(int start_off) { @@ -357,15 +387,17 @@ static bool read_test_data_s8(int offset, bool neg_first) second = *ptr++; if (neg_first && first < 0 && second > 0) { - pdot(i); + pdot(i, false); + pdot(i, false); } else if (!neg_first && first > 0 && second < 0) { - pdot(i); + pdot(i, false); + pdot(i, false); } else { ml_printf("Error %d %c %d\n", first, neg_first ? '<' : '>', second); return false; } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i * 2, ptr); return true; } @@ -390,15 +422,15 @@ static bool read_test_data_s16(int offset, bool neg_first) int32_t data = *ptr++; if (neg_first && data < 0) { - pdot(i); + pdot(i, false); } else if (!neg_first && data > 0) { - pdot(i); + pdot(i, false); } else { ml_printf("Error %d %c 0\n", data, neg_first ? '<' : '>'); return false; } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); return true; } @@ -423,15 +455,15 @@ static bool read_test_data_s32(int offset, bool neg_first) int64_t data = *ptr++; if (neg_first && data < 0) { - pdot(i); + pdot(i, false); } else if (!neg_first && data > 0) { - pdot(i); + pdot(i, false); } else { ml_printf("Error %d %c 0\n", data, neg_first ? '<' : '>'); return false; } } - ml_printf("done @ %p\n", ptr); + ml_printf("done %d @ %p\n", i, ptr); return true; } @@ -465,16 +497,23 @@ bool do_signed_reads(bool neg_first) return ok; } -init_ufn init_ufns[] = { init_test_data_u8, - init_test_data_u16, - init_test_data_u32, - init_test_data_u64 }; +init_ufn init_ufns[] = { + init_test_data_u8, + init_test_data_u16, + init_test_data_u32, +#if __SIZEOF_POINTER__ >= 8 + init_test_data_u64 +#endif +}; int main(void) { int i; bool ok = true; + ml_printf("Test data start: 0x%"PRIxPTR"\n", &test_data[0]); + ml_printf("Test data end: 0x%"PRIxPTR"\n", &test_data[TEST_SIZE]); + /* Run through the unsigned tests first */ for (i = 0; i < ARRAY_SIZE(init_ufns) && ok; i++) { ok = do_unsigned_test(init_ufns[i]); @@ -490,6 +529,8 @@ int main(void) ok = do_signed_reads(true); } + ml_printf("Test data read: %"PRId32"\n", test_read_count); + ml_printf("Test data write: %"PRId32"\n", test_write_count); ml_printf("Test complete: %s\n", ok ? "PASSED" : "FAILED"); return ok ? 0 : -1; } diff --git a/tests/tcg/multiarch/system/validate-memory-counts.py b/tests/tcg/multiarch/system/validate-memory-counts.py new file mode 100755 index 0000000000..5b8bbf3ef3 --- /dev/null +++ b/tests/tcg/multiarch/system/validate-memory-counts.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# +# validate-memory-counts.py: check we instrumented memory properly +# +# This program takes two inputs: +# - the mem plugin output +# - the memory binary output +# +# Copyright (C) 2024 Linaro Ltd +# +# SPDX-License-Identifier: GPL-2.0-or-later + +import sys +from argparse import ArgumentParser + +def extract_counts(path): + """ + Load the output from path and extract the lines containing: + + Test data start: 0x40214000 + Test data end: 0x40218001 + Test data read: 2522280 + Test data write: 262111 + + From the stream of data. Extract the values for use in the + validation function. + """ + start_address = None + end_address = None + read_count = 0 + write_count = 0 + with open(path, 'r') as f: + for line in f: + if line.startswith("Test data start:"): + start_address = int(line.split(':')[1].strip(), 16) + elif line.startswith("Test data end:"): + end_address = int(line.split(':')[1].strip(), 16) + elif line.startswith("Test data read:"): + read_count = int(line.split(':')[1].strip()) + elif line.startswith("Test data write:"): + write_count = int(line.split(':')[1].strip()) + return start_address, end_address, read_count, write_count + + +def parse_plugin_output(path, start, end): + """ + Load the plugin output from path in the form of: + + Region Base, Reads, Writes, Seen all + 0x0000000040004000, 31093, 0, false + 0x0000000040214000, 2522280, 278579, true + 0x0000000040000000, 137398, 0, false + 0x0000000040210000, 54727397, 33721956, false + + And extract the ranges that match test data start and end and + return the results. + """ + total_reads = 0 + total_writes = 0 + seen_all = False + + with open(path, 'r') as f: + next(f) # Skip the header + for line in f: + + if line.startswith("Region Base"): + continue + + parts = line.strip().split(', ') + if len(parts) != 4: + continue + + region_base = int(parts[0], 16) + reads = int(parts[1]) + writes = int(parts[2]) + + if start <= region_base < end: # Checking if within range + total_reads += reads + total_writes += writes + seen_all = parts[3] == "true" + + return total_reads, total_writes, seen_all + +def main() -> None: + """ + Process the arguments, injest the program and plugin out and + verify they match up and report if they do not. + """ + parser = ArgumentParser(description="Validate memory instrumentation") + parser.add_argument('test_output', + help="The output from the test itself") + parser.add_argument('plugin_output', + help="The output from memory plugin") + parser.add_argument('--bss-cleared', + action='store_true', + help='Assume bss was cleared (and adjusts counts).') + + args = parser.parse_args() + + # Extract counts from memory binary + start, end, exp_reads, exp_writes = extract_counts(args.test_output) + + # Some targets clear BSS before running but the test doesn't know + # that so we adjust it by the size of the test region. + if args.bss_cleared: + exp_writes += 16384 + + if start is None or end is None: + print("Failed to test_data boundaries from output.") + sys.exit(1) + + # Parse plugin output + preads, pwrites, seen_all = parse_plugin_output(args.plugin_output, + start, end) + + if not seen_all: + print("Fail: didn't instrument all accesses to test_data.") + sys.exit(1) + + # Compare and report + if preads == exp_reads and pwrites == exp_writes: + sys.exit(0) + else: + print("Fail: The memory reads and writes count does not match.") + print(f"Expected Reads: {exp_reads}, Actual Reads: {preads}") + print(f"Expected Writes: {exp_writes}, Actual Writes: {pwrites}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/tests/tcg/multiarch/test-plugin-mem-access.c b/tests/tcg/multiarch/test-plugin-mem-access.c new file mode 100644 index 0000000000..057b9aac9f --- /dev/null +++ b/tests/tcg/multiarch/test-plugin-mem-access.c @@ -0,0 +1,177 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Check if we detect all memory accesses expected using plugin API. + * Used in conjunction with ./check-plugin-mem-access.sh check script. + * Output of this program is the list of patterns expected in plugin output. + * + * 8,16,32 load/store are tested for all arch. + * 64,128 load/store are tested for aarch64/x64. + * atomic operations (8,16,32,64) are tested for x64 only. + */ + +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#if defined(__x86_64__) +#include <emmintrin.h> +#elif defined(__aarch64__) +#include <arm_neon.h> +#endif /* __x86_64__ */ + +static void *data; + +/* ,store_u8,.*,8,store,0xf1 */ +#define PRINT_EXPECTED(function, type, value, action) \ +do { \ + printf(",%s,.*,%d,%s,%s\n", \ + #function, (int) sizeof(type) * 8, action, value); \ +} \ +while (0) + +#define DEFINE_STORE(name, type, value) \ + \ +static void print_expected_store_##name(void) \ +{ \ + PRINT_EXPECTED(store_##name, type, #value, "store"); \ +} \ + \ +static void store_##name(void) \ +{ \ + *((type *)data) = value; \ + print_expected_store_##name(); \ +} + +#define DEFINE_ATOMIC_OP(name, type, value) \ + \ +static void print_expected_atomic_op_##name(void) \ +{ \ + PRINT_EXPECTED(atomic_op_##name, type, "0x0*42", "load"); \ + PRINT_EXPECTED(atomic_op_##name, type, #value, "store"); \ +} \ + \ +static void atomic_op_##name(void) \ +{ \ + *((type *)data) = 0x42; \ + __sync_val_compare_and_swap((type *)data, 0x42, value); \ + print_expected_atomic_op_##name(); \ +} + +#define DEFINE_LOAD(name, type, value) \ + \ +static void print_expected_load_##name(void) \ +{ \ + PRINT_EXPECTED(load_##name, type, #value, "load"); \ +} \ + \ +static void load_##name(void) \ +{ \ + \ + /* volatile forces load to be generated. */ \ + volatile type src = *((type *) data); \ + volatile type dest = src; \ + (void)src, (void)dest; \ + print_expected_load_##name(); \ +} + +DEFINE_STORE(u8, uint8_t, 0xf1) +DEFINE_LOAD(u8, uint8_t, 0xf1) +DEFINE_STORE(u16, uint16_t, 0xf123) +DEFINE_LOAD(u16, uint16_t, 0xf123) +DEFINE_STORE(u32, uint32_t, 0xff112233) +DEFINE_LOAD(u32, uint32_t, 0xff112233) + +#if defined(__x86_64__) || defined(__aarch64__) +DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef) +DEFINE_LOAD(u64, uint64_t, 0xf123456789abcdef) + +static void print_expected_store_u128(void) +{ + PRINT_EXPECTED(store_u128, __int128, + "0xf122334455667788f123456789abcdef", "store"); +} + +static void store_u128(void) +{ +#ifdef __x86_64__ + _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788, + 0xf1234567, 0x89abcdef)); +#else + const uint32_t init[4] = {0x89abcdef, 0xf1234567, 0x55667788, 0xf1223344}; + uint32x4_t vec = vld1q_u32(init); + vst1q_u32(data, vec); +#endif /* __x86_64__ */ + print_expected_store_u128(); +} + +static void print_expected_load_u128(void) +{ + PRINT_EXPECTED(load_u128, __int128, + "0xf122334455667788f123456789abcdef", "load"); +} + +static void load_u128(void) +{ +#ifdef __x86_64__ + __m128i var = _mm_load_si128(data); +#else + uint32x4_t var = vld1q_u32(data); +#endif + (void) var; + print_expected_load_u128(); +} +#endif /* __x86_64__ || __aarch64__ */ + +#if defined(__x86_64__) +DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1) +DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123) +DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233) +DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef) +#endif /* __x86_64__ */ + +static void *f(void *p) +{ + return NULL; +} + +int main(void) +{ + /* + * We force creation of a second thread to enable cpu flag CF_PARALLEL. + * This will generate atomic operations when needed. + */ + pthread_t thread; + pthread_create(&thread, NULL, &f, NULL); + pthread_join(thread, NULL); + + /* allocate storage up to 128 bits */ + data = malloc(16); + + store_u8(); + load_u8(); + + store_u16(); + load_u16(); + + store_u32(); + load_u32(); + +#if defined(__x86_64__) || defined(__aarch64__) + store_u64(); + load_u64(); + + store_u128(); + load_u128(); +#endif /* __x86_64__ || __aarch64__ */ + +#if defined(__x86_64__) + atomic_op_u8(); + atomic_op_u16(); + atomic_op_u32(); + atomic_op_u64(); +#endif /* __x86_64__ */ + + free(data); +} diff --git a/tests/tcg/plugins/mem.c b/tests/tcg/plugins/mem.c index b650dddcce..b0fa8a9f27 100644 --- a/tests/tcg/plugins/mem.c +++ b/tests/tcg/plugins/mem.c @@ -12,6 +12,15 @@ #include <stdio.h> #include <glib.h> +/* + * plugins should not include anything from QEMU aside from the + * API header. However as this is a test plugin to exercise the + * internals of QEMU and we want to avoid needless code duplication we + * do so here. bswap.h is pretty self-contained although it needs a + * few things provided by compiler.h. + */ +#include <compiler.h> +#include <bswap.h> #include <qemu-plugin.h> QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; @@ -21,13 +30,52 @@ typedef struct { uint64_t io_count; } CPUCount; +typedef struct { + uint64_t vaddr; + const char *sym; +} InsnInfo; + +/* + * For the "memory" system test we need to track accesses to + * individual regions. We mirror the data written to the region and + * then check when it is read that it matches up. + * + * We do this as regions rather than pages to save on complications + * with page crossing and the fact the test only cares about the + * test_data region. + */ +static uint64_t region_size = 4096 * 4; +static uint64_t region_mask; + +typedef struct { + uint64_t region_address; + uint64_t reads; + uint64_t writes; + uint8_t *data; + /* Did we see every write and read with correct values? */ + bool seen_all; +} RegionInfo; + static struct qemu_plugin_scoreboard *counts; static qemu_plugin_u64 mem_count; static qemu_plugin_u64 io_count; -static bool do_inline, do_callback; +static bool do_inline, do_callback, do_print_accesses, do_region_summary; static bool do_haddr; static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static GMutex lock; +static GHashTable *regions; + +static gint addr_order(gconstpointer a, gconstpointer b) +{ + RegionInfo *na = (RegionInfo *) a; + RegionInfo *nb = (RegionInfo *) b; + + return na->region_address > nb->region_address ? 1 : -1; +} + + static void plugin_exit(qemu_plugin_id_t id, void *p) { g_autoptr(GString) out = g_string_new(""); @@ -41,9 +89,145 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) qemu_plugin_u64_sum(io_count)); } qemu_plugin_outs(out->str); + + + if (do_region_summary) { + GList *counts = g_hash_table_get_values(regions); + + counts = g_list_sort(counts, addr_order); + + g_string_printf(out, "Region Base, Reads, Writes, Seen all\n"); + + if (counts && g_list_next(counts)) { + for (/* counts */; counts; counts = counts->next) { + RegionInfo *ri = (RegionInfo *) counts->data; + + g_string_append_printf(out, + "0x%016"PRIx64", " + "%"PRId64", %"PRId64", %s\n", + ri->region_address, + ri->reads, + ri->writes, + ri->seen_all ? "true" : "false"); + } + } + qemu_plugin_outs(out->str); + } + qemu_plugin_scoreboard_free(counts); } +/* + * Update the region tracking info for the access. We split up accesses + * that span regions even though the plugin infrastructure will deliver + * it as a single access. + */ +static void update_region_info(uint64_t region, uint64_t offset, + qemu_plugin_meminfo_t meminfo, + qemu_plugin_mem_value value, + unsigned size) +{ + bool be = qemu_plugin_mem_is_big_endian(meminfo); + bool is_store = qemu_plugin_mem_is_store(meminfo); + RegionInfo *ri; + bool unseen_data = false; + + g_assert(offset + size <= region_size); + + g_mutex_lock(&lock); + ri = (RegionInfo *) g_hash_table_lookup(regions, GUINT_TO_POINTER(region)); + + if (!ri) { + ri = g_new0(RegionInfo, 1); + ri->region_address = region; + ri->data = g_malloc0(region_size); + ri->seen_all = true; + g_hash_table_insert(regions, GUINT_TO_POINTER(region), (gpointer) ri); + } + + if (is_store) { + ri->writes++; + } else { + ri->reads++; + } + + switch (value.type) { + case QEMU_PLUGIN_MEM_VALUE_U8: + if (is_store) { + ri->data[offset] = value.data.u8; + } else if (ri->data[offset] != value.data.u8) { + unseen_data = true; + } + break; + case QEMU_PLUGIN_MEM_VALUE_U16: + { + uint16_t *p = (uint16_t *) &ri->data[offset]; + if (is_store) { + if (be) { + stw_be_p(p, value.data.u16); + } else { + stw_le_p(p, value.data.u16); + } + } else { + uint16_t val = be ? lduw_be_p(p) : lduw_le_p(p); + unseen_data = val != value.data.u16; + } + break; + } + case QEMU_PLUGIN_MEM_VALUE_U32: + { + uint32_t *p = (uint32_t *) &ri->data[offset]; + if (is_store) { + if (be) { + stl_be_p(p, value.data.u32); + } else { + stl_le_p(p, value.data.u32); + } + } else { + uint32_t val = be ? ldl_be_p(p) : ldl_le_p(p); + unseen_data = val != value.data.u32; + } + break; + } + case QEMU_PLUGIN_MEM_VALUE_U64: + { + uint64_t *p = (uint64_t *) &ri->data[offset]; + if (is_store) { + if (be) { + stq_be_p(p, value.data.u64); + } else { + stq_le_p(p, value.data.u64); + } + } else { + uint64_t val = be ? ldq_be_p(p) : ldq_le_p(p); + unseen_data = val != value.data.u64; + } + break; + } + case QEMU_PLUGIN_MEM_VALUE_U128: + /* non in test so skip */ + break; + default: + g_assert_not_reached(); + } + + /* + * This is expected for regions initialised by QEMU (.text etc) but we + * expect to see all data read and written to the test_data region + * of the memory test. + */ + if (unseen_data && ri->seen_all) { + g_autoptr(GString) error = g_string_new("Warning: "); + g_string_append_printf(error, "0x%016"PRIx64":%"PRId64 + " read an un-instrumented value\n", + region, offset); + qemu_plugin_outs(error->str); + ri->seen_all = false; + } + + g_mutex_unlock(&lock); +} + static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, uint64_t vaddr, void *udata) { @@ -58,6 +242,53 @@ static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, } else { qemu_plugin_u64_add(mem_count, cpu_index, 1); } + + if (do_region_summary) { + uint64_t region = vaddr & ~region_mask; + uint64_t offset = vaddr & region_mask; + qemu_plugin_mem_value value = qemu_plugin_mem_get_value(meminfo); + unsigned size = 1 << qemu_plugin_mem_size_shift(meminfo); + + update_region_info(region, offset, meminfo, value, size); + } +} + +static void print_access(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo, + uint64_t vaddr, void *udata) +{ + InsnInfo *insn_info = udata; + unsigned size = 8 << qemu_plugin_mem_size_shift(meminfo); + const char *type = qemu_plugin_mem_is_store(meminfo) ? "store" : "load"; + qemu_plugin_mem_value value = qemu_plugin_mem_get_value(meminfo); + uint64_t hwaddr = + qemu_plugin_hwaddr_phys_addr(qemu_plugin_get_hwaddr(meminfo, vaddr)); + g_autoptr(GString) out = g_string_new(""); + g_string_printf(out, + "0x%"PRIx64",%s,0x%"PRIx64",0x%"PRIx64",%d,%s,", + insn_info->vaddr, insn_info->sym, + vaddr, hwaddr, size, type); + switch (value.type) { + case QEMU_PLUGIN_MEM_VALUE_U8: + g_string_append_printf(out, "0x%02"PRIx8, value.data.u8); + break; + case QEMU_PLUGIN_MEM_VALUE_U16: + g_string_append_printf(out, "0x%04"PRIx16, value.data.u16); + break; + case QEMU_PLUGIN_MEM_VALUE_U32: + g_string_append_printf(out, "0x%08"PRIx32, value.data.u32); + break; + case QEMU_PLUGIN_MEM_VALUE_U64: + g_string_append_printf(out, "0x%016"PRIx64, value.data.u64); + break; + case QEMU_PLUGIN_MEM_VALUE_U128: + g_string_append_printf(out, "0x%016"PRIx64"%016"PRIx64, + value.data.u128.high, value.data.u128.low); + break; + default: + g_assert_not_reached(); + } + g_string_append_printf(out, "\n"); + qemu_plugin_outs(out->str); } static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) @@ -74,11 +305,21 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) QEMU_PLUGIN_INLINE_ADD_U64, mem_count, 1); } - if (do_callback) { + if (do_callback || do_region_summary) { qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, QEMU_PLUGIN_CB_NO_REGS, rw, NULL); } + if (do_print_accesses) { + /* we leak this pointer, to avoid locking to keep track of it */ + InsnInfo *insn_info = g_malloc(sizeof(InsnInfo)); + const char *sym = qemu_plugin_insn_symbol(insn); + insn_info->sym = sym ? sym : ""; + insn_info->vaddr = qemu_plugin_insn_vaddr(insn); + qemu_plugin_register_vcpu_mem_cb(insn, print_access, + QEMU_PLUGIN_CB_NO_REGS, + rw, (void *) insn_info); + } } } @@ -117,6 +358,18 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, fprintf(stderr, "boolean argument parsing failed: %s\n", opt); return -1; } + } else if (g_strcmp0(tokens[0], "print-accesses") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], + &do_print_accesses)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "region-summary") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], + &do_region_summary)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; @@ -129,6 +382,19 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, return -1; } + if (do_print_accesses) { + g_autoptr(GString) out = g_string_new(""); + g_string_printf(out, + "insn_vaddr,insn_symbol,mem_vaddr,mem_hwaddr," + "access_size,access_type,mem_value\n"); + qemu_plugin_outs(out->str); + } + + if (do_region_summary) { + region_mask = (region_size - 1); + regions = g_hash_table_new(NULL, g_direct_equal); + } + counts = qemu_plugin_scoreboard_new(sizeof(CPUCount)); mem_count = qemu_plugin_scoreboard_u64_in_struct( counts, CPUCount, mem_count); diff --git a/tests/tcg/plugins/syscall.c b/tests/tcg/plugins/syscall.c index 72e1a5bf90..89dc7f49b1 100644 --- a/tests/tcg/plugins/syscall.c +++ b/tests/tcg/plugins/syscall.c @@ -22,8 +22,57 @@ typedef struct { int64_t errors; } SyscallStats; +struct SyscallInfo { + const char *name; + int64_t write_sysno; +}; + +static const struct SyscallInfo arch_syscall_info[] = { + { "aarch64", 64 }, + { "aarch64_be", 64 }, + { "alpha", 4 }, + { "arm", 4 }, + { "armeb", 4 }, + { "avr", -1 }, + { "cris", -1 }, + { "hexagon", 64 }, + { "hppa", -1 }, + { "i386", 4 }, + { "loongarch64", -1 }, + { "m68k", 4 }, + { "microblaze", 4 }, + { "microblazeel", 4 }, + { "mips", 1 }, + { "mips64", 1 }, + { "mips64el", 1 }, + { "mipsel", 1 }, + { "mipsn32", 1 }, + { "mipsn32el", 1 }, + { "or1k", -1 }, + { "ppc", 4 }, + { "ppc64", 4 }, + { "ppc64le", 4 }, + { "riscv32", 64 }, + { "riscv64", 64 }, + { "rx", -1 }, + { "s390x", -1 }, + { "sh4", -1 }, + { "sh4eb", -1 }, + { "sparc", 4 }, + { "sparc32plus", 4 }, + { "sparc64", 4 }, + { "tricore", -1 }, + { "x86_64", 1 }, + { "xtensa", 13 }, + { "xtensaeb", 13 }, + { NULL, -1 }, +}; + static GMutex lock; static GHashTable *statistics; +static GByteArray *memory_buffer; +static bool do_log_writes; +static int64_t write_sysno = -1; static SyscallStats *get_or_create_entry(int64_t num) { @@ -39,6 +88,44 @@ static SyscallStats *get_or_create_entry(int64_t num) return entry; } +/* + * Hex-dump a GByteArray to the QEMU plugin output in the format: + * 61 63 63 65 6c 09 09 20 20 20 66 70 75 09 09 09 | accel.....fpu... + * 20 6d 6f 64 75 6c 65 2d 63 6f 6d 6d 6f 6e 2e 63 | .module-common.c + */ +static void hexdump(const GByteArray *data) +{ + g_autoptr(GString) out = g_string_new(""); + + for (guint index = 0; index < data->len; index += 16) { + for (guint col = 0; col < 16; col++) { + if (index + col < data->len) { + g_string_append_printf(out, "%02x ", data->data[index + col]); + } else { + g_string_append(out, " "); + } + } + + g_string_append(out, " | "); + + for (guint col = 0; col < 16; col++) { + if (index + col >= data->len) { + break; + } + + if (g_ascii_isgraph(data->data[index + col])) { + g_string_append_printf(out, "%c", data->data[index + col]); + } else { + g_string_append(out, "."); + } + } + + g_string_append(out, "\n"); + } + + qemu_plugin_outs(out->str); +} + static void vcpu_syscall(qemu_plugin_id_t id, unsigned int vcpu_index, int64_t num, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, @@ -54,6 +141,14 @@ static void vcpu_syscall(qemu_plugin_id_t id, unsigned int vcpu_index, g_autofree gchar *out = g_strdup_printf("syscall #%" PRIi64 "\n", num); qemu_plugin_outs(out); } + + if (do_log_writes && num == write_sysno) { + if (qemu_plugin_read_memory_vaddr(a2, memory_buffer, a3)) { + hexdump(memory_buffer); + } else { + fprintf(stderr, "Error reading memory from vaddr %"PRIu64"\n", a2); + } + } } static void vcpu_syscall_ret(qemu_plugin_id_t id, unsigned int vcpu_idx, @@ -127,6 +222,10 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_print)) { fprintf(stderr, "boolean argument parsing failed: %s\n", opt); } + } else if (g_strcmp0(tokens[0], "log_writes") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_log_writes)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + } } else { fprintf(stderr, "unsupported argument: %s\n", argv[i]); return -1; @@ -137,6 +236,24 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, statistics = g_hash_table_new_full(NULL, g_direct_equal, NULL, g_free); } + if (do_log_writes) { + for (const struct SyscallInfo *syscall_info = arch_syscall_info; + syscall_info->name != NULL; syscall_info++) { + + if (g_strcmp0(syscall_info->name, info->target_name) == 0) { + write_sysno = syscall_info->write_sysno; + break; + } + } + + if (write_sysno == -1) { + fprintf(stderr, "write syscall number not found\n"); + return -1; + } + + memory_buffer = g_byte_array_new(); + } + qemu_plugin_register_vcpu_syscall_cb(id, vcpu_syscall); qemu_plugin_register_vcpu_syscall_ret_cb(id, vcpu_syscall_ret); qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target index 509a20be2b..1940886c73 100644 --- a/tests/tcg/ppc64/Makefile.target +++ b/tests/tcg/ppc64/Makefile.target @@ -55,4 +55,9 @@ PPC64_TESTS += signal_save_restore_xer PPC64_TESTS += xxspltw PPC64_TESTS += test-aes +# ppc64 ABI uses function descriptors, and thus, QEMU can't find symbol for a +# given instruction. Thus, we don't check output of mem-access plugin. +run-plugin-test-plugin-mem-access-with-libmem.so: \ + CHECK_PLUGIN_OUTPUT_COMMAND= + TESTS += $(PPC64_TESTS) diff --git a/tests/tcg/s390x/Makefile.softmmu-target b/tests/tcg/s390x/Makefile.softmmu-target index f60f94b090..3227903348 100644 --- a/tests/tcg/s390x/Makefile.softmmu-target +++ b/tests/tcg/s390x/Makefile.softmmu-target @@ -1,6 +1,7 @@ S390X_SRC=$(SRC_PATH)/tests/tcg/s390x VPATH+=$(S390X_SRC) -QEMU_OPTS+=-action panic=exit-failure -nographic $(EXTFLAGS) -kernel +# EXTFLAGS can be passed by the user, e.g. to override the --accel +QEMU_OPTS+=-action panic=exit-failure -nographic -serial chardev:output $(EXTFLAGS) -kernel LINK_SCRIPT=$(S390X_SRC)/softmmu.ld CFLAGS+=-ggdb -O0 LDFLAGS=-nostdlib -static @@ -46,3 +47,8 @@ $(MULTIARCH_TESTS): $(S390X_MULTIARCH_RUNTIME_OBJS) $(MULTIARCH_TESTS): LDFLAGS += $(S390X_MULTIARCH_RUNTIME_OBJS) $(MULTIARCH_TESTS): CFLAGS += $(MINILIB_INC) memory: CFLAGS += -DCHECK_UNALIGNED=0 + +# s390x clears the BSS section so we need to account for that +run-plugin-memory-with-libmem.so: \ + CHECK_PLUGIN_OUTPUT_COMMAND=$(MULTIARCH_SYSTEM_SRC)/validate-memory-counts.py \ + --bss-cleared $@.out diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 213114be68..6b1533bc2a 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -685,10 +685,17 @@ int64_t qemu_clock_advance_virtual_time(int64_t dest) { int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); AioContext *aio_context; + int64_t deadline; + aio_context = qemu_get_aio_context(); - while (clock < dest) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, + + deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, QEMU_TIMER_ATTR_ALL); + /* + * A deadline of < 0 indicates this timer is not enabled, so we + * won't get far trying to run it forward. + */ + while (deadline >= 0 && clock < dest) { int64_t warp = qemu_soonest_timeout(dest - clock, deadline); qemu_virtual_clock_set_ns(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + warp); @@ -696,6 +703,9 @@ int64_t qemu_clock_advance_virtual_time(int64_t dest) qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, + QEMU_TIMER_ATTR_ALL); } qemu_clock_notify(QEMU_CLOCK_VIRTUAL);