From 0b5538c300a56c3cfb33022840fe0b4968147e7a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sat, 26 Feb 2011 18:38:39 +0000 Subject: [PATCH 1/3] simpletrace: Thread-safe tracing Trace events outside the global mutex cannot be used with the simple trace backend since it is not thread-safe. There is no check to prevent them being enabled so people sometimes learn this the hard way. This patch restructures the simple trace backend with a ring buffer suitable for multiple concurrent writers. A writeout thread empties the trace buffer when threshold fill levels are reached. Should the writeout thread be unable to keep up with trace generation, records will simply be dropped. Each time events are dropped a special record is written to the trace file indicating how many events were dropped. The event ID is 0xfffffffffffffffe and its signature is dropped(uint32_t count). Signed-off-by: Stefan Hajnoczi --- docs/tracing.txt | 5 - scripts/simpletrace.py | 3 +- simpletrace.c | 339 ++++++++++++++++++++++++++--------------- simpletrace.h | 8 + vl.c | 16 +- 5 files changed, 234 insertions(+), 137 deletions(-) diff --git a/docs/tracing.txt b/docs/tracing.txt index a6cc56fd33..f15069c96b 100644 --- a/docs/tracing.txt +++ b/docs/tracing.txt @@ -141,11 +141,6 @@ source tree. It may not be as powerful as platform-specific or third-party trace backends but it is portable. This is the recommended trace backend unless you have specific needs for more advanced backends. -Warning: the "simple" backend is not thread-safe so only enable trace events -that are executed while the global mutex is held. Much of QEMU meets this -requirement but some utility functions like qemu_malloc() or thread-related -code cannot be safely traced using the "simple" backend. - ==== Monitor commands ==== * info trace diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 9fe3dda076..2ad56998ee 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -16,6 +16,7 @@ import inspect header_event_id = 0xffffffffffffffff header_magic = 0xf2b177cb0aa429b4 header_version = 0 +dropped_event_id = 0xfffffffffffffffe trace_fmt = '=QQQQQQQQ' trace_len = struct.calcsize(trace_fmt) @@ -28,7 +29,7 @@ def parse_events(fobj): """Extract argument names from a parameter list.""" return tuple(arg.split()[-1].lstrip('*') for arg in args.split(',')) - events = {} + events = {dropped_event_id: ('dropped', 'count')} event_num = 0 for line in fobj: m = event_re.match(line.strip()) diff --git a/simpletrace.c b/simpletrace.c index 9ea0d1f984..9926ab39bb 100644 --- a/simpletrace.c +++ b/simpletrace.c @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include "qerror.h" #include "qemu-timer.h" #include "trace.h" @@ -24,6 +27,12 @@ /** Trace file version number, bump if format changes */ #define HEADER_VERSION 0 +/** Records were dropped event ID */ +#define DROPPED_EVENT_ID (~(uint64_t)0 - 1) + +/** Trace record is valid */ +#define TRACE_RECORD_VALID ((uint64_t)1 << 63) + /** Trace buffer entry */ typedef struct { uint64_t event; @@ -37,126 +46,135 @@ typedef struct { } TraceRecord; enum { - TRACE_BUF_LEN = 64 * 1024 / sizeof(TraceRecord), + TRACE_BUF_LEN = 4096, + TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, }; +/* + * Trace records are written out by a dedicated thread. The thread waits for + * records to become available, writes them out, and then waits again. + */ +static pthread_mutex_t trace_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t trace_available_cond = PTHREAD_COND_INITIALIZER; +static pthread_cond_t trace_empty_cond = PTHREAD_COND_INITIALIZER; +static bool trace_available; +static bool trace_writeout_enabled; + static TraceRecord trace_buf[TRACE_BUF_LEN]; static unsigned int trace_idx; static FILE *trace_fp; static char *trace_file_name = NULL; -static bool trace_file_enabled = false; - -void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) -{ - stream_printf(stream, "Trace file \"%s\" %s.\n", - trace_file_name, trace_file_enabled ? "on" : "off"); -} - -static bool write_header(FILE *fp) -{ - static const TraceRecord header = { - .event = HEADER_EVENT_ID, - .timestamp_ns = HEADER_MAGIC, - .x1 = HEADER_VERSION, - }; - - return fwrite(&header, sizeof header, 1, fp) == 1; -} /** - * set_trace_file : To set the name of a trace file. - * @file : pointer to the name to be set. - * If NULL, set to the default name- set at config time. + * Read a trace record from the trace buffer + * + * @idx Trace buffer index + * @record Trace record to fill + * + * Returns false if the record is not valid. */ -bool st_set_trace_file(const char *file) +static bool get_trace_record(unsigned int idx, TraceRecord *record) { - st_set_trace_file_enabled(false); - - free(trace_file_name); - - if (!file) { - if (asprintf(&trace_file_name, CONFIG_TRACE_FILE, getpid()) < 0) { - trace_file_name = NULL; - return false; - } - } else { - if (asprintf(&trace_file_name, "%s", file) < 0) { - trace_file_name = NULL; - return false; - } + if (!(trace_buf[idx].event & TRACE_RECORD_VALID)) { + return false; } - st_set_trace_file_enabled(true); + __sync_synchronize(); /* read memory barrier before accessing record */ + + *record = trace_buf[idx]; + record->event &= ~TRACE_RECORD_VALID; return true; } -static void flush_trace_file(void) +/** + * Kick writeout thread + * + * @wait Whether to wait for writeout thread to complete + */ +static void flush_trace_file(bool wait) { - /* If the trace file is not open yet, open it now */ - if (!trace_fp) { - trace_fp = fopen(trace_file_name, "w"); - if (!trace_fp) { - /* Avoid repeatedly trying to open file on failure */ - trace_file_enabled = false; - return; + pthread_mutex_lock(&trace_lock); + trace_available = true; + pthread_cond_signal(&trace_available_cond); + + if (wait) { + pthread_cond_wait(&trace_empty_cond, &trace_lock); + } + + pthread_mutex_unlock(&trace_lock); +} + +static void wait_for_trace_records_available(void) +{ + pthread_mutex_lock(&trace_lock); + while (!(trace_available && trace_writeout_enabled)) { + pthread_cond_signal(&trace_empty_cond); + pthread_cond_wait(&trace_available_cond, &trace_lock); + } + trace_available = false; + pthread_mutex_unlock(&trace_lock); +} + +static void *writeout_thread(void *opaque) +{ + TraceRecord record; + unsigned int writeout_idx = 0; + unsigned int num_available, idx; + size_t unused; + + for (;;) { + wait_for_trace_records_available(); + + num_available = trace_idx - writeout_idx; + if (num_available > TRACE_BUF_LEN) { + record = (TraceRecord){ + .event = DROPPED_EVENT_ID, + .x1 = num_available, + }; + unused = fwrite(&record, sizeof(record), 1, trace_fp); + writeout_idx += num_available; } - write_header(trace_fp); + + idx = writeout_idx % TRACE_BUF_LEN; + while (get_trace_record(idx, &record)) { + trace_buf[idx].event = 0; /* clear valid bit */ + unused = fwrite(&record, sizeof(record), 1, trace_fp); + idx = ++writeout_idx % TRACE_BUF_LEN; + } + + fflush(trace_fp); } - - if (trace_fp) { - size_t unused; /* for when fwrite(3) is declared warn_unused_result */ - unused = fwrite(trace_buf, trace_idx * sizeof(trace_buf[0]), 1, trace_fp); - } -} - -void st_flush_trace_buffer(void) -{ - if (trace_file_enabled) { - flush_trace_file(); - } - - /* Discard written trace records */ - trace_idx = 0; -} - -void st_set_trace_file_enabled(bool enable) -{ - if (enable == trace_file_enabled) { - return; /* no change */ - } - - /* Flush/discard trace buffer */ - st_flush_trace_buffer(); - - /* To disable, close trace file */ - if (!enable) { - fclose(trace_fp); - trace_fp = NULL; - } - - trace_file_enabled = enable; + return NULL; } static void trace(TraceEventID event, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6) { - TraceRecord *rec = &trace_buf[trace_idx]; + unsigned int idx; + uint64_t timestamp; if (!trace_list[event].state) { return; } - rec->event = event; - rec->timestamp_ns = get_clock(); - rec->x1 = x1; - rec->x2 = x2; - rec->x3 = x3; - rec->x4 = x4; - rec->x5 = x5; - rec->x6 = x6; + timestamp = get_clock(); - if (++trace_idx == TRACE_BUF_LEN) { - st_flush_trace_buffer(); + idx = __sync_fetch_and_add(&trace_idx, 1) % TRACE_BUF_LEN; + trace_buf[idx] = (TraceRecord){ + .event = event, + .timestamp_ns = timestamp, + .x1 = x1, + .x2 = x2, + .x3 = x3, + .x4 = x4, + .x5 = x5, + .x6 = x6, + }; + __sync_synchronize(); /* write barrier before marking as valid */ + trace_buf[idx].event |= TRACE_RECORD_VALID; + + if ((idx + 1) % TRACE_BUF_FLUSH_THRESHOLD == 0) { + flush_trace_file(false); } } @@ -195,24 +213,93 @@ void trace6(TraceEventID event, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t trace(event, x1, x2, x3, x4, x5, x6); } -/** - * Flush the trace buffer on exit - */ -static void __attribute__((constructor)) st_init(void) +void st_set_trace_file_enabled(bool enable) { - atexit(st_flush_trace_buffer); + if (enable == !!trace_fp) { + return; /* no change */ + } + + /* Halt trace writeout */ + flush_trace_file(true); + trace_writeout_enabled = false; + flush_trace_file(true); + + if (enable) { + static const TraceRecord header = { + .event = HEADER_EVENT_ID, + .timestamp_ns = HEADER_MAGIC, + .x1 = HEADER_VERSION, + }; + + trace_fp = fopen(trace_file_name, "w"); + if (!trace_fp) { + return; + } + + if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { + fclose(trace_fp); + trace_fp = NULL; + return; + } + + /* Resume trace writeout */ + trace_writeout_enabled = true; + flush_trace_file(false); + } else { + fclose(trace_fp); + trace_fp = NULL; + } +} + +/** + * Set the name of a trace file + * + * @file The trace file name or NULL for the default name- set at + * config time + */ +bool st_set_trace_file(const char *file) +{ + st_set_trace_file_enabled(false); + + free(trace_file_name); + + if (!file) { + if (asprintf(&trace_file_name, CONFIG_TRACE_FILE, getpid()) < 0) { + trace_file_name = NULL; + return false; + } + } else { + if (asprintf(&trace_file_name, "%s", file) < 0) { + trace_file_name = NULL; + return false; + } + } + + st_set_trace_file_enabled(true); + return true; +} + +void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) +{ + stream_printf(stream, "Trace file \"%s\" %s.\n", + trace_file_name, trace_fp ? "on" : "off"); } void st_print_trace(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) { unsigned int i; - for (i = 0; i < trace_idx; i++) { + for (i = 0; i < TRACE_BUF_LEN; i++) { + TraceRecord record; + + if (!get_trace_record(i, &record)) { + continue; + } stream_printf(stream, "Event %" PRIu64 " : %" PRIx64 " %" PRIx64 " %" PRIx64 " %" PRIx64 " %" PRIx64 " %" PRIx64 "\n", - trace_buf[i].event, trace_buf[i].x1, trace_buf[i].x2, - trace_buf[i].x3, trace_buf[i].x4, trace_buf[i].x5, - trace_buf[i].x6); + record.event, record.x1, record.x2, + record.x3, record.x4, record.x5, + record.x6); } } @@ -226,30 +313,44 @@ void st_print_trace_events(FILE *stream, int (*stream_printf)(FILE *stream, cons } } -static TraceEvent* find_trace_event_by_name(const char *tname) +bool st_change_trace_event_state(const char *name, bool enabled) { unsigned int i; - if (!tname) { - return NULL; - } - for (i = 0; i < NR_TRACE_EVENTS; i++) { - if (!strcmp(trace_list[i].tp_name, tname)) { - return &trace_list[i]; + if (!strcmp(trace_list[i].tp_name, name)) { + trace_list[i].state = enabled; + return true; } } - return NULL; /* indicates end of list reached without a match */ -} - -bool st_change_trace_event_state(const char *tname, bool tstate) -{ - TraceEvent *tp; - - tp = find_trace_event_by_name(tname); - if (tp) { - tp->state = tstate; - return true; - } return false; } + +void st_flush_trace_buffer(void) +{ + flush_trace_file(true); +} + +void st_init(const char *file) +{ + pthread_t thread; + pthread_attr_t attr; + sigset_t set, oldset; + int ret; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + sigfillset(&set); + pthread_sigmask(SIG_SETMASK, &set, &oldset); + ret = pthread_create(&thread, &attr, writeout_thread, NULL); + pthread_sigmask(SIG_SETMASK, &oldset, NULL); + + if (ret != 0) { + error_report("warning: unable to create trace file thread\n"); + return; + } + + atexit(st_flush_trace_buffer); + st_set_trace_file(file); +} diff --git a/simpletrace.h b/simpletrace.h index 2f44ed3c3c..3a5bd9fb9d 100644 --- a/simpletrace.h +++ b/simpletrace.h @@ -15,6 +15,7 @@ #include #include +#ifdef CONFIG_SIMPLE_TRACE typedef uint64_t TraceEventID; typedef struct { @@ -36,5 +37,12 @@ void st_print_trace_file_status(FILE *stream, fprintf_function stream_printf); void st_set_trace_file_enabled(bool enable); bool st_set_trace_file(const char *file); void st_flush_trace_buffer(void); +void st_init(const char *file); +#else +static inline void st_init(const char *file) +{ + /* Do nothing */ +} +#endif /* !CONFIG_SIMPLE_TRACE */ #endif /* SIMPLETRACE_H */ diff --git a/vl.c b/vl.c index b436952d6c..5e007a764c 100644 --- a/vl.c +++ b/vl.c @@ -47,9 +47,6 @@ #include #include #include -#ifdef CONFIG_SIMPLE_TRACE -#include "trace.h" -#endif #ifdef CONFIG_BSD #include @@ -159,6 +156,7 @@ int main(int argc, char **argv) #include "slirp/libslirp.h" #include "trace.h" +#include "simpletrace.h" #include "qemu-queue.h" #include "cpus.h" #include "arch_init.h" @@ -1941,10 +1939,8 @@ int main(int argc, char **argv, char **envp) const char *incoming = NULL; int show_vnc_port = 0; int defconfig = 1; - -#ifdef CONFIG_SIMPLE_TRACE const char *trace_file = NULL; -#endif + atexit(qemu_run_exit_notifiers); error_set_progname(argv[0]); @@ -2770,6 +2766,8 @@ int main(int argc, char **argv, char **envp) } loc_set_none(); + st_init(trace_file); + /* If no data_dir is specified then try to find it relative to the executable path. */ if (!data_dir) { @@ -2780,12 +2778,6 @@ int main(int argc, char **argv, char **envp) data_dir = CONFIG_QEMU_DATADIR; } -#ifdef CONFIG_SIMPLE_TRACE - /* - * Set the trace file name, if specified. - */ - st_set_trace_file(trace_file); -#endif /* * Default to max_cpus = smp_cpus, in case the user doesn't * specify a max_cpus value. From a13aac04e13b246edab6c0c08513504ecb71bbd4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2011 07:58:04 +0000 Subject: [PATCH 2/3] trace: Trace bdrv_aio_flush() Add a trace event for bdrv_aio_flush() to complement the existing bdrv_aio_readv() and bdrv_aio_writev() events. Signed-off-by: Stefan Hajnoczi --- block.c | 2 ++ trace-events | 1 + 2 files changed, 3 insertions(+) diff --git a/block.c b/block.c index f7d91a2de1..0559d835a2 100644 --- a/block.c +++ b/block.c @@ -2477,6 +2477,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, { BlockDriver *drv = bs->drv; + trace_bdrv_aio_flush(bs, opaque); + if (bs->open_flags & BDRV_O_NO_FLUSH) { return bdrv_aio_noop_em(bs, cb, opaque); } diff --git a/trace-events b/trace-events index c79171989c..0a26d6a94f 100644 --- a/trace-events +++ b/trace-events @@ -51,6 +51,7 @@ disable multiwrite_cb(void *mcb, int ret) "mcb %p ret %d" disable bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb %p num_callbacks %d num_reqs %d" disable bdrv_aio_multiwrite_earlyfail(void *mcb) "mcb %p" disable bdrv_aio_multiwrite_latefail(void *mcb, int i) "mcb %p i %d" +disable bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" From ddca9fb2b5ecc7ccaa81fbc73d2b723922c181f1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 7 Mar 2011 08:06:10 +0000 Subject: [PATCH 3/3] trace: Trace posix-aio-compat.c completion and cancellation This patch adds paio_complete() and paio_cancel() trace events to complement the paio_submit() event. Signed-off-by: Stefan Hajnoczi --- posix-aio-compat.c | 5 +++++ trace-events | 2 ++ 2 files changed, 7 insertions(+) diff --git a/posix-aio-compat.c b/posix-aio-compat.c index fa5494db8c..6d4df9da30 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -455,6 +455,9 @@ static int posix_aio_process_queue(void *opaque) } else { ret = -ret; } + + trace_paio_complete(acb, acb->common.opaque, ret); + /* remove the request */ *pacb = acb->next; /* call the callback */ @@ -537,6 +540,8 @@ static void paio_cancel(BlockDriverAIOCB *blockacb) struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb; int active = 0; + trace_paio_cancel(acb, acb->common.opaque); + mutex_lock(&lock); if (!acb->active) { QTAILQ_REMOVE(&request_list, acb, node); diff --git a/trace-events b/trace-events index 0a26d6a94f..90c9e0b5e7 100644 --- a/trace-events +++ b/trace-events @@ -62,6 +62,8 @@ disable virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "re # posix-aio-compat.c disable paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" +disable paio_complete(void *acb, void *opaque, int ret) "acb %p opaque %p ret %d" +disable paio_cancel(void *acb, void *opaque) "acb %p opaque %p" # ioport.c disable cpu_in(unsigned int addr, unsigned int val) "addr %#x value %u"