From 2c109f21994a72e9b91c24c422e540b921e84f66 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:23 +0200 Subject: [PATCH 01/14] simpletrace: add __all__ to define public interface It was unclear what the supported public interface was, i.e. which functions/classes are important to retain when refactoring the code. Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-2-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 1f6d1ae1f3..b221d9a241 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -14,6 +14,8 @@ import inspect from tracetool import read_events, Event from tracetool.backend.simple import is_string +__all__ = ['Analyzer', 'process', 'run'] + header_event_id = 0xffffffffffffffff header_magic = 0xf2b177cb0aa429b4 dropped_event_id = 0xfffffffffffffffe From 8405ec6ab6a05b8c84f7be1130a7891d1a874624 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:24 +0200 Subject: [PATCH 02/14] simpletrace: annotate magic constants from QEMU code It wasn't clear where the constants and structs came from, so I added comments to help. Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-3-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index b221d9a241..5c230a1b74 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -16,6 +16,11 @@ from tracetool.backend.simple import is_string __all__ = ['Analyzer', 'process', 'run'] +# This is the binary format that the QEMU "simple" trace backend +# emits. There is no specification documentation because the format is +# not guaranteed to be stable. Trace files must be parsed with the +# same trace-events-all file and the same simpletrace.py file that +# QEMU was built with. header_event_id = 0xffffffffffffffff header_magic = 0xf2b177cb0aa429b4 dropped_event_id = 0xfffffffffffffffe From f7bd4f0237ec394198940f5b81cbde280c5b5c01 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:25 +0200 Subject: [PATCH 03/14] simpletrace: improve parsing of sys.argv; fix files never closed. The arguments extracted from `sys.argv` are now named and unpacked to make it clear what the arguments are and what they're used for. The two input files were opened, but never explicitly closed. File usage is changed to use the `with` statement to take care of this. At the same time, ownership of the file-object is moved up to the `run` function. Added an option to `process` to support file-like objects.
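As an illustration of the resulting calling convention (this snippet is not part of the patch; the analyzer subclass and file paths are made up), a caller that owns its own file objects can now drive process() directly:

    #!/usr/bin/env python3
    # Illustrative sketch only: assumes scripts/simpletrace.py and the
    # tracetool package are importable, and that both paths exist.
    import simpletrace

    class CountAll(simpletrace.Analyzer):
        def __init__(self):
            self.count = 0

        def catchall(self, event, rec):
            self.count += 1

    analyzer = CountAll()
    # process() now also accepts already-opened file-like objects; the
    # caller keeps ownership and closes them via the with-statement.
    with open('trace-events-all', 'r') as events_fobj, \
         open('trace-12345', 'rb') as log_fobj:
        simpletrace.process(events_fobj, log_fobj, analyzer)
    print(f'{analyzer.count} trace records processed')

Passing a plain file path still works as before; the file-like variant simply leaves the open/close responsibility with the caller.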
Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-4-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 48 +++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 5c230a1b74..283b5918a1 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -9,6 +9,7 @@ # # For help see docs/devel/tracing.rst +import sys import struct import inspect from tracetool import read_events, Event @@ -51,7 +52,6 @@ def get_record(edict, idtoname, rechdr, fobj): try: event = edict[name] except KeyError as e: - import sys sys.stderr.write('%s event is logged but is not declared ' \ 'in the trace events file, try using ' \ 'trace-events-all instead.\n' % str(e)) @@ -172,11 +172,28 @@ class Analyzer(object): pass def process(events, log, analyzer, read_header=True): - """Invoke an analyzer on each event in a log.""" + """Invoke an analyzer on each event in a log. + Args: + events (file-object or list or str): events list or file-like object or file path as str to read event data from + log (file-object or str): file-like object or file path as str to read log data from + analyzer (Analyzer): Instance of Analyzer to interpret the event data + read_header (bool, optional): Whether to read header data from the log data. Defaults to True. + """ + if isinstance(events, str): - events = read_events(open(events, 'r'), events) + with open(events, 'r') as f: + events_list = read_events(f, events) + elif isinstance(events, list): + # Treat as a list of events already produced by tracetool.read_events + events_list = events + else: + # Treat as an already opened file-object + events_list = read_events(events, events.name) + + close_log = False if isinstance(log, str): log = open(log, 'rb') + close_log = True if read_header: read_trace_header(log) @@ -187,12 +204,12 @@ def process(events, log, analyzer, read_header=True): edict = {"dropped": dropped_event} idtoname = {dropped_event_id: "dropped"} - for event in events: + for event in events_list: edict[event.name] = event # If there is no header assume event ID mapping matches events list if not read_header: - for event_id, event in enumerate(events): + for event_id, event in enumerate(events_list): idtoname[event_id] = event.name def build_fn(analyzer, event): @@ -225,24 +242,25 @@ def process(events, log, analyzer, read_header=True): fn_cache[event_num](event, rec) analyzer.end() + if close_log: + log.close() + def run(analyzer): """Execute an analyzer on a trace file given on the command-line. This function is useful as a driver for simple analysis scripts. 
More advanced scripts will want to call process() instead.""" - import sys - read_header = True - if len(sys.argv) == 4 and sys.argv[1] == '--no-header': - read_header = False - del sys.argv[1] - elif len(sys.argv) != 3: - sys.stderr.write('usage: %s [--no-header] <trace-events> ' \ '<trace-file>\n' % sys.argv[0]) + try: + # NOTE: See built-in `argparse` module for a more robust cli interface + *no_header, trace_event_path, trace_file_path = sys.argv[1:] + assert no_header == [] or no_header == ['--no-header'], 'Invalid no-header argument' + except (AssertionError, ValueError): + sys.stderr.write(f'usage: {sys.argv[0]} [--no-header] <trace-events> <trace-file>\n') sys.exit(1) - events = read_events(open(sys.argv[1], 'r'), sys.argv[1]) - process(events, sys.argv[2], analyzer, read_header=read_header) + with open(trace_event_path, 'r') as events_fobj, open(trace_file_path, 'rb') as log_fobj: + process(events_fobj, log_fobj, analyzer, read_header=not no_header) if __name__ == '__main__': class Formatter(Analyzer): From 3b71b61e9f50cad73958a87216dba7b22b851318 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:26 +0200 Subject: [PATCH 04/14] simpletrace: changed naming of edict and idtoname to improve readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Readability is subjective, but I've expanded the naming of the variables and arguments to help with understanding for new eyes on the code. Signed-off-by: Mads Ynddal Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Message-id: 20230926103436.25700-5-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 283b5918a1..09511f624d 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -40,17 +40,17 @@ def read_header(fobj, hfmt): return None return struct.unpack(hfmt, hdr) -def get_record(edict, idtoname, rechdr, fobj): +def get_record(event_mapping, event_id_to_name, rechdr, fobj): """Deserialize a trace record from a file into a tuple (name, timestamp, pid, arg1, ..., arg6).""" if rechdr is None: return None if rechdr[0] != dropped_event_id: event_id = rechdr[0] - name = idtoname[event_id] + name = event_id_to_name[event_id] rec = (name, rechdr[1], rechdr[3]) try: - event = edict[name] + event = event_mapping[name] except KeyError as e: sys.stderr.write('%s event is logged but is not declared ' \ 'in the trace events file, try using ' \ 'trace-events-all instead.\n' % str(e)) @@ -79,10 +79,10 @@ def get_mapping(fobj): return (event_id, name) -def read_record(edict, idtoname, fobj): +def read_record(event_mapping, event_id_to_name, fobj): """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6).""" rechdr = read_header(fobj, rec_header_fmt) - return get_record(edict, idtoname, rechdr, fobj) + return get_record(event_mapping, event_id_to_name, rechdr, fobj) def read_trace_header(fobj): """Read and verify trace file header""" @@ -103,14 +103,14 @@ def read_trace_header(fobj): raise ValueError('Log format %d not supported with this QEMU release!' % log_version) -def read_trace_records(edict, idtoname, fobj): +def read_trace_records(event_mapping, event_id_to_name, fobj): """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6). - Note that `idtoname` is modified if the file contains mapping records.
+ Note that `event_id_to_name` is modified if the file contains mapping records. Args: - edict (str -> Event): events dict, indexed by name - idtoname (int -> str): event names dict, indexed by event ID + event_mapping (str -> Event): events dict, indexed by name + event_id_to_name (int -> str): event names dict, indexed by event ID fobj (file): input file """ @@ -122,9 +122,9 @@ def read_trace_records(edict, idtoname, fobj): (rectype, ) = struct.unpack('=Q', t) if rectype == record_type_mapping: event_id, name = get_mapping(fobj) - idtoname[event_id] = name + event_id_to_name[event_id] = name else: - rec = read_record(edict, idtoname, fobj) + rec = read_record(event_mapping, event_id_to_name, fobj) yield rec @@ -201,16 +201,16 @@ def process(events, log, analyzer, read_header=True): frameinfo = inspect.getframeinfo(inspect.currentframe()) dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)", frameinfo.lineno + 1, frameinfo.filename) - edict = {"dropped": dropped_event} - idtoname = {dropped_event_id: "dropped"} + event_mapping = {"dropped": dropped_event} + event_id_to_name = {dropped_event_id: "dropped"} for event in events_list: - edict[event.name] = event + event_mapping[event.name] = event # If there is no header assume event ID mapping matches events list if not read_header: for event_id, event in enumerate(events_list): - idtoname[event_id] = event.name + event_id_to_name[event_id] = event.name def build_fn(analyzer, event): if isinstance(event, str): @@ -234,9 +234,9 @@ def process(events, log, analyzer, read_header=True): analyzer.begin() fn_cache = {} - for rec in read_trace_records(edict, idtoname, log): + for rec in read_trace_records(event_mapping, event_id_to_name, log): event_num = rec[0] - event = edict[event_num] + event = event_mapping[event_num] if event_num not in fn_cache: fn_cache[event_num] = build_fn(analyzer, event) fn_cache[event_num](event, rec) From ce96eb334dc0a3a7f570f422e8a578495f34e4f2 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:27 +0200 Subject: [PATCH 05/14] simpletrace: update code for Python 3.11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to `getargspec` was deprecated and in Python 3.11 it has been removed in favor of `getfullargspec`. `getfullargspec` is compatible with QEMU's requirement of at least Python version 3.6. 
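For reference, the replacement API can be exercised on its own (illustrative snippet, not part of the patch; the function name is made up):

    import inspect

    def runstate_set(timestamp, new_state):
        pass

    # getargspec() is gone in Python 3.11; getfullargspec() returns the
    # same positional-argument information and exists on Python 3.6+.
    spec = inspect.getfullargspec(runstate_set)
    print(spec.args)        # ['timestamp', 'new_state']
    print(len(spec.args))   # 2

simpletrace subtracts one from this count to skip the bound method's `self` argument when it inspects analyzer methods.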
Reviewed-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-6-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 09511f624d..971b2a0f6a 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -221,7 +221,7 @@ def process(events, log, analyzer, read_header=True): return analyzer.catchall event_argcount = len(event.args) - fn_argcount = len(inspect.getargspec(fn)[0]) - 1 + fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1 if fn_argcount == event_argcount + 1: # Include timestamp as first argument return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount])) From d1f9259014317baa3bce7db7d4c2e927341e3cae Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:28 +0200 Subject: [PATCH 06/14] simpletrace: improved error handling on struct unpack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A failed call to `read_header` wouldn't be handled the same for the two different code paths (one path would try to use `None` as a list). Changed to raise an exception to be handled centrally. This also allows for easier unpacking, as errors have been filtered out. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-7-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 971b2a0f6a..8aea0d169b 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -37,26 +37,24 @@ def read_header(fobj, hfmt): hlen = struct.calcsize(hfmt) hdr = fobj.read(hlen) if len(hdr) != hlen: - return None + raise ValueError('Error reading header.
Wrong filetype provided?') return struct.unpack(hfmt, hdr) def get_record(event_mapping, event_id_to_name, rechdr, fobj): """Deserialize a trace record from a file into a tuple (name, timestamp, pid, arg1, ..., arg6).""" - if rechdr is None: - return None - if rechdr[0] != dropped_event_id: - event_id = rechdr[0] + event_id, timestamp_ns, length, pid = rechdr + if event_id != dropped_event_id: name = event_id_to_name[event_id] - rec = (name, rechdr[1], rechdr[3]) try: event = event_mapping[name] except KeyError as e: - sys.stderr.write('%s event is logged but is not declared ' \ + sys.stderr.write(f'{e} event is logged but is not declared ' \ 'in the trace events file, try using ' \ - 'trace-events-all instead.\n' % str(e)) + 'trace-events-all instead.\n') sys.exit(1) + rec = (name, timestamp_ns, pid) for type, name in event.args: if is_string(type): l = fobj.read(4) @@ -67,9 +65,8 @@ def get_record(event_mapping, event_id_to_name, rechdr, fobj): (value,) = struct.unpack('=Q', fobj.read(8)) rec = rec + (value,) else: - rec = ("dropped", rechdr[1], rechdr[3]) - (value,) = struct.unpack('=Q', fobj.read(8)) - rec = rec + (value,) + (dropped_count,) = struct.unpack('=Q', fobj.read(8)) + rec = ("dropped", timestamp_ns, pid, dropped_count) return rec def get_mapping(fobj): @@ -86,22 +83,16 @@ def read_record(event_mapping, event_id_to_name, fobj): def read_trace_header(fobj): """Read and verify trace file header""" - header = read_header(fobj, log_header_fmt) - if header is None: - raise ValueError('Not a valid trace file!') - if header[0] != header_event_id: - raise ValueError('Not a valid trace file, header id %d != %d' % - (header[0], header_event_id)) - if header[1] != header_magic: - raise ValueError('Not a valid trace file, header magic %d != %d' % - (header[1], header_magic)) + _header_event_id, _header_magic, log_version = read_header(fobj, log_header_fmt) + if _header_event_id != header_event_id: + raise ValueError(f'Not a valid trace file, header id {_header_event_id} != {header_event_id}') + if _header_magic != header_magic: + raise ValueError(f'Not a valid trace file, header magic {_header_magic} != {header_magic}') - log_version = header[2] if log_version not in [0, 2, 3, 4]: - raise ValueError('Unknown version of tracelog format!') + raise ValueError(f'Unknown version {log_version} of tracelog format!') if log_version != 4: - raise ValueError('Log format %d not supported with this QEMU release!' - % log_version) + raise ValueError(f'Log format {log_version} not supported with this QEMU release!') def read_trace_records(event_mapping, event_id_to_name, fobj): """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6). From 1990fb98934d8e114ac73b0ca649bf4820ca896a Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:29 +0200 Subject: [PATCH 07/14] simpletrace: define exception and add handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define `SimpleException` to differentiate our exceptions from generic exceptions (IOError, etc.). Adapted simpletrace to support this and output to stderr. 
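A minimal sketch of how an external analysis script could rely on the new exception type (illustrative only, not part of the patch; SimpleException is defined at module level in simpletrace.py):

    import sys
    import simpletrace

    class MyAnalyzer(simpletrace.Analyzer):
        def catchall(self, event, rec):
            print(event.name)

    try:
        # run() parses sys.argv and now raises SimpleException on usage
        # errors or malformed trace files instead of exiting directly.
        simpletrace.run(MyAnalyzer())
    except simpletrace.SimpleException as e:
        sys.stderr.write(str(e) + '\n')
        sys.exit(1)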
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-8-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 8aea0d169b..229b10aa99 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -32,12 +32,15 @@ record_type_event = 1 log_header_fmt = '=QQQ' rec_header_fmt = '=QQII' +class SimpleException(Exception): + pass + def read_header(fobj, hfmt): '''Read a trace record header''' hlen = struct.calcsize(hfmt) hdr = fobj.read(hlen) if len(hdr) != hlen: - raise ValueError('Error reading header. Wrong filetype provided?') + raise SimpleException('Error reading header. Wrong filetype provided?') return struct.unpack(hfmt, hdr) def get_record(event_mapping, event_id_to_name, rechdr, fobj): @@ -49,10 +52,10 @@ def get_record(event_mapping, event_id_to_name, rechdr, fobj): try: event = event_mapping[name] except KeyError as e: - sys.stderr.write(f'{e} event is logged but is not declared ' \ 'in the trace events file, try using ' \ 'trace-events-all instead.\n') - sys.exit(1) + raise SimpleException( + f'{e} event is logged but is not declared in the trace events' + 'file, try using trace-events-all instead.' + ) rec = (name, timestamp_ns, pid) for type, name in event.args: @@ -247,8 +250,7 @@ def run(analyzer): *no_header, trace_event_path, trace_file_path = sys.argv[1:] assert no_header == [] or no_header == ['--no-header'], 'Invalid no-header argument' except (AssertionError, ValueError): - sys.stderr.write(f'usage: {sys.argv[0]} [--no-header] <trace-events> <trace-file>\n') - sys.exit(1) + raise SimpleException(f'usage: {sys.argv[0]} [--no-header] <trace-events> <trace-file>\n') with open(trace_event_path, 'r') as events_fobj, open(trace_file_path, 'rb') as log_fobj: process(events_fobj, log_fobj, analyzer, read_header=not no_header) if __name__ == '__main__': @@ -276,4 +278,8 @@ if __name__ == '__main__': i += 1 print(' '.join(fields)) - run(Formatter()) + try: + run(Formatter()) + except SimpleException as e: + sys.stderr.write(str(e) + "\n") + sys.exit(1) From 87617b9ae63c4675460244d067e246b362dc3867 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:30 +0200 Subject: [PATCH 08/14] simpletrace: made Analyzer into context-manager Instead of explicitly calling `begin` and `end`, we can change the class to use the context-manager paradigm. This is mostly a styling choice, used in modern Python code. But it also allows for more advanced analyzers to handle exceptions gracefully in the `__exit__` method (not demonstrated here). Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-9-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 31 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 229b10aa99..7f514d1577 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -122,12 +122,13 @@ def read_trace_records(event_mapping, event_id_to_name, fobj): yield rec -class Analyzer(object): +class Analyzer: """A trace file analyzer which processes trace records. An analyzer can be passed to run() or process(). The begin() method is invoked, then each trace record is processed, and finally the end() method - is invoked. + is invoked. When Analyzer is used as a context-manager (using the `with` + statement), begin() and end() are called automatically.
If a method matching a trace event name exists, it is invoked to process that trace record. Otherwise the catchall() method is invoked. @@ -165,6 +166,15 @@ class Analyzer: """Called at the end of the trace.""" pass + def __enter__(self): + self.begin() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is None: + self.end() + return False + def process(events, log, analyzer, read_header=True): """Invoke an analyzer on each event in a log. Args: @@ -226,15 +236,14 @@ def process(events, log, analyzer, read_header=True): # Just arguments, no timestamp or pid return lambda _, rec: fn(*rec[3:3 + event_argcount]) - analyzer.begin() - fn_cache = {} - for rec in read_trace_records(event_mapping, event_id_to_name, log): - event_num = rec[0] - event = event_mapping[event_num] - if event_num not in fn_cache: - fn_cache[event_num] = build_fn(analyzer, event) - fn_cache[event_num](event, rec) - analyzer.end() + with analyzer: + fn_cache = {} + for rec in read_trace_records(event_mapping, event_id_to_name, log): + event_num = rec[0] + event = event_mapping[event_num] + if event_num not in fn_cache: + fn_cache[event_num] = build_fn(analyzer, event) + fn_cache[event_num](event, rec) if close_log: log.close() From 6f53641a980df2d1b38503b10bfda9236fa075b4 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:31 +0200 Subject: [PATCH 09/14] simpletrace: refactor to separate responsibilities Moved event_mapping and event_id_to_name down one level in the function call-stack to keep variable instantiation and usage closer (`process` and `run` have no use of the variables; `read_trace_records` does). Instead of passing event_mapping and event_id_to_name to the bottom of the call-stack, we move their use to `read_trace_records`. This separates responsibility and ownership of the information. `read_record` now just reads the arguments from the file-object by knowing the total number of bytes. Parsing them into specific arguments is moved up to `read_trace_records`. Special handling of dropped events removed, as they can be handled by the general code. Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-10-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 115 +++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 62 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 7f514d1577..0826aef283 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -31,6 +31,7 @@ record_type_event = 1 log_header_fmt = '=QQQ' rec_header_fmt = '=QQII' +rec_header_fmt_len = struct.calcsize(rec_header_fmt) class SimpleException(Exception): pass @@ -43,35 +44,6 @@ def read_header(fobj, hfmt): raise SimpleException('Error reading header. Wrong filetype provided?') return struct.unpack(hfmt, hdr) -def get_record(event_mapping, event_id_to_name, rechdr, fobj): - """Deserialize a trace record from a file into a tuple - (name, timestamp, pid, arg1, ..., arg6).""" - event_id, timestamp_ns, length, pid = rechdr - if event_id != dropped_event_id: - name = event_id_to_name[event_id] - try: - event = event_mapping[name] - except KeyError as e: - raise SimpleException( - f'{e} event is logged but is not declared in the trace events' - 'file, try using trace-events-all instead.'
- ) - - rec = (name, timestamp_ns, pid) - for type, name in event.args: - if is_string(type): - l = fobj.read(4) - (len,) = struct.unpack('=L', l) - s = fobj.read(len) - rec = rec + (s,) - else: - (value,) = struct.unpack('=Q', fobj.read(8)) - rec = rec + (value,) - else: - (dropped_count,) = struct.unpack('=Q', fobj.read(8)) - rec = ("dropped", timestamp_ns, pid, dropped_count) - return rec - def get_mapping(fobj): (event_id, ) = struct.unpack('=Q', fobj.read(8)) (len, ) = struct.unpack('=L', fobj.read(4)) @@ -79,10 +51,11 @@ def get_mapping(fobj): return (event_id, name) -def read_record(event_mapping, event_id_to_name, fobj): - """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6).""" - rechdr = read_header(fobj, rec_header_fmt) - return get_record(event_mapping, event_id_to_name, rechdr, fobj) +def read_record(fobj): + """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, args).""" + event_id, timestamp_ns, record_length, record_pid = read_header(fobj, rec_header_fmt) + args_payload = fobj.read(record_length - rec_header_fmt_len) + return (event_id, timestamp_ns, record_pid, args_payload) def read_trace_header(fobj): """Read and verify trace file header""" @@ -97,17 +70,28 @@ def read_trace_header(fobj): if log_version != 4: raise ValueError(f'Log format {log_version} not supported with this QEMU release!') -def read_trace_records(event_mapping, event_id_to_name, fobj): - """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6). - - Note that `event_id_to_name` is modified if the file contains mapping records. +def read_trace_records(events, fobj, read_header): + """Deserialize trace records from a file, yielding record tuples (event, event_num, timestamp, pid, arg1, ..., arg6). Args: event_mapping (str -> Event): events dict, indexed by name - event_id_to_name (int -> str): event names dict, indexed by event ID fobj (file): input file + read_header (bool): whether headers were read from fobj """ + frameinfo = inspect.getframeinfo(inspect.currentframe()) + dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)", + frameinfo.lineno + 1, frameinfo.filename) + + event_mapping = {e.name: e for e in events} + event_mapping["dropped"] = dropped_event + event_id_to_name = {dropped_event_id: "dropped"} + + # If there is no header assume event ID mapping matches events list + if not read_header: + for event_id, event in enumerate(events): + event_id_to_name[event_id] = event.name + while True: t = fobj.read(8) if len(t) == 0: @@ -115,12 +99,35 @@ def read_trace_records(event_mapping, event_id_to_name, fobj): (rectype, ) = struct.unpack('=Q', t) if rectype == record_type_mapping: - event_id, name = get_mapping(fobj) - event_id_to_name[event_id] = name + event_id, event_name = get_mapping(fobj) + event_id_to_name[event_id] = event_name else: - rec = read_record(event_mapping, event_id_to_name, fobj) + event_id, timestamp_ns, pid, args_payload = read_record(fobj) + event_name = event_id_to_name[event_id] - yield rec + try: + event = event_mapping[event_name] + except KeyError as e: + raise SimpleException( + f'{e} event is logged but is not declared in the trace events' + 'file, try using trace-events-all instead.' 
+ ) + + offset = 0 + args = [] + for type, _ in event.args: + if is_string(type): + (length,) = struct.unpack_from('=L', args_payload, offset=offset) + offset += 4 + s = args_payload[offset:offset+length] + offset += length + args.append(s) + else: + (value,) = struct.unpack_from('=Q', args_payload, offset=offset) + offset += 8 + args.append(value) + + yield (event_mapping[event_name], event_name, timestamp_ns, pid) + tuple(args) class Analyzer: """A trace file analyzer which processes trace records. @@ -202,20 +209,6 @@ def process(events, log, analyzer, read_header=True): if read_header: read_trace_header(log) - frameinfo = inspect.getframeinfo(inspect.currentframe()) - dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)", - frameinfo.lineno + 1, frameinfo.filename) - event_mapping = {"dropped": dropped_event} - event_id_to_name = {dropped_event_id: "dropped"} - - for event in events_list: - event_mapping[event.name] = event - - # If there is no header assume event ID mapping matches events list - if not read_header: - for event_id, event in enumerate(events_list): - event_id_to_name[event_id] = event.name - def build_fn(analyzer, event): if isinstance(event, str): return analyzer.catchall @@ -238,12 +231,10 @@ def process(events, log, analyzer, read_header=True): with analyzer: fn_cache = {} - for rec in read_trace_records(event_mapping, event_id_to_name, log): - event_num = rec[0] - event = event_mapping[event_num] - if event_num not in fn_cache: - fn_cache[event_num] = build_fn(analyzer, event) - fn_cache[event_num](event, rec) + for event, event_id, timestamp_ns, record_pid, *rec_args in read_trace_records(events, log, read_header): + if event_id not in fn_cache: + fn_cache[event_id] = build_fn(analyzer, event) + fn_cache[event_id](event, (event_id, timestamp_ns, record_pid, *rec_args)) if close_log: log.close() From b78234e65cad8c4899a2cfcc5e5cd8d33fce1b95 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:32 +0200 Subject: [PATCH 10/14] simpletrace: move logic of process into internal function To avoid duplicate code depending on input types and to better handle open/close of log with a context-manager, we move the logic of process into _process. Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-11-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 0826aef283..6969fdd59a 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -201,13 +201,26 @@ def process(events, log, analyzer, read_header=True): # Treat as an already opened file-object events_list = read_events(events, events.name) - close_log = False if isinstance(log, str): - log = open(log, 'rb') - close_log = True + with open(log, 'rb') as log_fobj: + _process(events_list, log_fobj, analyzer, read_header) + else: + # Treat `log` as an already opened file-object. We will not close it, + # as we do not own it. + _process(events_list, log, analyzer, read_header) + +def _process(events, log_fobj, analyzer, read_header=True): + """Internal function for processing + + Args: + events (list): list of events already produced by tracetool.read_events + log_fobj (file): file-object to read log data from + analyzer (Analyzer): the Analyzer to interpret the event data + read_header (bool, optional): Whether to read header data from the log data. Defaults to True. 
+ """ if read_header: - read_trace_header(log) + read_trace_header(log_fobj) def build_fn(analyzer, event): if isinstance(event, str): @@ -231,14 +244,11 @@ def process(events, log, analyzer, read_header=True): with analyzer: fn_cache = {} - for event, event_id, timestamp_ns, record_pid, *rec_args in read_trace_records(events, log, read_header): + for event, event_id, timestamp_ns, record_pid, *rec_args in read_trace_records(events, log_fobj, read_header): if event_id not in fn_cache: fn_cache[event_id] = build_fn(analyzer, event) fn_cache[event_id](event, (event_id, timestamp_ns, record_pid, *rec_args)) - if close_log: - log.close() - def run(analyzer): """Execute an analyzer on a trace file given on the command-line. From d1f89c23bd93e40e180d4fc727e691a698a6d522 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:33 +0200 Subject: [PATCH 11/14] simpletrace: move event processing to Analyzer class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moved event processing to the Analyzer class to separate specific analyzer logic (like caching and function signatures) from the _process function. This allows for new types of Analyzer-based subclasses without changing the core code. Note, that the fn_cache is important for performance in cases where the analyzer is branching away from the catch-all a lot. The cache has no measurable performance penalty. Reviewed-by: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-12-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 60 +++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 6969fdd59a..4136d00600 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -169,6 +169,35 @@ class Analyzer: """Called if no specific method for processing a trace event has been found.""" pass + def _build_fn(self, event): + fn = getattr(self, event.name, None) + if fn is None: + # Return early to avoid costly call to inspect.getfullargspec + return self.catchall + + event_argcount = len(event.args) + fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1 + if fn_argcount == event_argcount + 1: + # Include timestamp as first argument + return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount])) + elif fn_argcount == event_argcount + 2: + # Include timestamp and pid + return lambda _, rec: fn(*rec[1:3 + event_argcount]) + else: + # Just arguments, no timestamp or pid + return lambda _, rec: fn(*rec[3:3 + event_argcount]) + + def _process_event(self, rec_args, *, event, event_id, timestamp_ns, pid, **kwargs): + if not hasattr(self, '_fn_cache'): + # NOTE: Cannot depend on downstream subclasses to have + # super().__init__() because of legacy. 
+ self._fn_cache = {} + + rec = (event_id, timestamp_ns, pid, *rec_args) + if event_id not in self._fn_cache: + self._fn_cache[event_id] = self._build_fn(event) + self._fn_cache[event_id](event, rec) + def end(self): """Called at the end of the trace.""" pass @@ -222,32 +251,15 @@ def _process(events, log_fobj, analyzer, read_header=True): if read_header: read_trace_header(log_fobj) - def build_fn(analyzer, event): - if isinstance(event, str): - return analyzer.catchall - - fn = getattr(analyzer, event.name, None) - if fn is None: - return analyzer.catchall - - event_argcount = len(event.args) - fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1 - if fn_argcount == event_argcount + 1: - # Include timestamp as first argument - return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount])) - elif fn_argcount == event_argcount + 2: - # Include timestamp and pid - return lambda _, rec: fn(*rec[1:3 + event_argcount]) - else: - # Just arguments, no timestamp or pid - return lambda _, rec: fn(*rec[3:3 + event_argcount]) - with analyzer: for event, event_id, timestamp_ns, record_pid, *rec_args in read_trace_records(events, log_fobj, read_header): analyzer._process_event( rec_args, event=event, event_id=event_id, timestamp_ns=timestamp_ns, pid=record_pid, ) def run(analyzer): """Execute an analyzer on a trace file given on the command-line. From 3470fef15a7bff730b00633346d28b889027a609 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:34 +0200 Subject: [PATCH 12/14] simpletrace: added simplified Analyzer2 class By moving the dynamic argument construction to keyword-arguments, we can remove all of the specialized handling, and streamline it. If a tracing method wants to access these, they can define the kwargs, or ignore them by placing `**kwargs` at the end of the function's arguments list. Added deprecation warning to Analyzer class to make users aware of the Analyzer2 class. No removal date is planned. Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-13-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/simpletrace.py | 94 ++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 21 deletions(-) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 4136d00600..cef81b0707 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -12,10 +12,11 @@ import sys import struct import inspect +import warnings from tracetool import read_events, Event from tracetool.backend.simple import is_string -__all__ = ['Analyzer', 'process', 'run'] +__all__ = ['Analyzer', 'Analyzer2', 'process', 'run'] # This is the binary format that the QEMU "simple" trace backend # emits. There is no specification documentation because the format is # not guaranteed to be stable. Trace files must be parsed with the @@ -130,7 +131,9 @@ def read_trace_records(events, fobj, read_header): yield (event_mapping[event_name], event_name, timestamp_ns, pid) + tuple(args) class Analyzer: - """A trace file analyzer which processes trace records. + """[Deprecated. Refer to Analyzer2 instead.] + + A trace file analyzer which processes trace records. An analyzer can be passed to run() or process().
The begin() method is invoked, then each trace record is processed, and finally the end() method @@ -188,6 +191,11 @@ class Analyzer: return lambda _, rec: fn(*rec[3:3 + event_argcount]) def _process_event(self, rec_args, *, event, event_id, timestamp_ns, pid, **kwargs): + warnings.warn( + "Use of deprecated Analyzer class. Refer to Analyzer2 instead.", + DeprecationWarning, + ) + if not hasattr(self, '_fn_cache'): # NOTE: Cannot depend on downstream subclasses to have # super().__init__() because of legacy. @@ -211,6 +219,56 @@ class Analyzer: self.end() return False +class Analyzer2(Analyzer): + """A trace file analyzer which processes trace records. + + An analyzer can be passed to run() or process(). The begin() method is + invoked, then each trace record is processed, and finally the end() method + is invoked. When Analyzer is used as a context-manager (using the `with` + statement), begin() and end() are called automatically. + + If a method matching a trace event name exists, it is invoked to process + that trace record. Otherwise the catchall() method is invoked. + + The methods are called with a set of keyword-arguments. These can be ignored + using `**kwargs` or defined like any keyword-argument. + + The following keyword-arguments are available, but make sure to have an + **kwargs to allow for unmatched arguments in the future: + event: Event object of current trace + event_id: The id of the event in the current trace file + timestamp_ns: The timestamp in nanoseconds of the trace + pid: The process id recorded for the given trace + + Example: + The following method handles the runstate_set(int new_state) trace event:: + + def runstate_set(self, new_state, **kwargs): + ... + + The method can also explicitly take a timestamp keyword-argument with the + trace event arguments:: + + def runstate_set(self, new_state, *, timestamp_ns, **kwargs): + ... + + Timestamps have the uint64_t type and are in nanoseconds. + + The pid can be included in addition to the timestamp and is useful when + dealing with traces from multiple processes: + + def runstate_set(self, new_state, *, timestamp_ns, pid, **kwargs): + ... + """ + + def catchall(self, *rec_args, event, timestamp_ns, pid, event_id, **kwargs): + """Called if no specific method for processing a trace event has been found.""" + pass + + def _process_event(self, rec_args, *, event, **kwargs): + fn = getattr(self, event.name, self.catchall) + fn(*rec_args, event=event, **kwargs) + def process(events, log, analyzer, read_header=True): """Invoke an analyzer on each event in a log. 
Args: events (file-object or list or str): events list or file-like object or file path as str to read event data from log (file-object or str): file-like object or file path as str to read log data from analyzer (Analyzer): Instance of Analyzer to interpret the event data read_header (bool, optional): Whether to read header data from the log data. Defaults to True. """ @@ -278,30 +336,24 @@ def run(analyzer): process(events_fobj, log_fobj, analyzer, read_header=not no_header) if __name__ == '__main__': - class Formatter(Analyzer): + class Formatter2(Analyzer2): def __init__(self): - self.last_timestamp = None + self.last_timestamp_ns = None - def catchall(self, event, rec): - timestamp = rec[1] - if self.last_timestamp is None: - self.last_timestamp = timestamp - delta_ns = timestamp - self.last_timestamp - self.last_timestamp = timestamp + def catchall(self, *rec_args, event, timestamp_ns, pid, event_id): + if self.last_timestamp_ns is None: + self.last_timestamp_ns = timestamp_ns + delta_ns = timestamp_ns - self.last_timestamp_ns + self.last_timestamp_ns = timestamp_ns - fields = [event.name, '%0.3f' % (delta_ns / 1000.0), 'pid=%d' % rec[2]] - i = 3 - for type, name in event.args: - if is_string(type): - fields.append('%s=%s' % (name, rec[i])) - else: - fields.append('%s=0x%x' % (name, rec[i])) - i += 1 - print(' '.join(fields)) + fields = [ + f'{name}={r}' if is_string(type) else f'{name}=0x{r:x}' + for r, (type, name) in zip(rec_args, event.args) + ] + print(f'{event.name} {delta_ns / 1000:0.3f} {pid=} ' + ' '.join(fields)) try: - run(Formatter()) + run(Formatter2()) except SimpleException as e: sys.stderr.write(str(e) + "\n") sys.exit(1) From 84197267d1620405c664f6d4510288d0fd3a8837 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:35 +0200 Subject: [PATCH 13/14] MAINTAINERS: add maintainer of simpletrace.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my work to refactor simpletrace.py, I noticed that there's no maintainer of it, and that it has the status of "odd fixes". I'm using it from time to time, so I'd like to maintain the script. I've added myself as reviewer under "Tracing" to be informed of changes that might affect simpletrace.py. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-14-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 355b1960ce..81625f036b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3170,6 +3170,7 @@ F: stubs/ Tracing M: Stefan Hajnoczi +R: Mads Ynddal S: Maintained F: trace/ F: trace-events @@ -3182,6 +3183,11 @@ F: docs/tools/qemu-trace-stap.rst F: docs/devel/tracing.rst T: git https://github.com/stefanha/qemu.git tracing +Simpletrace +M: Mads Ynddal +S: Maintained +F: scripts/simpletrace.py + TPM M: Stefan Berger S: Maintained From ff014701ab0f09fdbf37b4c39334790f9b298253 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Tue, 26 Sep 2023 12:34:36 +0200 Subject: [PATCH 14/14] scripts/analyse-locks-simpletrace.py: changed iteritems() to items() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python 3 removed `dict.iteritems()` in favor of `dict.items()`. This means the script currently doesn't work on Python 3.
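For reference, the Python 3 idiom the patch switches to (illustrative snippet with made-up data):

    # Python 2: mutex_records.iteritems() -- removed in Python 3.
    # Python 3: items() returns a view that can be iterated and sorted.
    mutex_records = {0x1000: {"locks": 3}, 0x2000: {"locks": 7}}
    for key, val in sorted(mutex_records.items(), key=lambda k_v: k_v[1]["locks"]):
        print("Lock: %#x locks: %d" % (key, val["locks"]))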
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Signed-off-by: Mads Ynddal Message-id: 20230926103436.25700-15-mads@ynddal.dk Signed-off-by: Stefan Hajnoczi --- scripts/analyse-locks-simpletrace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/analyse-locks-simpletrace.py b/scripts/analyse-locks-simpletrace.py index 63c11f4fce..d650dd7140 100755 --- a/scripts/analyse-locks-simpletrace.py +++ b/scripts/analyse-locks-simpletrace.py @@ -75,7 +75,7 @@ if __name__ == '__main__': (analyser.locks, analyser.locked, analyser.unlocks)) # Now dump the individual lock stats - for key, val in sorted(analyser.mutex_records.iteritems(), + for key, val in sorted(analyser.mutex_records.items(), key=lambda k_v: k_v[1]["locks"]): print ("Lock: %#x locks: %d, locked: %d, unlocked: %d" % (key, val["locks"], val["locked"], val["unlocked"]))
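With the whole series applied, an analysis script can be written against the new Analyzer2 keyword-argument interface. A minimal sketch (not part of the series; it only uses the generic catchall() hook, so it does not depend on any particular trace event's signature):

    #!/usr/bin/env python3
    # Illustrative sketch only: counts every event in a trace using the
    # keyword-argument interface introduced by Analyzer2.
    import simpletrace

    class EventCounter(simpletrace.Analyzer2):
        def begin(self):
            self.counts = {}

        # Trace arguments arrive positionally; event, event_id,
        # timestamp_ns and pid arrive as keyword arguments.
        def catchall(self, *rec_args, event, timestamp_ns, pid, event_id, **kwargs):
            self.counts[event.name] = self.counts.get(event.name, 0) + 1

        def end(self):
            for name, count in sorted(self.counts.items(), key=lambda kv: kv[1]):
                print(f'{count:8d} {name}')

    if __name__ == '__main__':
        # Reads <trace-events> and <trace-file> from the command line,
        # like the built-in Formatter2 example above.
        simpletrace.run(EventCounter())

Per-event methods such as def runstate_set(self, new_state, **kwargs): can be added alongside catchall(), as described in the Analyzer2 docstring.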