From b094f2e015e75fd2d1ddb773061700f5e474f22a Mon Sep 17 00:00:00 2001 From: Rita Sinha Date: Tue, 8 Mar 2016 00:52:05 +0530 Subject: [PATCH 1/8] kvm: x86: q35: Add support for -machine kernel_irqchip=split for q35 The split IRQ chip mode via KVM_CAP_SPLIT_IRQCHIP was introduced with commit 15eafc2e60 but was broken for q35. This patch makes kernel_irqchip=split functional for q35. Signed-off-by: Rita Sinha Message-Id: <1457378525-16455-1-git-send-email-rita.sinha89@gmail.com> Reviewed-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- hw/i386/pc_q35.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 17915b05c4..9ee939b4c2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -39,6 +39,7 @@ #include "hw/kvm/clock.h" #include "hw/pci-host/q35.h" #include "exec/address-spaces.h" +#include "hw/i386/pc.h" #include "hw/i386/ich9.h" #include "hw/smbios/smbios.h" #include "hw/ide/pci.h" @@ -146,7 +147,7 @@ static void pc_q35_init(MachineState *machine) /* irq lines */ gsi_state = g_malloc0(sizeof(*gsi_state)); - if (kvm_irqchip_in_kernel()) { + if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pcmc->pci_enabled); gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, GSI_NUM_PINS); @@ -193,7 +194,7 @@ static void pc_q35_init(MachineState *machine) /*end early*/ isa_bus_irqs(isa_bus, gsi); - if (kvm_irqchip_in_kernel()) { + if (kvm_pic_in_kernel()) { i8259 = kvm_i8259_init(isa_bus); } else if (xen_enabled()) { i8259 = xen_interrupt_controller_init(); From 2ae823d4f707df05f28509dfa7ae7293b8e9164f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 15 Feb 2016 15:59:41 +1100 Subject: [PATCH 2/8] update-linux-headers: Add userfaultfd.h userfaultfd.h is used by post-copy migration, so add it to update-linux-headers.sh, as we want it updated together with the other kernel headers. 
Signed-off-by: Alexey Kardashevskiy Message-Id: <1455512381-15271-1-git-send-email-aik@ozlabs.ru> Acked-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- scripts/update-linux-headers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index ff5b0c7033..6aa8407f12 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -103,7 +103,7 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in kvm.h kvm_para.h vfio.h vhost.h \ - psci.h; do + psci.h userfaultfd.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done rm -rf "$output/linux-headers/asm-generic" From fd97fd4408040a9a6dfaf2fdaeca1c566db6d0aa Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 7 Mar 2016 20:25:13 +0100 Subject: [PATCH 3/8] exec: Fix memory allocation when memory path names new file Commit 8d31d6b extended file_ram_alloc() to accept file names in addition to directory names. Even though it passes O_CREAT to open(), it actually works only for existing files. Reproducer adapted from the commit's qemu-doc.texi update: $ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1 qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory This is because we first get the page size for @path, then open the actual file. Unwise even before the flawed commit, because the directory could change in between, invalidating the page size. Unlikely to bite in practice. Rearrange the code to create the file (if necessary) before getting its page size. Carefully avoid TOCTTOU conditions with a method suggested by Paolo Bonzini. 
While there, replace "hugepages" by "guest RAM" in error messages, because host memory backends can be used for purposes other than huge pages, e.g. /dev/shm/ shared memory. Help text of -mem-path agrees. Cc: Paolo Bonzini Signed-off-by: Markus Armbruster Message-Id: <1457378754-21649-2-git-send-email-armbru@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 115 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/exec.c b/exec.c index f09dd4e928..3380836748 100644 --- a/exec.c +++ b/exec.c @@ -1233,19 +1233,17 @@ void qemu_mutex_unlock_ramlist(void) #define HUGETLBFS_MAGIC 0x958458f6 -static long gethugepagesize(const char *path, Error **errp) +static long gethugepagesize(int fd) { struct statfs fs; int ret; do { - ret = statfs(path, &fs); + ret = fstatfs(fd, &fs); } while (ret != 0 && errno == EINTR); if (ret != 0) { - error_setg_errno(errp, errno, "failed to get page size of file %s", - path); - return 0; + return -1; } return fs.f_bsize; @@ -1256,63 +1254,82 @@ static void *file_ram_alloc(RAMBlock *block, const char *path, Error **errp) { - struct stat st; + bool unlink_on_error = false; char *filename; char *sanitized_name; char *c; void *area; int fd; - uint64_t hpagesize; - Error *local_err = NULL; + int64_t hpagesize; - hpagesize = gethugepagesize(path, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); + return NULL; + } + + for (;;) { + fd = open(path, O_RDWR); + if (fd >= 0) { + /* @path names an existing file, use it */ + break; + } + if (errno == ENOENT) { + /* @path names a file that doesn't exist, create it */ + fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd >= 0) { + unlink_on_error = true; + break; + } + } else if (errno == EISDIR) { + /* @path names a directory, create a file there */ + /* Make name safe to use with mkstemp by 
replacing '/' with '_'. */ + sanitized_name = g_strdup(memory_region_name(block->mr)); + for (c = sanitized_name; *c != '\0'; c++) { + if (*c == '/') { + *c = '_'; + } + } + + filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, + sanitized_name); + g_free(sanitized_name); + + fd = mkstemp(filename); + if (fd >= 0) { + unlink(filename); + g_free(filename); + break; + } + g_free(filename); + } + if (errno != EEXIST && errno != EINTR) { + error_setg_errno(errp, errno, + "can't open backing store %s for guest RAM", + path); + goto error; + } + /* + * Try again on EINTR and EEXIST. The latter happens when + * something else creates the file between our two open(). + */ + } + + hpagesize = gethugepagesize(fd); + if (hpagesize < 0) { + error_setg_errno(errp, errno, "can't get page size for %s", + path); goto error; } block->mr->align = hpagesize; if (memory < hpagesize) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " - "or larger than huge page size 0x%" PRIx64, + "or larger than page size 0x%" PRIx64, memory, hpagesize); goto error; } - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_setg(errp, - "host lacks kvm mmu notifiers, -mem-path unsupported"); - goto error; - } - - if (!stat(path, &st) && S_ISDIR(st.st_mode)) { - /* Make name safe to use with mkstemp by replacing '/' with '_'. 
*/ - sanitized_name = g_strdup(memory_region_name(block->mr)); - for (c = sanitized_name; *c != '\0'; c++) { - if (*c == '/') { - *c = '_'; - } - } - - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, - sanitized_name); - g_free(sanitized_name); - - fd = mkstemp(filename); - if (fd >= 0) { - unlink(filename); - } - g_free(filename); - } else { - fd = open(path, O_RDWR | O_CREAT, 0644); - } - - if (fd < 0) { - error_setg_errno(errp, errno, - "unable to create backing store for hugepages"); - goto error; - } - memory = ROUND_UP(memory, hpagesize); /* @@ -1328,7 +1345,7 @@ static void *file_ram_alloc(RAMBlock *block, area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); if (area == MAP_FAILED) { error_setg_errno(errp, errno, - "unable to map backing store for hugepages"); + "unable to map backing store for guest RAM"); close(fd); goto error; } @@ -1341,6 +1358,10 @@ static void *file_ram_alloc(RAMBlock *block, return area; error: + if (unlink_on_error) { + unlink(path); + } + close(fd); return NULL; } #endif From e1fb6471999939539ecfb21b41cbbb24047fa4dc Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 7 Mar 2016 20:25:14 +0100 Subject: [PATCH 4/8] exec: Fix memory allocation when memory path isn't on hugetlbfs gethugepagesize() works reliably only when its argument is on hugetlbfs. When it's not, it returns the filesystem's "optimal transfer block size", which may or may not be the actual page size you'll get when you mmap(). If the value is too small or not a power of two, we fail qemu_ram_mmap()'s assertions. These were added in commit 794e8f3 (v2.5.0). The bug's impact before that is currently unknown. Seems fairly unlikely at least when the normal page size is 4KiB. Else, if the value is too large, we align more strictly than necessary. gethugepagesize() goes back to commit c902760 (v0.13). That commit clearly intended gethugepagesize() to be used on hugetlbfs only. 
Not only was it named accordingly, it also printed a warning when used on anything else. However, the commit neglected to spell out the restriction in user documentation of -mem-path. Commit bfc2a1a (v2.5.0) dropped the warning as bogus "because QEMU functions perfectly well with the path on a regular tmpfs filesystem". It sure does when you're sufficiently lucky. In my testing, I was lucky, too. Fix by switching to qemu_fd_getpagesize(). Rename the variable holding its result from hpagesize to page_size. Cc: Paolo Bonzini Signed-off-by: Markus Armbruster Message-Id: <1457378754-21649-3-git-send-email-armbru@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/exec.c b/exec.c index 3380836748..274b619f88 100644 --- a/exec.c +++ b/exec.c @@ -1228,27 +1228,6 @@ void qemu_mutex_unlock_ramlist(void) } #ifdef __linux__ - -#include - -#define HUGETLBFS_MAGIC 0x958458f6 - -static long gethugepagesize(int fd) -{ - struct statfs fs; - int ret; - - do { - ret = fstatfs(fd, &fs); - } while (ret != 0 && errno == EINTR); - - if (ret != 0) { - return -1; - } - - return fs.f_bsize; -} - static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, const char *path, @@ -1260,7 +1239,7 @@ static void *file_ram_alloc(RAMBlock *block, char *c; void *area; int fd; - int64_t hpagesize; + int64_t page_size; if (kvm_enabled() && !kvm_has_sync_mmu()) { error_setg(errp, @@ -1315,22 +1294,17 @@ static void *file_ram_alloc(RAMBlock *block, */ } - hpagesize = gethugepagesize(fd); - if (hpagesize < 0) { - error_setg_errno(errp, errno, "can't get page size for %s", - path); - goto error; - } - block->mr->align = hpagesize; + page_size = qemu_fd_getpagesize(fd); + block->mr->align = page_size; - if (memory < hpagesize) { + if (memory < page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " "or larger than page size 0x%" PRIx64, - memory, hpagesize); + memory, 
page_size); goto error; } - memory = ROUND_UP(memory, hpagesize); + memory = ROUND_UP(memory, page_size); /* * ftruncate is not supported by hugetlbfs in older @@ -1342,7 +1316,7 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); + area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); From 39c350ee12e733070e63d64a21bd42607366ea99 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Mar 2016 18:14:01 +0100 Subject: [PATCH 5/8] exec: fix early return from ram_block_add After reporting an error, ram_block_add was going on with the registration of the RAMBlock. The visible effect is that it unlocked the ramlist mutex twice. Fixes: 528f46af6ecd1e300db18684969104d4067b867b Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exec.c b/exec.c index 274b619f88..f398d212f6 100644 --- a/exec.c +++ b/exec.c @@ -1589,6 +1589,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) if (err) { error_propagate(errp, err); qemu_mutex_unlock_ramlist(); + return; } } else { new_block->host = phys_mem_alloc(new_block->max_length, @@ -1598,6 +1599,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) "cannot set up guest memory '%s'", memory_region_name(new_block->mr)); qemu_mutex_unlock_ramlist(); + return; } memory_try_enable_merging(new_block->host, new_block->max_length); } From 33577b47c64435fcc2a1bc01c7e82534256f1fc3 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Mon, 14 Mar 2016 10:44:36 +0300 Subject: [PATCH 6/8] replay: character devices This patch implements record and replay of character devices. It records chardevs communication in replay mode. 
Recorded information include data read from backend and counter of bytes written from frontend to backend to preserve frontend internal state. If character device was configured through the command line in record mode, then in replay mode it should be also added to command line. Backend of the character device could be changed in replay mode. Replaying of devices that perform ioctl and get_msgfd operations is not supported. gdbstub which also acts as a backend is not recorded to allow controlling the replaying through gdb. Monitor backends are also not recorded. Signed-off-by: Pavel Dovgalyuk Message-Id: <20160314074436.4980.83856.stgit@PASHA-ISP> [Add stubs. - Paolo] Signed-off-by: Paolo Bonzini --- gdbstub.c | 2 +- include/sysemu/char.h | 26 ++++++ include/sysemu/replay.h | 17 ++++ qemu-char.c | 144 ++++++++++++++++++++++++++------- replay/Makefile.objs | 1 + replay/replay-char.c | 168 +++++++++++++++++++++++++++++++++++++++ replay/replay-events.c | 17 +++- replay/replay-internal.h | 18 +++++ replay/replay.c | 2 +- stubs/replay.c | 34 ++++++++ 10 files changed, 396 insertions(+), 33 deletions(-) create mode 100755 replay/replay-char.c diff --git a/gdbstub.c b/gdbstub.c index 61c12b168e..fdcb0eea8f 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1752,7 +1752,7 @@ int gdbserver_start(const char *device) sigaction(SIGINT, &act, NULL); } #endif - chr = qemu_chr_new("gdb", device, NULL); + chr = qemu_chr_new_noreplay("gdb", device, NULL); if (!chr) return -1; diff --git a/include/sysemu/char.h b/include/sysemu/char.h index e46884f367..4c2f777ad1 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -86,6 +86,7 @@ struct CharDriverState { int is_mux; guint fd_in_tag; QemuOpts *opts; + bool replay; QTAILQ_ENTRY(CharDriverState) next; }; @@ -138,6 +139,22 @@ void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend); CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*init)(struct CharDriverState *s)); +/** + * 
@qemu_chr_new_noreplay: + * + * Create a new character backend from a URI. + * Character device communications are not written + * into the replay log. + * + * @label the name of the backend + * @filename the URI + * @init not sure.. + * + * Returns: a new character backend + */ +CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename, + void (*init)(struct CharDriverState *s)); + /** * @qemu_chr_delete: * @@ -341,6 +358,15 @@ int qemu_chr_be_can_write(CharDriverState *s); */ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len); +/** + * @qemu_chr_be_write_impl: + * + * Implementation of back end writing. Used by replay module. + * + * @buf a buffer to receive data from the front end + * @len the number of bytes to receive from the front end + */ +void qemu_chr_be_write_impl(CharDriverState *s, uint8_t *buf, int len); /** * @qemu_chr_be_event: diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index e4108e8b1a..d24d50238e 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -114,4 +114,21 @@ void replay_input_event(QemuConsole *src, InputEvent *evt); /*! Adds input sync event to the queue */ void replay_input_sync_event(void); +/* Character device */ + +/*! Registers char driver to save its events */ +void replay_register_char_driver(struct CharDriverState *chr); +/*! Saves write to char device event to the log */ +void replay_chr_be_write(struct CharDriverState *s, uint8_t *buf, int len); +/*! Writes char write return value to the replay log. */ +void replay_char_write_event_save(int res, int offset); +/*! Reads char write return value from the replay log. */ +void replay_char_write_event_load(int *res, int *offset); +/*! Reads information about read_all character event. */ +int replay_char_read_all_load(uint8_t *buf); +/*! Writes character read_all error code into the replay log. */ +void replay_char_read_all_save_error(int res); +/*! Writes character read_all execution result into the replay log. 
*/ +void replay_char_read_all_save_buf(uint8_t *buf, int offset); + #endif diff --git a/qemu-char.c b/qemu-char.c index 26202c3e63..0a14e57839 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -37,6 +37,7 @@ #include "io/channel-socket.h" #include "io/channel-file.h" #include "io/channel-tls.h" +#include "sysemu/replay.h" #include @@ -234,30 +235,15 @@ static void qemu_chr_fe_write_log(CharDriverState *s, } } -int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len) +static int qemu_chr_fe_write_buffer(CharDriverState *s, const uint8_t *buf, int len, int *offset) { - int ret; - - qemu_mutex_lock(&s->chr_write_lock); - ret = s->chr_write(s, buf, len); - - if (ret > 0) { - qemu_chr_fe_write_log(s, buf, ret); - } - - qemu_mutex_unlock(&s->chr_write_lock); - return ret; -} - -int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) -{ - int offset = 0; int res = 0; + *offset = 0; qemu_mutex_lock(&s->chr_write_lock); - while (offset < len) { + while (*offset < len) { do { - res = s->chr_write(s, buf + offset, len - offset); + res = s->chr_write(s, buf + *offset, len - *offset); if (res == -1 && errno == EAGAIN) { g_usleep(100); } @@ -267,13 +253,61 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) break; } - offset += res; + *offset += res; } - if (offset > 0) { - qemu_chr_fe_write_log(s, buf, offset); + if (*offset > 0) { + qemu_chr_fe_write_log(s, buf, *offset); + } + qemu_mutex_unlock(&s->chr_write_lock); + + return res; +} + +int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len) +{ + int ret; + + if (s->replay && replay_mode == REPLAY_MODE_PLAY) { + int offset; + replay_char_write_event_load(&ret, &offset); + assert(offset <= len); + qemu_chr_fe_write_buffer(s, buf, offset, &offset); + return ret; + } + + qemu_mutex_lock(&s->chr_write_lock); + ret = s->chr_write(s, buf, len); + + if (ret > 0) { + qemu_chr_fe_write_log(s, buf, ret); } qemu_mutex_unlock(&s->chr_write_lock); + + if (s->replay && 
replay_mode == REPLAY_MODE_RECORD) { + replay_char_write_event_save(ret, ret < 0 ? 0 : ret); + } + + return ret; +} + +int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) +{ + int offset; + int res; + + if (s->replay && replay_mode == REPLAY_MODE_PLAY) { + replay_char_write_event_load(&res, &offset); + assert(offset <= len); + qemu_chr_fe_write_buffer(s, buf, offset, &offset); + return res; + } + + res = qemu_chr_fe_write_buffer(s, buf, len, &offset); + + if (s->replay && replay_mode == REPLAY_MODE_RECORD) { + replay_char_write_event_save(res, offset); + } if (res < 0) { return res; @@ -289,6 +323,10 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) if (!s->chr_sync_read) { return 0; } + + if (s->replay && replay_mode == REPLAY_MODE_PLAY) { + return replay_char_read_all_load(buf); + } while (offset < len) { do { @@ -303,6 +341,9 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) } if (res < 0) { + if (s->replay && replay_mode == REPLAY_MODE_RECORD) { + replay_char_read_all_save_error(res); + } return res; } @@ -313,14 +354,22 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) } } + if (s->replay && replay_mode == REPLAY_MODE_RECORD) { + replay_char_read_all_save_buf(buf, offset); + } return offset; } int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg) { - if (!s->chr_ioctl) - return -ENOTSUP; - return s->chr_ioctl(s, cmd, arg); + int res; + if (!s->chr_ioctl || s->replay) { + res = -ENOTSUP; + } else { + res = s->chr_ioctl(s, cmd, arg); + } + + return res; } int qemu_chr_be_can_write(CharDriverState *s) @@ -330,17 +379,35 @@ int qemu_chr_be_can_write(CharDriverState *s) return s->chr_can_read(s->handler_opaque); } -void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len) +void qemu_chr_be_write_impl(CharDriverState *s, uint8_t *buf, int len) { if (s->chr_read) { s->chr_read(s->handler_opaque, buf, len); } } +void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, 
int len) +{ + if (s->replay) { + if (replay_mode == REPLAY_MODE_PLAY) { + return; + } + replay_chr_be_write(s, buf, len); + } else { + qemu_chr_be_write_impl(s, buf, len); + } +} + int qemu_chr_fe_get_msgfd(CharDriverState *s) { int fd; - return (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1; + int res = (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1; + if (s->replay) { + fprintf(stderr, + "Replay: get msgfd is not supported for serial devices yet\n"); + exit(1); + } + return res; } int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len) @@ -3821,7 +3888,8 @@ err: return NULL; } -CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*init)(struct CharDriverState *s)) +CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename, + void (*init)(struct CharDriverState *s)) { const char *p; CharDriverState *chr; @@ -3847,6 +3915,21 @@ CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*in return chr; } +CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*init)(struct CharDriverState *s)) +{ + CharDriverState *chr; + chr = qemu_chr_new_noreplay(label, filename, init); + if (chr) { + chr->replay = replay_mode != REPLAY_MODE_NONE; + if (chr->replay && chr->chr_ioctl) { + fprintf(stderr, + "Replay: ioctl is not supported for serial devices yet\n"); + } + replay_register_char_driver(chr); + } + return chr; +} + void qemu_chr_fe_set_echo(struct CharDriverState *chr, bool echo) { if (chr->chr_set_echo) { @@ -4455,6 +4538,11 @@ void qmp_chardev_remove(const char *id, Error **errp) error_setg(errp, "Chardev '%s' is busy", id); return; } + if (chr->replay) { + error_setg(errp, + "Chardev '%s' cannot be unplugged in record/replay mode", id); + return; + } qemu_chr_delete(chr); } diff --git a/replay/Makefile.objs b/replay/Makefile.objs index 232193a24b..fcb3f74d60 100644 --- a/replay/Makefile.objs +++ b/replay/Makefile.objs @@ -3,3 +3,4 @@ common-obj-y += 
replay-internal.o common-obj-y += replay-events.o common-obj-y += replay-time.o common-obj-y += replay-input.o +common-obj-y += replay-char.o diff --git a/replay/replay-char.c b/replay/replay-char.c new file mode 100755 index 0000000000..23b6922977 --- /dev/null +++ b/replay/replay-char.c @@ -0,0 +1,168 @@ +/* + * replay-char.c + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "sysemu/sysemu.h" +#include "sysemu/char.h" + +/* Char drivers that generate qemu_chr_be_write events + that should be saved into the log. */ +static CharDriverState **char_drivers; +static int drivers_count; + +/* Char event attributes. */ +typedef struct CharEvent { + int id; + uint8_t *buf; + size_t len; +} CharEvent; + +static int find_char_driver(CharDriverState *chr) +{ + int i = 0; + for ( ; i < drivers_count ; ++i) { + if (char_drivers[i] == chr) { + return i; + } + } + return -1; +} + +void replay_register_char_driver(CharDriverState *chr) +{ + if (replay_mode == REPLAY_MODE_NONE) { + return; + } + char_drivers = g_realloc(char_drivers, + sizeof(*char_drivers) * (drivers_count + 1)); + char_drivers[drivers_count++] = chr; +} + +void replay_chr_be_write(CharDriverState *s, uint8_t *buf, int len) +{ + CharEvent *event = g_malloc0(sizeof(CharEvent)); + + event->id = find_char_driver(s); + if (event->id < 0) { + fprintf(stderr, "Replay: cannot find char driver\n"); + exit(1); + } + event->buf = g_malloc(len); + memcpy(event->buf, buf, len); + event->len = len; + + replay_add_event(REPLAY_ASYNC_EVENT_CHAR_READ, event, NULL, 0); +} + +void replay_event_char_read_run(void *opaque) +{ + CharEvent *event = (CharEvent *)opaque; + + 
qemu_chr_be_write_impl(char_drivers[event->id], event->buf, + (int)event->len); + + g_free(event->buf); + g_free(event); +} + +void replay_event_char_read_save(void *opaque) +{ + CharEvent *event = (CharEvent *)opaque; + + replay_put_byte(event->id); + replay_put_array(event->buf, event->len); +} + +void *replay_event_char_read_load(void) +{ + CharEvent *event = g_malloc0(sizeof(CharEvent)); + + event->id = replay_get_byte(); + replay_get_array_alloc(&event->buf, &event->len); + + return event; +} + +void replay_char_write_event_save(int res, int offset) +{ + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_CHAR_WRITE); + replay_put_dword(res); + replay_put_dword(offset); + replay_mutex_unlock(); +} + +void replay_char_write_event_load(int *res, int *offset) +{ + replay_account_executed_instructions(); + replay_mutex_lock(); + if (replay_next_event_is(EVENT_CHAR_WRITE)) { + *res = replay_get_dword(); + *offset = replay_get_dword(); + replay_finish_event(); + replay_mutex_unlock(); + } else { + replay_mutex_unlock(); + error_report("Missing character write event in the replay log"); + exit(1); + } +} + +int replay_char_read_all_load(uint8_t *buf) +{ + replay_mutex_lock(); + if (replay_next_event_is(EVENT_CHAR_READ_ALL)) { + size_t size; + int res; + replay_get_array(buf, &size); + replay_finish_event(); + replay_mutex_unlock(); + res = (int)size; + assert(res >= 0); + return res; + } else if (replay_next_event_is(EVENT_CHAR_READ_ALL_ERROR)) { + int res = replay_get_dword(); + replay_finish_event(); + replay_mutex_unlock(); + return res; + } else { + replay_mutex_unlock(); + error_report("Missing character read all event in the replay log"); + exit(1); + } +} + +void replay_char_read_all_save_error(int res) +{ + assert(res < 0); + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_CHAR_READ_ALL_ERROR); + replay_put_dword(res); + replay_mutex_unlock(); +} + +void replay_char_read_all_save_buf(uint8_t *buf, int 
offset) +{ + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_CHAR_READ_ALL); + replay_put_array(buf, offset); + replay_mutex_unlock(); +} diff --git a/replay/replay-events.c b/replay/replay-events.c index 2628109ed8..ca940f70e7 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -48,6 +48,9 @@ static void replay_run_event(Event *event) case REPLAY_ASYNC_EVENT_INPUT_SYNC: qemu_input_event_sync_impl(); break; + case REPLAY_ASYNC_EVENT_CHAR_READ: + replay_event_char_read_run(event->opaque); + break; default: error_report("Replay: invalid async event ID (%d) in the queue", event->event_kind); @@ -102,9 +105,9 @@ void replay_clear_events(void) } /*! Adds specified async event to the queue */ -static void replay_add_event(ReplayAsyncEventKind event_kind, - void *opaque, - void *opaque2, uint64_t id) +void replay_add_event(ReplayAsyncEventKind event_kind, + void *opaque, + void *opaque2, uint64_t id) { assert(event_kind < REPLAY_ASYNC_COUNT); @@ -168,6 +171,9 @@ static void replay_save_event(Event *event, int checkpoint) break; case REPLAY_ASYNC_EVENT_INPUT_SYNC: break; + case REPLAY_ASYNC_EVENT_CHAR_READ: + replay_event_char_read_save(event->opaque); + break; default: error_report("Unknown ID %d of replay event", read_event_kind); exit(1); @@ -221,6 +227,11 @@ static Event *replay_read_event(int checkpoint) event->event_kind = read_event_kind; event->opaque = 0; return event; + case REPLAY_ASYNC_EVENT_CHAR_READ: + event = g_malloc0(sizeof(Event)); + event->event_kind = read_event_kind; + event->opaque = replay_event_char_read_load(); + return event; default: error_report("Unknown ID %d of replay event", read_event_kind); exit(1); diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 5438ebdb9c..11f9a85f3e 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -24,6 +24,11 @@ enum ReplayEvents { EVENT_ASYNC, /* for shutdown request */ EVENT_SHUTDOWN, + /* for character device write event */ + 
EVENT_CHAR_WRITE, + /* for character device read all event */ + EVENT_CHAR_READ_ALL, + EVENT_CHAR_READ_ALL_ERROR, /* for clock read/writes */ /* some of greater codes are reserved for clocks */ EVENT_CLOCK, @@ -43,6 +48,7 @@ enum ReplayAsyncEventKind { REPLAY_ASYNC_EVENT_BH, REPLAY_ASYNC_EVENT_INPUT, REPLAY_ASYNC_EVENT_INPUT_SYNC, + REPLAY_ASYNC_EVENT_CHAR_READ, REPLAY_ASYNC_COUNT }; @@ -124,6 +130,9 @@ bool replay_has_events(void); void replay_save_events(int checkpoint); /*! Read events from the file into the input queue */ void replay_read_events(int checkpoint); +/*! Adds specified async event to the queue */ +void replay_add_event(ReplayAsyncEventKind event_kind, void *opaque, + void *opaque2, uint64_t id); /* Input events */ @@ -136,4 +145,13 @@ void replay_add_input_event(struct InputEvent *event); /*! Adds input sync event to the queue */ void replay_add_input_sync_event(void); +/* Character devices */ + +/*! Called to run char device read event. */ +void replay_event_char_read_run(void *opaque); +/*! Writes char read event to the file. */ +void replay_event_char_read_save(void *opaque); +/*! Reads char event read from the file. */ +void *replay_event_char_read_load(void); + #endif diff --git a/replay/replay.c b/replay/replay.c index f8739c26c8..fcfde4fc93 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -20,7 +20,7 @@ /* Current version of the replay mechanism. Increase it when file format changes. 
*/ -#define REPLAY_VERSION 0xe02002 +#define REPLAY_VERSION 0xe02003 /* Size of replay log header */ #define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) diff --git a/stubs/replay.c b/stubs/replay.c index 00ca01f55a..2f1a6dc62e 100644 --- a/stubs/replay.c +++ b/stubs/replay.c @@ -29,3 +29,37 @@ bool replay_events_enabled(void) void replay_finish(void) { } + +void replay_register_char_driver(CharDriverState *chr) +{ +} + +void replay_chr_be_write(CharDriverState *s, uint8_t *buf, int len) +{ + abort(); +} + +void replay_char_write_event_save(int res, int offset) +{ + abort(); +} + +void replay_char_write_event_load(int *res, int *offset) +{ + abort(); +} + +int replay_char_read_all_load(uint8_t *buf) +{ + abort(); +} + +void replay_char_read_all_save_error(int res) +{ + abort(); +} + +void replay_char_read_all_save_buf(uint8_t *buf, int offset) +{ + abort(); +} From 281b2201e4e18d5b9a26e1e8d81b62b5581a13be Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Thu, 10 Mar 2016 14:56:03 +0300 Subject: [PATCH 7/8] icount: remove obsolete warp call qemu_clock_warp call in qemu_tcg_wait_io_event function is not needed anymore, because it is called in every iteration of main_loop_wait. Reviewed-by: Paolo Bonzini Signed-off-by: Pavel Dovgalyuk Message-Id: <20160310115603.4812.67559.stgit@PASHA-ISP> Signed-off-by: Paolo Bonzini --- cpus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpus.c b/cpus.c index bc774e2540..85d0f876f7 100644 --- a/cpus.c +++ b/cpus.c @@ -995,9 +995,6 @@ static void qemu_wait_io_event_common(CPUState *cpu) static void qemu_tcg_wait_io_event(CPUState *cpu) { while (all_cpu_threads_idle()) { - /* Start accounting real time to the virtual clock if the CPUs - are idle. 
*/ - qemu_clock_warp(QEMU_CLOCK_VIRTUAL); qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } From e76d1798faa6d29f54c0930a034b67f3ecdb947d Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Thu, 10 Mar 2016 14:56:09 +0300 Subject: [PATCH 8/8] icount: decouple warp calls The qemu_clock_warp function is called to update the virtual clock when the CPU is sleeping. This function includes a replay checkpoint to make execution deterministic in icount mode. The record/replay module flushes the async event queue at checkpoints. Some of the events (e.g., block device operations) include interaction with hardware. E.g., the APIC polled by block devices sets one of the IRQ flags. The flag to be set depends on the currently executing thread (CPU or iothread). Therefore in replay mode we have to process the checkpoints in the same thread as they were recorded. The qemu_clock_warp function (and its checkpoint) may be called from a different thread. This patch decouples two different execution cases of this function: the call from the iothread when the CPU is sleeping, and the call from the cpu thread to update the virtual clock. The first task is performed by the qemu_start_warp_timer function. It sets the warp timer event to the moment of the nearest pending virtual timer. The second function (qemu_account_warp_timer) is called from the cpu thread before execution of the code. It advances the virtual clock by adding the length of the period while the CPU was sleeping. Signed-off-by: Pavel Dovgalyuk Message-Id: <20160310115609.4812.44986.stgit@PASHA-ISP> [Update docs.
- Paolo] Signed-off-by: Paolo Bonzini --- cpus.c | 58 +++++++++++++++++++++++------------------ docs/replay.txt | 21 ++++++++++----- include/qemu/timer.h | 7 +++-- include/sysemu/replay.h | 3 ++- main-loop.c | 2 +- qemu-timer.c | 4 ++- stubs/clock-warp.c | 2 +- 7 files changed, 58 insertions(+), 39 deletions(-) diff --git a/cpus.c b/cpus.c index 85d0f876f7..4052be525f 100644 --- a/cpus.c +++ b/cpus.c @@ -370,9 +370,12 @@ static void icount_warp_rt(void) } } -static void icount_dummy_timer(void *opaque) +static void icount_timer_cb(void *opaque) { - (void)opaque; + /* No need for a checkpoint because the timer already synchronizes + * with CHECKPOINT_CLOCK_VIRTUAL_RT. + */ + icount_warp_rt(); } void qtest_clock_warp(int64_t dest) @@ -396,17 +399,12 @@ void qtest_clock_warp(int64_t dest) qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } -void qemu_clock_warp(QEMUClockType type) +void qemu_start_warp_timer(void) { int64_t clock; int64_t deadline; - /* - * There are too many global variables to make the "warp" behavior - * applicable to other clocks. But a clock argument removes the - * need for if statements all over the place. - */ - if (type != QEMU_CLOCK_VIRTUAL || !use_icount) { + if (!use_icount) { return; } @@ -418,29 +416,17 @@ void qemu_clock_warp(QEMUClockType type) } /* warp clock deterministically in record/replay mode */ - if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) { + if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { return; } - if (icount_sleep) { - /* - * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now. - * This ensures that the deadline for the timer is computed correctly - * below. - * This also makes sure that the insn counter is synchronized before - * the CPU starts running, in case the CPU is woken by an event other - * than the earliest QEMU_CLOCK_VIRTUAL timer. 
- */ - icount_warp_rt(); - timer_del(icount_warp_timer); - } if (!all_cpu_threads_idle()) { return; } if (qtest_enabled()) { /* When testing, qtest commands advance icount. */ - return; + return; } /* We want to use the earliest deadline from ALL vm_clocks */ @@ -496,6 +482,28 @@ void qemu_clock_warp(QEMUClockType type) } } +static void qemu_account_warp_timer(void) +{ + if (!use_icount || !icount_sleep) { + return; + } + + /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers + * do not fire, so computing the deadline does not make sense. + */ + if (!runstate_is_running()) { + return; + } + + /* warp clock deterministically in record/replay mode */ + if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { + return; + } + + timer_del(icount_warp_timer); + icount_warp_rt(); +} + static bool icount_state_needed(void *opaque) { return use_icount; @@ -624,7 +632,7 @@ void configure_icount(QemuOpts *opts, Error **errp) icount_sleep = qemu_opt_get_bool(opts, "sleep", true); if (icount_sleep) { icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - icount_dummy_timer, NULL); + icount_timer_cb, NULL); } icount_align_option = qemu_opt_get_bool(opts, "align", false); @@ -1496,7 +1504,7 @@ static void tcg_exec_all(void) int r; /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ - qemu_clock_warp(QEMU_CLOCK_VIRTUAL); + qemu_account_warp_timer(); if (next_cpu == NULL) { next_cpu = first_cpu; diff --git a/docs/replay.txt b/docs/replay.txt index 149727e2a6..3cedc25b2e 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -107,7 +107,7 @@ at the specified moments of time. There are several kinds of timers: sources (e.g. real time clock chip). Host clock is the one of the sources of non-determinism. Host clock read operations should be logged to make the execution deterministic. - * Real time clock for icount. This clock is similar to real time clock but + * Virtual real time clock. 
This clock is similar to real time clock but it is used only for increasing virtual clock while virtual machine is sleeping. Due to its nature it is also non-deterministic as the host clock and has to be logged too. @@ -134,11 +134,20 @@ of time. That's why we do not process a group of timers until the checkpoint event will be read from the log. Such an event allows synchronizing CPU execution and timer events. -Another checkpoints application in record/replay is instruction counting -while the virtual machine is idle. This function (qemu_clock_warp) is called -from the wait loop. It changes virtual machine state and must be deterministic -then. That is why we added checkpoint to this function to prevent its -operation in replay mode when it does not correspond to record mode. +Two other checkpoints govern the "warping" of the virtual clock. +While the virtual machine is idle, the virtual clock increments at +1 ns per *real time* nanosecond. This is done by setting up a timer +(called the warp timer) on the virtual real time clock, so that the +timer fires at the next deadline of the virtual clock; the virtual clock +is then incremented (which is called "warping" the virtual clock) as +soon as the timer fires or the CPUs need to go out of the idle state. +Two functions are used for this purpose; because these actions change +virtual machine state and must be deterministic, each of them creates a +checkpoint. qemu_start_warp_timer checks if the CPUs are idle and if so +starts accounting real time to virtual clock. qemu_account_warp_timer +is called when the CPUs get an interrupt or when the warp timer fires, +and it warps the virtual clock by the amount of real time that has passed +since qemu_start_warp_timer. 
Bottom halves ------------- diff --git a/include/qemu/timer.h b/include/qemu/timer.h index d0946cb953..7197d0859a 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -210,12 +210,11 @@ void qemu_clock_notify(QEMUClockType type); void qemu_clock_enable(QEMUClockType type, bool enabled); /** - * qemu_clock_warp: - * @type: the clock type + * qemu_start_warp_timer: * - * Warp a clock to a new value + * Starts a timer for virtual clock update */ -void qemu_clock_warp(QEMUClockType type); +void qemu_start_warp_timer(void); /** * qemu_clock_register_reset_notifier: diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index d24d50238e..e7989199fc 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -27,7 +27,8 @@ typedef enum ReplayClockKind ReplayClockKind; /* IDs of the checkpoints */ enum ReplayCheckpoint { - CHECKPOINT_CLOCK_WARP, + CHECKPOINT_CLOCK_WARP_START, + CHECKPOINT_CLOCK_WARP_ACCOUNT, CHECKPOINT_RESET_REQUESTED, CHECKPOINT_SUSPEND_REQUESTED, CHECKPOINT_CLOCK_VIRTUAL, diff --git a/main-loop.c b/main-loop.c index 19beae76ad..3a7f4cdbb2 100644 --- a/main-loop.c +++ b/main-loop.c @@ -509,7 +509,7 @@ int main_loop_wait(int nonblocking) /* CPU thread can infinitely wait for event after missing the warp */ - qemu_clock_warp(QEMU_CLOCK_VIRTUAL); + qemu_start_warp_timer(); qemu_clock_run_all_timers(); return ret; diff --git a/qemu-timer.c b/qemu-timer.c index e98ecc9733..4441fe66ff 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -394,7 +394,9 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list, static void timerlist_rearm(QEMUTimerList *timer_list) { /* Interrupt execution to force deadline recalculation. 
*/ - qemu_clock_warp(timer_list->clock->type); + if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) { + qemu_start_warp_timer(); + } timerlist_notify(timer_list); } diff --git a/stubs/clock-warp.c b/stubs/clock-warp.c index 5ae32b9e6a..8acb58a775 100644 --- a/stubs/clock-warp.c +++ b/stubs/clock-warp.c @@ -2,7 +2,7 @@ #include "qemu-common.h" #include "qemu/timer.h" -void qemu_clock_warp(QEMUClockType type) +void qemu_start_warp_timer(void) { }