From a01a9cb821a29852abb142ec52b26c8488ced6e8 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Tue, 23 Nov 2010 17:01:15 +0530 Subject: [PATCH 1/9] virtio-serial-bus: bump up control vq size to 32 The current default of 16 buffers for the control vq is too small. We can get more entries in there, for example when asking the guest to add max. allowed ports. Note: a more robust solution would involve some kind of event queueing in host to guarantee no event loss. Added a TODO to look into this later. Signed-off-by: Amit Shah Signed-off-by: Michael S. Tsirkin --- hw/virtio-serial-bus.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 74ba5ec3d3..b728040f3a 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -769,10 +769,16 @@ VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports) /* Add a queue for guest to host transfers for port 0 (backward compat) */ vser->ovqs[0] = virtio_add_queue(vdev, 128, handle_output); + /* TODO: host to guest notifications can get dropped + * if the queue fills up. Implement queueing in host, + * this might also make it possible to reduce the control + * queue size: as guest preposts buffers there, + * this will save 4Kbyte of guest memory per entry. */ + /* control queue: host to guest */ - vser->c_ivq = virtio_add_queue(vdev, 16, control_in); + vser->c_ivq = virtio_add_queue(vdev, 32, control_in); /* control queue: guest to host */ - vser->c_ovq = virtio_add_queue(vdev, 16, control_out); + vser->c_ovq = virtio_add_queue(vdev, 32, control_out); for (i = 1; i < vser->bus->max_nr_ports; i++) { /* Add a per-port queue for host to guest transfers */ From 9c046d96d4d0d1fef89a30b1491775492082da9d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 11 Jan 2011 14:20:38 -0200 Subject: [PATCH 2/9] document QEMU<->ACPIBIOS PCI hotplug interface Document how QEMU communicates with ACPI BIOS for PCI hotplug. 
Signed-off-by: Marcelo Tosatti Signed-off-by: Michael S. Tsirkin --- docs/specs/acpi_pci_hotplug.txt | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 docs/specs/acpi_pci_hotplug.txt diff --git a/docs/specs/acpi_pci_hotplug.txt b/docs/specs/acpi_pci_hotplug.txt new file mode 100644 index 0000000000..f0f74a7c7c --- /dev/null +++ b/docs/specs/acpi_pci_hotplug.txt @@ -0,0 +1,37 @@ +QEMU<->ACPI BIOS PCI hotplug interface +-------------------------------------- + +QEMU supports PCI hotplug via ACPI, for PCI bus 0. This document +describes the interface between QEMU and the ACPI BIOS. + +ACPI GPE block (IO ports 0xafe0-0xafe3, byte access): +----------------------------------------- + +Generic ACPI GPE block. Bit 1 (GPE.1) used to notify PCI hotplug/eject +event to ACPI BIOS, via SCI interrupt. + +PCI slot injection notification pending (IO port 0xae00-0xae03, 4-byte access): +--------------------------------------------------------------- +Slot injection notification pending. One bit per slot. + +Read by ACPI BIOS GPE.1 handler to notify OS of injection +events. + +PCI slot removal notification (IO port 0xae04-0xae07, 4-byte access): +----------------------------------------------------- +Slot removal notification pending. One bit per slot. + +Read by ACPI BIOS GPE.1 handler to notify OS of removal +events. + +PCI device eject (IO port 0xae08-0xae0b, 4-byte access): +---------------------------------------- + +Used by ACPI BIOS _EJ0 method to request device removal. One bit per slot. +Reads return 0. + +PCI removability status (IO port 0xae0c-0xae0f, 4-byte access): +----------------------------------------------- + +Used by ACPI BIOS _RMV method to indicate removability status to OS. One +bit per slot. 
From 668643b025dcff72b9b18adb5df794be9e9be5dc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 11 Jan 2011 14:20:39 -0200 Subject: [PATCH 3/9] acpi_piix4: expose no_hotplug attribute via i/o port Expose no_hotplug attribute via I/O port, so ACPI BIOS can indicate removability status to guest OS. An updated seabios is required to make use of this feature (seabios.git commit ID 3c241edf3d7ef29c21). Signed-off-by: Marcelo Tosatti Tested-by: Gleb Natapov Signed-off-by: Michael S. Tsirkin --- hw/acpi_piix4.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 273097d480..5bbc2b5a26 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -37,6 +37,7 @@ #define GPE_BASE 0xafe0 #define PCI_BASE 0xae00 #define PCI_EJ_BASE 0xae08 +#define PCI_RMV_BASE 0xae0c #define PIIX4_PCI_HOTPLUG_STATUS 2 @@ -73,6 +74,7 @@ typedef struct PIIX4PMState { /* for pci hotplug */ struct gpe_regs gpe; struct pci_status pci0_status; + uint32_t pci0_hotplug_enable; } PIIX4PMState; static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s); @@ -322,6 +324,25 @@ static const VMStateDescription vmstate_acpi = { } }; +static void piix4_update_hotplug(PIIX4PMState *s) +{ + PCIDevice *dev = &s->dev; + BusState *bus = qdev_get_parent_bus(&dev->qdev); + DeviceState *qdev, *next; + + s->pci0_hotplug_enable = ~0; + + QLIST_FOREACH_SAFE(qdev, &bus->children, sibling, next) { + PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev); + PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, qdev); + int slot = PCI_SLOT(pdev->devfn); + + if (info->no_hotplug) { + s->pci0_hotplug_enable &= ~(1 << slot); + } + } +} + static void piix4_reset(void *opaque) { PIIX4PMState *s = opaque; @@ -336,6 +357,7 @@ static void piix4_reset(void *opaque) /* Mark SMM as already inited (until KVM supports SMM). 
*/ pci_conf[0x5B] = 0x02; } + piix4_update_hotplug(s); } static void piix4_powerdown(void *opaque, int irq, int power_failing) @@ -576,6 +598,18 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val) PIIX4_DPRINTF("pciej write %x <== %d\n", addr, val); } +static uint32_t pcirmv_read(void *opaque, uint32_t addr) +{ + PIIX4PMState *s = opaque; + + return s->pci0_hotplug_enable; +} + +static void pcirmv_write(void *opaque, uint32_t addr, uint32_t val) +{ + return; +} + static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, PCIHotplugState state); @@ -592,6 +626,9 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, bus); register_ioport_read(PCI_EJ_BASE, 4, 4, pciej_read, bus); + register_ioport_write(PCI_RMV_BASE, 4, 4, pcirmv_write, s); + register_ioport_read(PCI_RMV_BASE, 4, 4, pcirmv_read, s); + pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev); } From dc9121210eaf34e768901ffc6992dd13062c743a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 11 Jan 2011 14:39:43 -0700 Subject: [PATCH 4/9] savevm: Fix no_migrate The no_migrate save state flag is currently only checked in the last phase of migration. This means that we potentially waste a lot of time and bandwidth with the live state handlers before we ever check the no_migrate flags. The error message printed when we catch a non-migratable device doesn't get printed for a detached migration. And, no_migrate does nothing to prevent an incoming migration to a target that includes a non-migratable device. This attempts to fix all of these. One notable difference in behavior is that an outgoing migration now checks for non-migratable devices before ever connecting to the target system. This means the target will remain listening rather than exit from failure. Signed-off-by: Alex Williamson Signed-off-by: Michael S. 
Tsirkin --- migration.c | 4 ++++ savevm.c | 40 ++++++++++++++++++++++++++-------------- sysemu.h | 1 + 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/migration.c b/migration.c index e5ba51c314..d593b1df5d 100644 --- a/migration.c +++ b/migration.c @@ -88,6 +88,10 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data) return -1; } + if (qemu_savevm_state_blocked(mon)) { + return -1; + } + if (strstart(uri, "tcp:", &p)) { s = tcp_start_outgoing_migration(mon, p, max_throttle, detach, blk, inc); diff --git a/savevm.c b/savevm.c index 90aa237c9c..fcd8db4f85 100644 --- a/savevm.c +++ b/savevm.c @@ -1401,19 +1401,13 @@ static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id) return vmstate_load_state(f, se->vmsd, se->opaque, version_id); } -static int vmstate_save(QEMUFile *f, SaveStateEntry *se) +static void vmstate_save(QEMUFile *f, SaveStateEntry *se) { - if (se->no_migrate) { - return -1; - } - if (!se->vmsd) { /* Old style */ se->save_state(f, se->opaque); - return 0; + return; } vmstate_save_state(f,se->vmsd, se->opaque); - - return 0; } #define QEMU_VM_FILE_MAGIC 0x5145564d @@ -1427,6 +1421,20 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se) #define QEMU_VM_SECTION_FULL 0x04 #define QEMU_VM_SUBSECTION 0x05 +bool qemu_savevm_state_blocked(Monitor *mon) +{ + SaveStateEntry *se; + + QTAILQ_FOREACH(se, &savevm_handlers, entry) { + if (se->no_migrate) { + monitor_printf(mon, "state blocked by non-migratable device '%s'\n", + se->idstr); + return true; + } + } + return false; +} + int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable, int shared) { @@ -1508,7 +1516,6 @@ int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f) int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f) { SaveStateEntry *se; - int r; cpu_synchronize_all_states(); @@ -1541,11 +1548,7 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f) qemu_put_be32(f, se->instance_id); qemu_put_be32(f, se->version_id); - r 
= vmstate_save(f, se); - if (r < 0) { - monitor_printf(mon, "cannot migrate with device '%s'\n", se->idstr); - return r; - } + vmstate_save(f, se); } qemu_put_byte(f, QEMU_VM_EOF); @@ -1575,6 +1578,11 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f) saved_vm_running = vm_running; vm_stop(0); + if (qemu_savevm_state_blocked(mon)) { + ret = -EINVAL; + goto out; + } + ret = qemu_savevm_state_begin(mon, f, 0, 0); if (ret < 0) goto out; @@ -1692,6 +1700,10 @@ int qemu_loadvm_state(QEMUFile *f) unsigned int v; int ret; + if (qemu_savevm_state_blocked(default_mon)) { + return -EINVAL; + } + v = qemu_get_be32(f); if (v != QEMU_VM_FILE_MAGIC) return -EINVAL; diff --git a/sysemu.h b/sysemu.h index d8fceec806..0c969f22b9 100644 --- a/sysemu.h +++ b/sysemu.h @@ -75,6 +75,7 @@ void qemu_announce_self(void); void main_loop_wait(int nonblocking); +bool qemu_savevm_state_blocked(Monitor *mon); int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable, int shared); int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f); From 2991181aaa026d8b1444bfaa9c4bcd82065ba5a3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 19 Jan 2011 21:18:19 +0200 Subject: [PATCH 5/9] pci: fix device paths Patch a6a7005d14b3c32d4864a718fb1cb19c789f58a5 generated broken device paths. We snprintf with a length shorter than the output, so the last character is discarded and replaced by the null byte. Fix it up by snprintf to a buffer which is larger by 1 byte and then memcpy the data (without the null byte) to where we need it. Reported-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- hw/pci.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 8d0e3df2e5..c77f6e998a 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -2032,10 +2032,13 @@ static char *pcibus_get_dev_path(DeviceState *dev) * domain:Bus:Slot.Func for systems without nested PCI bridges. 
* Slot.Function list specifies the slot and function numbers for all * devices on the path from root to the specific device. */ - int domain_len = strlen("DDDD:00"); - int slot_len = strlen(":SS.F"); + char domain[] = "DDDD:00"; + char slot[] = ":SS.F"; + int domain_len = sizeof domain - 1 /* For '\0' */; + int slot_len = sizeof slot - 1 /* For '\0' */; int path_len; char *path, *p; + int s; /* Calculate # of slots on path between device and root. */; slot_depth = 0; @@ -2050,14 +2053,19 @@ static char *pcibus_get_dev_path(DeviceState *dev) path[path_len] = '\0'; /* First field is the domain. */ - snprintf(path, domain_len, "%04x:00", pci_find_domain(d->bus)); + s = snprintf(domain, sizeof domain, "%04x:00", pci_find_domain(d->bus)); + assert(s == domain_len); + memcpy(path, domain, domain_len); /* Fill in slot numbers. We walk up from device to root, so need to print * them in the reverse order, last to first. */ p = path + path_len; for (t = d; t; t = t->bus->parent_dev) { p -= slot_len; - snprintf(p, slot_len, ":%02x.%x", PCI_SLOT(t->devfn), PCI_FUNC(d->devfn)); + s = snprintf(slot, sizeof slot, ":%02x.%x", + PCI_SLOT(t->devfn), PCI_FUNC(d->devfn)); + assert(s == slot_len); + memcpy(p, slot, slot_len); } return path; From 4c92325b5196ebd34886174a80d2f9fac35a004f Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 20 Jan 2011 16:21:38 +0900 Subject: [PATCH 6/9] pci: deassert intx on reset. deassert intx on device reset. So far pci_device_reset() is used for system reset. In that case, interrupt controller is reset at the same time so that all irqs are deasserted. But now pci bus reset/flr is supported, and in that case irq needs to be disabled explicitly. Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. 
Tsirkin --- hw/pci.c | 9 +++++++++ hw/pci.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/hw/pci.c b/hw/pci.c index c77f6e998a..1ffe428c5c 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -137,6 +137,14 @@ static void pci_update_irq_status(PCIDevice *dev) } } +void pci_device_deassert_intx(PCIDevice *dev) +{ + int i; + for (i = 0; i < PCI_NUM_PINS; ++i) { + qemu_set_irq(dev->irq[i], 0); + } +} + /* * This function is called on #RST and FLR. * FLR if PCI_EXP_DEVCTL_BCR_FLR is set @@ -152,6 +160,7 @@ void pci_device_reset(PCIDevice *dev) dev->irq_state = 0; pci_update_irq_status(dev); + pci_device_deassert_intx(dev); /* Clear all writeable bits */ pci_word_test_and_clear_mask(dev->config + PCI_COMMAND, pci_get_word(dev->wmask + PCI_COMMAND) | diff --git a/hw/pci.h b/hw/pci.h index bc8d5bb3c7..0d2753f27e 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -264,6 +264,8 @@ void do_pci_info_print(Monitor *mon, const QObject *data); void do_pci_info(Monitor *mon, QObject **ret_data); void pci_bridge_update_mappings(PCIBus *b); +void pci_device_deassert_intx(PCIDevice *dev); + static inline void pci_set_byte(uint8_t *config, uint8_t val) { From 59369b0816de3e76fa20204be5f6144de1ce8937 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 20 Jan 2011 16:21:39 +0900 Subject: [PATCH 7/9] msi: simplify write config a bit. use pci_device_deassert_intx(). Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. 
Tsirkin --- hw/msi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/msi.c b/hw/msi.c index f03f519a2e..3dc3a24b77 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -255,7 +255,6 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) uint8_t log_max_vecs; unsigned int vector; uint32_t pending; - int i; if (!ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) { return; @@ -296,9 +295,7 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) * from using its INTx# pin (if implemented) to request * service (MSI, MSI-X, and INTx# are mutually exclusive). */ - for (i = 0; i < PCI_NUM_PINS; ++i) { - qemu_set_irq(dev->irq[i], 0); - } + pci_device_deassert_intx(dev); /* * nr_vectors might be set bigger than capable. So clamp it. From e407bf13ba65163a8f8669e0157839bbefdb43b8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 20 Jan 2011 16:21:40 +0900 Subject: [PATCH 8/9] msix: simplify write config use pci_device_deassert_intx(). Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/msix.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index e1230824b2..daaf9b7878 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -159,7 +159,6 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, { unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; int vector; - int i; if (!range_covers_byte(addr, len, enable_pos)) { return; @@ -169,9 +168,7 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, return; } - for (i = 0; i < PCI_NUM_PINS; ++i) { - qemu_set_irq(dev->irq[i], 0); - } + pci_device_deassert_intx(dev); if (msix_function_masked(dev)) { return; From e10990c3f0c39e92ab5f74004b89a24fcc36fa14 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 20 Jan 2011 15:57:49 +0900 Subject: [PATCH 9/9] pci: use qemu_malloc() in pcibus_get_dev_path() use qemu_malloc() instead of direct use of malloc(). 
Signed-off-by: Isaku Yamahata Signed-off-by: Michael S. Tsirkin --- hw/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci.c b/hw/pci.c index 1ffe428c5c..b8f5385170 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -2058,7 +2058,7 @@ static char *pcibus_get_dev_path(DeviceState *dev) path_len = domain_len + slot_len * slot_depth; /* Allocate memory, fill in the terminating null byte. */ - path = malloc(path_len + 1 /* For '\0' */); + path = qemu_malloc(path_len + 1 /* For '\0' */); path[path_len] = '\0'; /* First field is the domain. */