From f0ada3608ac13cf13f4e2955ed348dc93a38ac45 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 9 Dec 2015 15:45:29 +0000 Subject: [PATCH 1/4] xen/MSI-X: latch MSI-X table writes The remaining log message in pci_msix_write() is wrong, as there guest behavior may only appear to be wrong: For one, the old logic didn't take the mask-all bit into account. And then this shouldn't depend on host device state (i.e. the host may have masked the entry without the guest having done so). Plus these writes shouldn't be dropped even when an entry gets unmasked. Instead, if they can't be made take effect right away, they should take effect on the next unmasking or enabling operation - the specification explicitly describes such caching behavior. Signed-off-by: Jan Beulich Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini --- hw/xen/xen_pt.h | 4 +- hw/xen/xen_pt_config_init.c | 2 + hw/xen/xen_pt_msi.c | 74 ++++++++++++++----------------------- 3 files changed, 32 insertions(+), 48 deletions(-) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index c545280085..4f922f46a0 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -187,13 +187,13 @@ typedef struct XenPTMSIXEntry { int pirq; uint64_t addr; uint32_t data; - uint32_t vector_ctrl; + uint32_t latch[4]; bool updated; /* indicate whether MSI ADDR or DATA is updated */ - bool warned; /* avoid issuing (bogus) warning more than once */ } XenPTMSIXEntry; typedef struct XenPTMSIX { uint32_t ctrl_offset; bool enabled; + bool maskall; int total_entries; int bar_index; uint64_t table_base; diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 3d8686d550..4fd7206071 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1499,6 +1499,8 @@ static int xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s, xen_pt_msix_disable(s); } + s->msix->maskall = *val & PCI_MSIX_FLAGS_MASKALL; + debug_msix_enabled_old = s->msix->enabled; s->msix->enabled = !!(*val & PCI_MSIX_FLAGS_ENABLE); if (s->msix->enabled != debug_msix_enabled_old) { diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index 82de2bced3..e10cd2a79f 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -25,6 +25,7 @@ #define XEN_PT_GFLAGSSHIFT_DELIV_MODE 12 #define XEN_PT_GFLAGSSHIFT_TRG_MODE 15 +#define latch(fld) latch[PCI_MSIX_ENTRY_##fld / sizeof(uint32_t)] /* * Helpers @@ -314,7 +315,8 @@ static int msix_set_enable(XenPCIPassthroughState *s, bool enabled) enabled); } -static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr) +static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr, + uint32_t vec_ctrl) { XenPTMSIXEntry *entry = NULL; int pirq; @@ -332,6 +334,19 @@ static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr) pirq = entry->pirq; + /* + * Update the entry addr and data to the latest values only when the + * entry is masked or they are all masked, as required by the spec. + * Addr and data changes while the MSI-X entry is unmasked get deferred + * until the next masked -> unmasked transition. + */ + if (pirq == XEN_PT_UNASSIGNED_PIRQ || s->msix->maskall || + (vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { + entry->addr = entry->latch(LOWER_ADDR) | + ((uint64_t)entry->latch(UPPER_ADDR) << 32); + entry->data = entry->latch(DATA); + } + rc = msi_msix_setup(s, entry->addr, entry->data, &pirq, true, entry_nr, entry->pirq == XEN_PT_UNASSIGNED_PIRQ); if (rc) { @@ -357,7 +372,7 @@ int xen_pt_msix_update(XenPCIPassthroughState *s) int i; for (i = 0; i < msix->total_entries; i++) { - xen_pt_msix_update_one(s, i); + xen_pt_msix_update_one(s, i, msix->msix_entry[i].latch(VECTOR_CTRL)); } return 0; @@ -406,35 +421,15 @@ int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index) static uint32_t get_entry_value(XenPTMSIXEntry *e, int offset) { - switch (offset) { - case PCI_MSIX_ENTRY_LOWER_ADDR: - return e->addr & UINT32_MAX; - case PCI_MSIX_ENTRY_UPPER_ADDR: - return e->addr >> 32; - case PCI_MSIX_ENTRY_DATA: - return e->data; - case PCI_MSIX_ENTRY_VECTOR_CTRL: - return e->vector_ctrl; - default: - return 0; - } + return !(offset % sizeof(*e->latch)) + ? e->latch[offset / sizeof(*e->latch)] : 0; } static void set_entry_value(XenPTMSIXEntry *e, int offset, uint32_t val) { - switch (offset) { - case PCI_MSIX_ENTRY_LOWER_ADDR: - e->addr = (e->addr & ((uint64_t)UINT32_MAX << 32)) | val; - break; - case PCI_MSIX_ENTRY_UPPER_ADDR: - e->addr = (uint64_t)val << 32 | (e->addr & UINT32_MAX); - break; - case PCI_MSIX_ENTRY_DATA: - e->data = val; - break; - case PCI_MSIX_ENTRY_VECTOR_CTRL: - e->vector_ctrl = val; - break; + if (!(offset % sizeof(*e->latch))) + { + e->latch[offset / sizeof(*e->latch)] = val; } } @@ -454,39 +449,26 @@ static void pci_msix_write(void *opaque, hwaddr addr, offset = addr % PCI_MSIX_ENTRY_SIZE; if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) { - const volatile uint32_t *vec_ctrl; - if (get_entry_value(entry, offset) == val && entry->pirq != XEN_PT_UNASSIGNED_PIRQ) { return; } + entry->updated = true; + } else if (msix->enabled && entry->updated && + !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { + const volatile uint32_t *vec_ctrl; + /* * If Xen intercepts the mask bit access, entry->vec_ctrl may not be * up-to-date. Read from hardware directly. */ vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; - - if (msix->enabled && !(*vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { - if (!entry->warned) { - entry->warned = true; - XEN_PT_ERR(&s->dev, "Can't update msix entry %d since MSI-X is" - " already enabled.\n", entry_nr); - } - return; - } - - entry->updated = true; + xen_pt_msix_update_one(s, entry_nr, *vec_ctrl); } set_entry_value(entry, offset, val); - - if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL) { - if (msix->enabled && !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { - xen_pt_msix_update_one(s, entry_nr); - } - } } static uint64_t pci_msix_read(void *opaque, hwaddr addr, From bdfe5159cbaebf9e935786040459c56d23646d5a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 9 Dec 2015 15:46:57 +0000 Subject: [PATCH 2/4] xen/MSI-X: really enforce alignment The way the generic infrastructure works the intention of not allowing unaligned accesses can't be achieved by simply setting .unaligned to false. The benefit is that we can now replace the conditionals in {get,set}_entry_value() by assert()-s. Signed-off-by: Jan Beulich Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini --- hw/xen/xen_pt_msi.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index e10cd2a79f..302a310190 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -421,16 +421,14 @@ int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index) static uint32_t get_entry_value(XenPTMSIXEntry *e, int offset) { - return !(offset % sizeof(*e->latch)) - ? e->latch[offset / sizeof(*e->latch)] : 0; + assert(!(offset % sizeof(*e->latch))); + return e->latch[offset / sizeof(*e->latch)]; } static void set_entry_value(XenPTMSIXEntry *e, int offset, uint32_t val) { - if (!(offset % sizeof(*e->latch))) - { - e->latch[offset / sizeof(*e->latch)] = val; - } + assert(!(offset % sizeof(*e->latch))); + e->latch[offset / sizeof(*e->latch)] = val; } static void pci_msix_write(void *opaque, hwaddr addr, @@ -494,6 +492,12 @@ static uint64_t pci_msix_read(void *opaque, hwaddr addr, } } +static bool pci_msix_accepts(void *opaque, hwaddr addr, + unsigned size, bool is_write) +{ + return !(addr & (size - 1)); +} + static const MemoryRegionOps pci_msix_ops = { .read = pci_msix_read, .write = pci_msix_write, @@ -502,7 +506,13 @@ static const MemoryRegionOps pci_msix_ops = { .min_access_size = 4, .max_access_size = 4, .unaligned = false, + .accepts = pci_msix_accepts }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false + } }; int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base) From 55c8672c2e65276c19edd3323076518248730cca Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 9 Dec 2015 15:47:28 +0000 Subject: [PATCH 3/4] xen/pass-through: correctly deal with RW1C bits Introduce yet another mask for them, so that the generic routine can handle them, at once rendering xen_pt_pmcsr_reg_write() superfluous. Signed-off-by: Jan Beulich Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini --- hw/xen/xen_pt.h | 2 ++ hw/xen/xen_pt_config_init.c | 38 ++++++++++++------------------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index 4f922f46a0..37497119f5 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -113,6 +113,8 @@ struct XenPTRegInfo { uint32_t res_mask; /* reg read only field mask (ON:RO/ROS, OFF:other) */ uint32_t ro_mask; + /* reg read/write-1-clear field mask (ON:RW1C/RW1CS, OFF:other) */ + uint32_t rw1c_mask; /* reg emulate field mask (ON:emu, OFF:passthrough) */ uint32_t emu_mask; xen_pt_conf_reg_init init; diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 4fd7206071..185a698732 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -179,7 +179,8 @@ static int xen_pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ - *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); return 0; } @@ -197,7 +198,8 @@ static int xen_pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ - *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); return 0; } @@ -215,7 +217,8 @@ static int xen_pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); /* create value for writing to I/O device register */ - *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); return 0; } @@ -633,6 +636,7 @@ static XenPTRegInfo xen_pt_emu_reg_header0[] = { .init_val = 0x0000, .res_mask = 0x0007, .ro_mask = 0x06F8, + .rw1c_mask = 0xF900, .emu_mask = 0x0010, .init = xen_pt_status_reg_init, .u.w.read = xen_pt_word_reg_read, @@ -944,6 +948,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = { .size = 2, .res_mask = 0xFFC0, .ro_mask = 0x0030, + .rw1c_mask = 0x000F, .init = xen_pt_common_reg_init, .u.w.read = xen_pt_word_reg_read, .u.w.write = xen_pt_word_reg_write, @@ -964,6 +969,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = { .offset = PCI_EXP_LNKSTA, .size = 2, .ro_mask = 0x3FFF, + .rw1c_mask = 0xC000, .init = xen_pt_common_reg_init, .u.w.read = xen_pt_word_reg_read, .u.w.write = xen_pt_word_reg_write, @@ -1000,27 +1006,6 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = { * Power Management Capability */ -/* write Power Management Control/Status register */ -static int xen_pt_pmcsr_reg_write(XenPCIPassthroughState *s, - XenPTReg *cfg_entry, uint16_t *val, - uint16_t dev_value, uint16_t valid_mask) -{ - XenPTRegInfo *reg = cfg_entry->reg; - uint16_t writable_mask = 0; - uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); - uint16_t *data = cfg_entry->ptr.half_word; - - /* modify emulate register */ - writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; - *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); - - /* create value for writing to I/O device register */ - *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~PCI_PM_CTRL_PME_STATUS, - throughable_mask); - - return 0; -} - /* Power Management Capability reg static information table */ static XenPTRegInfo xen_pt_emu_reg_pm[] = { /* Next Pointer reg */ @@ -1051,11 +1036,12 @@ static XenPTRegInfo xen_pt_emu_reg_pm[] = { .size = 2, .init_val = 0x0008, .res_mask = 0x00F0, - .ro_mask = 0xE10C, + .ro_mask = 0x610C, + .rw1c_mask = 0x8000, .emu_mask = 0x810B, .init = xen_pt_common_reg_init, .u.w.read = xen_pt_word_reg_read, - .u.w.write = xen_pt_pmcsr_reg_write, + .u.w.write = xen_pt_word_reg_write, }, { .size = 0, From fc3e493bc8e96ef4bf7ae4f035f43cb39382c936 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 4 Dec 2015 14:41:02 +0000 Subject: [PATCH 4/4] xen_disk: treat "vhd" as "vpc" The Xen toolstack uses "vhd" to specify a disk in VHD format, however the name of the driver in QEMU is "vpc". Replace "vhd" with "vpc", so that QEMU can find the right driver to use for it. Signed-off-by: Stefano Stabellini --- hw/block/xen_disk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 814665034d..a48e726f4a 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -825,6 +825,9 @@ static int blk_init(struct XenDevice *xendev) if (!strcmp("aio", blkdev->fileproto)) { blkdev->fileproto = "raw"; } + if (!strcmp("vhd", blkdev->fileproto)) { + blkdev->fileproto = "vpc"; + } if (blkdev->mode == NULL) { blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); }