From 0517cc9863bd4753be56d47da1a568538069e19f Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 10 Jun 2016 04:15:40 -0500 Subject: [PATCH 01/34] smbios: Move table build tools into an include file. This will let things in other files (like IPMI) build SMBIOS tables. Signed-off-by: Corey Minyard Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 70 +++----------------------------- hw/smbios/smbios_build.h | 87 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 64 deletions(-) create mode 100644 hw/smbios/smbios_build.h diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index cb8a111102..5dc3e43aa8 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -24,6 +24,7 @@ #include "hw/smbios/smbios.h" #include "hw/loader.h" #include "exec/cpu-common.h" +#include "smbios_build.h" /* legacy structures and constants for <= 2.0 machines */ struct smbios_header { @@ -53,10 +54,10 @@ static bool smbios_uuid_encoded = true; /* end: legacy structures & constants for <= 2.0 machines */ -static uint8_t *smbios_tables; -static size_t smbios_tables_len; -static unsigned smbios_table_max; -static unsigned smbios_table_cnt; +uint8_t *smbios_tables; +size_t smbios_tables_len; +unsigned smbios_table_max; +unsigned smbios_table_cnt; static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_21; static SmbiosEntryPoint ep; @@ -429,7 +430,7 @@ uint8_t *smbios_get_table_legacy(size_t *length) /* end: legacy setup functions for <= 2.0 machines */ -static bool smbios_skip_table(uint8_t type, bool required_table) +bool smbios_skip_table(uint8_t type, bool required_table) { if (test_bit(type, have_binfile_bitmap)) { return true; /* user provided their own binary blob(s) */ @@ -443,65 +444,6 @@ static bool smbios_skip_table(uint8_t type, bool required_table) return true; } -#define SMBIOS_BUILD_TABLE_PRE(tbl_type, tbl_handle, tbl_required) \ - struct smbios_type_##tbl_type *t; \ - size_t t_off; /* table offset into smbios_tables */ \ - int str_index = 0; \ - do { \ - /* should we skip building this table ? */ \ - if (smbios_skip_table(tbl_type, tbl_required)) { \ - return; \ - } \ - \ - /* use offset of table t within smbios_tables */ \ - /* (pointer must be updated after each realloc) */ \ - t_off = smbios_tables_len; \ - smbios_tables_len += sizeof(*t); \ - smbios_tables = g_realloc(smbios_tables, smbios_tables_len); \ - t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ - \ - t->header.type = tbl_type; \ - t->header.length = sizeof(*t); \ - t->header.handle = cpu_to_le16(tbl_handle); \ - } while (0) - -#define SMBIOS_TABLE_SET_STR(tbl_type, field, value) \ - do { \ - int len = (value != NULL) ? strlen(value) + 1 : 0; \ - if (len > 1) { \ - smbios_tables = g_realloc(smbios_tables, \ - smbios_tables_len + len); \ - memcpy(smbios_tables + smbios_tables_len, value, len); \ - smbios_tables_len += len; \ - /* update pointer post-realloc */ \ - t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ - t->field = ++str_index; \ - } else { \ - t->field = 0; \ - } \ - } while (0) - -#define SMBIOS_BUILD_TABLE_POST \ - do { \ - size_t term_cnt, t_size; \ - \ - /* add '\0' terminator (add two if no strings defined) */ \ - term_cnt = (str_index == 0) ? 2 : 1; \ - smbios_tables = g_realloc(smbios_tables, \ - smbios_tables_len + term_cnt); \ - memset(smbios_tables + smbios_tables_len, 0, term_cnt); \ - smbios_tables_len += term_cnt; \ - \ - /* update smbios max. element size */ \ - t_size = smbios_tables_len - t_off; \ - if (t_size > smbios_table_max) { \ - smbios_table_max = t_size; \ - } \ - \ - /* update smbios element count */ \ - smbios_table_cnt++; \ - } while (0) - static void smbios_build_type_0_table(void) { SMBIOS_BUILD_TABLE_PRE(0, 0x000, false); /* optional, leave up to BIOS */ diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h new file mode 100644 index 0000000000..68b8b72e09 --- /dev/null +++ b/hw/smbios/smbios_build.h @@ -0,0 +1,87 @@ +/* + * SMBIOS Support + * + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2013 Red Hat, Inc. + * + * Authors: + * Alex Williamson + * Markus Armbruster + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_SMBIOS_BUILD_H +#define QEMU_SMBIOS_BUILD_H + +bool smbios_skip_table(uint8_t type, bool required_table); + +extern uint8_t *smbios_tables; +extern size_t smbios_tables_len; +extern unsigned smbios_table_max; +extern unsigned smbios_table_cnt; + +#define SMBIOS_BUILD_TABLE_PRE(tbl_type, tbl_handle, tbl_required) \ + struct smbios_type_##tbl_type *t; \ + size_t t_off; /* table offset into smbios_tables */ \ + int str_index = 0; \ + do { \ + /* should we skip building this table ? */ \ + if (smbios_skip_table(tbl_type, tbl_required)) { \ + return; \ + } \ + \ + /* use offset of table t within smbios_tables */ \ + /* (pointer must be updated after each realloc) */ \ + t_off = smbios_tables_len; \ + smbios_tables_len += sizeof(*t); \ + smbios_tables = g_realloc(smbios_tables, smbios_tables_len); \ + t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ + \ + t->header.type = tbl_type; \ + t->header.length = sizeof(*t); \ + t->header.handle = cpu_to_le16(tbl_handle); \ + } while (0) + +#define SMBIOS_TABLE_SET_STR(tbl_type, field, value) \ + do { \ + int len = (value != NULL) ? strlen(value) + 1 : 0; \ + if (len > 1) { \ + smbios_tables = g_realloc(smbios_tables, \ + smbios_tables_len + len); \ + memcpy(smbios_tables + smbios_tables_len, value, len); \ + smbios_tables_len += len; \ + /* update pointer post-realloc */ \ + t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ + t->field = ++str_index; \ + } else { \ + t->field = 0; \ + } \ + } while (0) + +#define SMBIOS_BUILD_TABLE_POST \ + do { \ + size_t term_cnt, t_size; \ + \ + /* add '\0' terminator (add two if no strings defined) */ \ + term_cnt = (str_index == 0) ? 2 : 1; \ + smbios_tables = g_realloc(smbios_tables, \ + smbios_tables_len + term_cnt); \ + memset(smbios_tables + smbios_tables_len, 0, term_cnt); \ + smbios_tables_len += term_cnt; \ + \ + /* update smbios max. element size */ \ + t_size = smbios_tables_len - t_off; \ + if (t_size > smbios_table_max) { \ + smbios_table_max = t_size; \ + } \ + \ + /* update smbios element count */ \ + smbios_table_cnt++; \ + } while (0) + +#endif /* QEMU_SMBIOS_BUILD_H */ From 35658f6e0c3dacfdb22198ee3917c0c28914d81d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 10 Jun 2016 04:15:41 -0500 Subject: [PATCH 02/34] ipmi: Add SMBIOS table entry Add an IPMI table entry to the SMBIOS. Signed-off-by: Corey Minyard Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/Makefile.objs | 1 + hw/smbios/smbios.c | 2 + hw/smbios/smbios_type_38.c | 117 +++++++++++++++++++++++++++++++++++++ include/hw/smbios/ipmi.h | 15 +++++ stubs/Makefile.objs | 1 + stubs/smbios_type_38.c | 14 +++++ 6 files changed, 150 insertions(+) create mode 100644 hw/smbios/smbios_type_38.c create mode 100644 include/hw/smbios/ipmi.h create mode 100644 stubs/smbios_type_38.c diff --git a/hw/smbios/Makefile.objs b/hw/smbios/Makefile.objs index f69a92f967..c3d3753602 100644 --- a/hw/smbios/Makefile.objs +++ b/hw/smbios/Makefile.objs @@ -1 +1,2 @@ common-obj-$(CONFIG_SMBIOS) += smbios.o +common-obj-$(call land,$(CONFIG_SMBIOS),$(CONFIG_IPMI)) += smbios_type_38.o diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 5dc3e43aa8..74c7102929 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -25,6 +25,7 @@ #include "hw/loader.h" #include "exec/cpu-common.h" #include "smbios_build.h" +#include "hw/smbios/ipmi.h" /* legacy structures and constants for <= 2.0 machines */ struct smbios_header { @@ -848,6 +849,7 @@ void smbios_get_tables(const struct smbios_phys_mem_area *mem_array, } smbios_build_type_32_table(); + smbios_build_type_38_table(); smbios_build_type_127_table(); smbios_validate_table(); diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c new file mode 100644 index 0000000000..56e8609c00 --- /dev/null +++ b/hw/smbios/smbios_type_38.c @@ -0,0 +1,117 @@ +/* + * IPMI SMBIOS firmware handling + * + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ipmi/ipmi.h" +#include "hw/smbios/ipmi.h" +#include "hw/smbios/smbios.h" +#include "qemu/error-report.h" +#include "smbios_build.h" + +/* SMBIOS type 38 - IPMI */ +struct smbios_type_38 { + struct smbios_structure_header header; + uint8_t interface_type; + uint8_t ipmi_spec_revision; + uint8_t i2c_slave_address; + uint8_t nv_storage_device_address; + uint64_t base_address; + uint8_t base_address_modifier; + uint8_t interrupt_number; +} QEMU_PACKED; + +static void smbios_build_one_type_38(IPMIFwInfo *info) +{ + uint64_t baseaddr = info->base_address; + SMBIOS_BUILD_TABLE_PRE(38, 0x3000, true); + + t->interface_type = info->interface_type; + t->ipmi_spec_revision = ((info->ipmi_spec_major_revision << 4) + | info->ipmi_spec_minor_revision); + t->i2c_slave_address = info->i2c_slave_address; + t->nv_storage_device_address = 0; + + assert(info->ipmi_spec_minor_revision <= 15); + assert(info->ipmi_spec_major_revision <= 15); + + /* or 1 to set it to I/O space */ + switch (info->memspace) { + case IPMI_MEMSPACE_IO: + baseaddr |= 1; + break; + case IPMI_MEMSPACE_MEM32: + case IPMI_MEMSPACE_MEM64: + break; + case IPMI_MEMSPACE_SMBUS: + baseaddr <<= 1; + break; + } + + t->base_address = cpu_to_le64(baseaddr); + + t->base_address_modifier = 0; + if (info->irq_type == IPMI_LEVEL_IRQ) { + t->base_address_modifier |= 1; + } + switch (info->register_spacing) { + case 1: + break; + case 4: + t->base_address_modifier |= 1 << 6; + break; + case 16: + t->base_address_modifier |= 2 << 6; + break; + default: + error_report("IPMI register spacing %d is not compatible with" + " SMBIOS, ignoring this entry.", info->register_spacing); + return; + } + t->interrupt_number = info->interrupt_number; + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_add_ipmi_devices(BusState *bus) +{ + BusChild *kid; + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + Object *obj = object_dynamic_cast(OBJECT(dev), TYPE_IPMI_INTERFACE); + BusState *childbus; + + if (obj) { + IPMIInterface *ii; + IPMIInterfaceClass *iic; + IPMIFwInfo info; + + ii = IPMI_INTERFACE(obj); + iic = IPMI_INTERFACE_GET_CLASS(obj); + memset(&info, 0, sizeof(info)); + iic->get_fwinfo(ii, &info); + smbios_build_one_type_38(&info); + continue; + } + + QLIST_FOREACH(childbus, &dev->child_bus, sibling) { + smbios_add_ipmi_devices(childbus); + } + } +} + +void smbios_build_type_38_table(void) +{ + BusState *bus; + + bus = sysbus_get_default(); + if (bus) { + smbios_add_ipmi_devices(bus); + } +} diff --git a/include/hw/smbios/ipmi.h b/include/hw/smbios/ipmi.h new file mode 100644 index 0000000000..1c9aae38f2 --- /dev/null +++ b/include/hw/smbios/ipmi.h @@ -0,0 +1,15 @@ +/* + * IPMI SMBIOS firmware handling + * + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_SMBIOS_IPMI_H +#define QEMU_SMBIOS_IPMI_H + +void smbios_build_type_38_table(void); + +#endif /* QEMU_SMBIOS_IPMI_H */ diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 4b258a6731..b21cd5f77d 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -41,3 +41,4 @@ stub-obj-y += target-monitor-defs.o stub-obj-y += target-get-monitor-def.o stub-obj-y += vhost.o stub-obj-y += iohandler.o +stub-obj-y += smbios_type_38.o diff --git a/stubs/smbios_type_38.c b/stubs/smbios_type_38.c new file mode 100644 index 0000000000..9528c2c28e --- /dev/null +++ b/stubs/smbios_type_38.c @@ -0,0 +1,14 @@ +/* + * IPMI SMBIOS firmware handling + * + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hw/smbios/ipmi.h" + +void smbios_build_type_38_table(void) +{ +} From 86e91dd713414dda40bb16aabe2f1fc9ba95a3a7 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 10 Jun 2016 04:15:42 -0500 Subject: [PATCH 03/34] acpi: Add IPMI table entries Use the ACPI table construction tools to create an ACPI entry for IPMI. This adds a function called build_acpi_ipmi_devices to add an DSDT entry for IPMI if IPMI is compiled in and an IPMI device exists. It also adds a dummy function if IPMI is not compiled in. This conforms to section "C3-2 Locating IPMI System Interfaces in ACPI Name Space" in the IPMI 2.0 specification. Signed-off-by: Corey Minyard Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/Makefile.objs | 1 + hw/acpi/ipmi.c | 105 +++++++++++++++++++++++++++++++++++++++++ hw/i386/acpi-build.c | 12 +++++ include/hw/acpi/ipmi.h | 22 +++++++++ stubs/Makefile.objs | 1 + stubs/ipmi.c | 14 ++++++ 6 files changed, 155 insertions(+) create mode 100644 hw/acpi/ipmi.c create mode 100644 include/hw/acpi/ipmi.h create mode 100644 stubs/ipmi.c diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 66bd72702b..1a48f61676 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -6,3 +6,4 @@ obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o common-obj-$(CONFIG_ACPI) += aml-build.o +common-obj-$(call land,$(CONFIG_ACPI),$(CONFIG_IPMI)) += ipmi.o diff --git a/hw/acpi/ipmi.c b/hw/acpi/ipmi.c new file mode 100644 index 0000000000..7e74ce4460 --- /dev/null +++ b/hw/acpi/ipmi.c @@ -0,0 +1,105 @@ +/* + * IPMI ACPI firmware handling + * + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ipmi/ipmi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/ipmi.h" + +static Aml *aml_ipmi_crs(IPMIFwInfo *info) +{ + Aml *crs = aml_resource_template(); + + /* + * The base address is fixed and cannot change. That may be different + * if someone does PCI, but we aren't there yet. + */ + switch (info->memspace) { + case IPMI_MEMSPACE_IO: + aml_append(crs, aml_io(AML_DECODE16, info->base_address, + info->base_address + info->register_length - 1, + info->register_spacing, info->register_length)); + break; + case IPMI_MEMSPACE_MEM32: + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, + AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, + 0xffffffff, + info->base_address, + info->base_address + info->register_length - 1, + info->register_spacing, info->register_length)); + break; + case IPMI_MEMSPACE_MEM64: + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, + AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, + 0xffffffffffffffffULL, + info->base_address, + info->base_address + info->register_length - 1, + info->register_spacing, info->register_length)); + break; + case IPMI_MEMSPACE_SMBUS: + aml_append(crs, aml_return(aml_int(info->base_address))); + break; + default: + abort(); + } + + if (info->interrupt_number) { + aml_append(crs, aml_irq_no_flags(info->interrupt_number)); + } + + return crs; +} + +static Aml *aml_ipmi_device(IPMIFwInfo *info) +{ + Aml *dev; + uint16_t version = ((info->ipmi_spec_major_revision << 8) + | (info->ipmi_spec_minor_revision << 4)); + + assert(info->ipmi_spec_minor_revision <= 15); + + dev = aml_device("MI%d", info->uuid); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("IPI0001"))); + aml_append(dev, aml_name_decl("_STR", aml_string("ipmi_%s", + info->interface_name))); + aml_append(dev, aml_name_decl("_UID", aml_int(info->uuid))); + aml_append(dev, aml_name_decl("_CRS", aml_ipmi_crs(info))); + aml_append(dev, aml_name_decl("_IFT", aml_int(info->interface_type))); + aml_append(dev, aml_name_decl("_SRV", aml_int(version))); + + return dev; +} + +void build_acpi_ipmi_devices(Aml *scope, BusState *bus) +{ + + BusChild *kid; + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + IPMIInterface *ii; + IPMIInterfaceClass *iic; + IPMIFwInfo info; + Object *obj = object_dynamic_cast(OBJECT(kid->child), + TYPE_IPMI_INTERFACE); + + if (!obj) { + continue; + } + + ii = IPMI_INTERFACE(obj); + iic = IPMI_INTERFACE_GET_CLASS(obj); + iic->get_fwinfo(ii, &info); + aml_append(scope, aml_ipmi_device(&info)); + } +} diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 8ca203211a..b3dc1df25a 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -58,6 +58,8 @@ #include "qapi/qmp/qint.h" #include "qom/qom-qobject.h" +#include "hw/acpi/ipmi.h" + /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows * a little bit, there should be plenty of free space since the DSDT @@ -1334,8 +1336,10 @@ static Aml *build_com_device_aml(uint8_t uid) static void build_isa_devices_aml(Aml *table) { ISADevice *fdc = pc_find_fdc0(); + bool ambiguous; Aml *scope = aml_scope("_SB.PCI0.ISA"); + Object *obj = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous); aml_append(scope, build_rtc_device_aml()); aml_append(scope, build_kbd_device_aml()); @@ -1347,6 +1351,14 @@ static void build_isa_devices_aml(Aml *table) aml_append(scope, build_com_device_aml(1)); aml_append(scope, build_com_device_aml(2)); + if (ambiguous) { + error_report("Multiple ISA busses, unable to define IPMI ACPI data"); + } else if (!obj) { + error_report("No ISA bus, unable to define IPMI ACPI data"); + } else { + build_acpi_ipmi_devices(scope, BUS(obj)); + } + aml_append(table, scope); } diff --git a/include/hw/acpi/ipmi.h b/include/hw/acpi/ipmi.h new file mode 100644 index 0000000000..ab2bb29048 --- /dev/null +++ b/include/hw/acpi/ipmi.h @@ -0,0 +1,22 @@ +/* + * QEMU IPMI ACPI handling + * + * Copyright (c) 2015,2016 Corey Minyard + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_ACPI_IPMI_H +#define HW_ACPI_IPMI_H + +#include "qemu/osdep.h" +#include "hw/acpi/aml-build.h" + +/* + * Add ACPI IPMI entries for all registered IPMI devices whose parent + * bus matches the given bus. The resource is the ACPI resource that + * contains the IPMI device, this is required for the I2C CRS. + */ +void build_acpi_ipmi_devices(Aml *table, BusState *bus); + +#endif /* HW_ACPI_IPMI_H */ diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index b21cd5f77d..b829ee6e1a 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -42,3 +42,4 @@ stub-obj-y += target-get-monitor-def.o stub-obj-y += vhost.o stub-obj-y += iohandler.o stub-obj-y += smbios_type_38.o +stub-obj-y += ipmi.o diff --git a/stubs/ipmi.c b/stubs/ipmi.c new file mode 100644 index 0000000000..98b6dcee0d --- /dev/null +++ b/stubs/ipmi.c @@ -0,0 +1,14 @@ +/* + * IPMI ACPI firmware handling + * + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hw/acpi/ipmi.h" + +void build_acpi_ipmi_devices(Aml *table, BusState *bus) +{ +} From f4eda2d429ecd85a99008c6c4fc74cf81b2f1498 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 10 Jun 2016 04:15:43 -0500 Subject: [PATCH 04/34] bios: Add tests for the IPMI ACPI and SMBIOS entries Signed-off-by: Corey Minyard Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/acpi-test-data/pc/DSDT.ipmikcs | Bin 0 -> 5575 bytes tests/acpi-test-data/q35/DSDT.ipmibt | Bin 0 -> 8340 bytes tests/bios-tables-test.c | 60 +++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 tests/acpi-test-data/pc/DSDT.ipmikcs create mode 100644 tests/acpi-test-data/q35/DSDT.ipmibt diff --git a/tests/acpi-test-data/pc/DSDT.ipmikcs b/tests/acpi-test-data/pc/DSDT.ipmikcs new file mode 100644 index 0000000000000000000000000000000000000000..f10cd9e296c942b66d6f2404f1a95fe5cc4a6796 GIT binary patch literal 5575 zcmb7I-ESMm5ud#yrQ?#6j?!6{?Ib4Tug0xye%Nw?7KO+=N~FxA&OFL>aYpYb$tcw( z^+Ab&$U*>_0Td^1DdVDjqg_7Re?s%tKITtI|AiDy{1kQOj!Ushu1E>+I3K^Ancbb4 z-J@X}t-qfDU|*Ei^s1dL-?elDWefm_reC!dy9ds`QC@LbQi?I>aWbIsQC75{8RhjA z_UDfGC(nETV~@3c*m$l!-DvwCJ@GbxKo2*3&WTVLT)XT#-7}V6%PDTPW~54Aa#?Xp zMiUfwy=E}gR6b|%B84iY$HG!8L< zT~IGvrD!Nzr+dn}n=b~jret6kij^P?vhB#bU!d=hl68AWUd+|3{ z$qJ1Gs9uA~x>ac~cnf5`M1-uD@khON?wTwdlQpM%rP_=eKc@-_V_@B z#}_~)U`bgv^wB0x4kJ3l=U3R*^4BuNR@k@lx42LXOK>?B*v>E4t&-U6WurEeRe{I( z8qYEQl1uyoerNa@e#qb9-|%U!DDJXR$(-aGrQ!HTLdNO-vTn6TvBp4~54WC{)ay^+ zb`RiLY&2(;r&rhq@&~e_EbIsIRATy?nHP&1XJG~kGqfR5W?sp!WKb5=z|1n4DI7Dy zrI8)n(MUVeD+%o=Lpx?IKb+6S1YkIJtC`;R-`#e9zxm# zY_g;pDu~g`jde5YuyS3$1=m3;#5AG z!CkA_0)Ix@_k3tSFDIV1`M&?C%_Z3I_jcMFz-8F%tG!JGC9wSJjavq6_0RVY|cZeR4*y+|Mahg~ZVPU7R5mB2{aINgj9H0GU3KDW0 zvz_BNiQiq&ro&pMASgU7;~F&^J|vp&2a;*(4ltmQ0{$fChRr_hgi&BuY}QCV^j<#Z z9aHH?d(V$~&j;S~7~i<}y!ON1^G?^Kx$DQgbLszZUl?;=2;3Krxi3VxFAUuq$KB&W z^P^{99CKd`+!v3zFGjd8I^DGx&fY%mJ{GzcDMY6cZn@+jo}Wo_1r)S{pSHjKgbL2@;ExQxhd(hGw6)W^g(-bBsvq~T zaXym=pmE)zVqf6@&{ikVpAMKnwtFAXc^|yz%kih)$K-1`*n@VT?|FO@9&NCVXDJll z#}ilvs`3qn6~6GtUVGEu3s^P?3oh$a`~a>bZ^D3QYBi=%4m)FL{J@R z-CEC9AH46s^YFLXo%`=Sc(}89pZX&n{5(0yOx$Ekp!DJ-eZ zN7r>LmkSq1OS@*N1p`_@04)`k87&N;sqZ4Im*x z5Lm@*E?&$@FT=S_7EE=4dhc8Y&|4rkgSH}#d<|ZFC#gW&Y7jV zL+oiJtG?MRS7{ItbuxrRy-EX!sFRT$5)(mVi3!v+y01MY8R>V3XMr#i2-*812~l;@ z)%(U|kHmmeE*3Ip`D-7a9G@~xm8MBk45n8saE|6(oD-5$ICese zC#i-zM8ihp;&tu3Y27v1@eS}%>K<*=Ps}yD75g4&+gz*M@qdR(Jv6B;n3(%`+J_k> zwiD9?FAO0>txZtU2fZv^(ydx9hbgVSja85b=iI%c%WQaK7!z1}qvc#06A(f>g(Hh_ z?^;Y;f~jCA%hgI|SXI~?!cs3!@;^LDgM{w{w5IgHOYN)Fs}xgE&camt7S*p(BR(DM zz`w@#h2wtE2}>Ud0*$zKDu;)E`!3Xc@eN&m^%zb!y!;-kV3PmOru5azuU@hk8Ci{4 zRjU~;+JCnJYO4LXj>krpr6k3N zt!K3f>SmH^KA_`5DWewGHfiT_c$KF2N~e3-&JP((0tqynV&1?WN<={rMIxN;n|A&! z9B8jMhVP{~LK%>B?qJ?Xo1WU6i;n*g;0D8iGT{LaRH4-L5spF9(SSDiAboh zIw_!&QPAtcD!x^PR+UJou$mCiL=^Ofu*wCLM?!^FJfgz@l2On_VKpV7sYs}>nikM> z6!fOBIwhb}kx*e3@8e;t(^1g3gjH2Q)kvtYni0@U6!dLjbw)sEBB8?Stboo&K}*6a zW_~y=EfOlMo)FLzQP8rmIwzoWkx*gvq=24`g6hKRynxO}LWR{+0(vS6Y6z>R1@v?z zR9HPDpl70>6=C%?0evkJDy+UPpszB7@y5WU|Bv%@Q)$kwVQrF{DZFb&~ks)`5*}WyndyjdU^OusIBMY~=8J;C6YBeOKrL>miuZU2Kq)BT>wj8(UM__XKX>+ADNjnK>fRr4!a?>;m z#RifX0c-`xhYtl3#X)hr$Ck!MJ&hmJM9S&j&2Sr= zlI#DZT)JFy{FOz!N>g#L7-b3v6R&Cp6|6wz3!bTXm^~9 z_)=!MU2-znFDWhhOXs7YQF6VtEJL?zS;Wh3uihwUi(gCE?FFipbbG~eOSrwujk4pm zFGrDZgHEd{{PI+9tsSK9y&;0~E2jt5$(_yoob4H2| zb7Bz-FR>(?nzJRgL?ZpJ=x+b?VyV0jPbU2-o{xQrOzexkaKL(9ypH`+0!q8v&xLE3 z_lYxeqeNFuED4W*FGI^dFeUs2HRCk?{veH6-W3m0`vpCjt2yr^-$}ae(j6{TNnuV- zYef@|x-xYdnvjFiJW4)F3U|rlG*wD-W+aVF?B~Ip9+)TbD1kgK=V_$E#_6G+XQu>) z)9$o#H{v&M^fqql+lD8drTkO;OE}4;0=RCkQSC@vneEnm$g#ymiENKE8sY=W7<4kKb}XhJX(P-Y?|CPu^9Bs290=u+ma3{5H~pgJU` zu7X=gVL4BVnHri>Og(snrp^gV=L9n~bV4!p;1QZS)0WOOGc`1A>qKbkoV0XKS~@3f zod`{xuBFqpbh@@qgr-g`HpWcj;#0H7lR0JUL}==imQHEul(tTUrp}C|Gh^w@*g6rK zI(=QC^R%-T8;nmTiq&YYz)XX`|0>YTQ8PFp&sZJh{Boimos8B6DktrMZC)3bDX zmQK&siO|%Uw{+$$oq1a)LR06grE}KOIcw`gXzHA^bk12i=WLw_O`X1_)3WBGfuzpbE=O7^ndy165FFl7S-B zI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k0VM-fP-c>W zBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O7^ndy165FF zl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k0VM-f zP-c>WBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O7^ndy z165FFl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k z0VM-fP-c>WBGfuzpbE=O7^ndy165FFl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpbE=O z7^ndy165FFl7S-BI$@v+%S;%k0VM-fP-c>WBGfuzpok0uMPwQ%LeoGI8U|`GVW0++ z4Afwfff`I0sKJDR8cZ@!gGmNzFkzqu69#H9$v_Pz8K}X8ff`I0sKF!yHJD_e1``H~ zNS|F8C?cFOo=h@Ogg6a4B&MEQ7$_n=w=hsddTz-;5$d@m14URC{?Q&RR1C5^{AYES z{*XRM)2`UP`|}r*^e>fOJJ5>_Cu&_P!f`K^XcN$;>Mfti>zRuU(0kvHQli)8SEN zES>6mV!E;W;Ebz~Kh9^QEAX#`e(Uj#kJn{F#GBY9Mcl_OC3e^Sqd^4Ed(0Y4 z^zqjFL2_64GKeY{65S>FSN}p9t7jnaDWcuGj!!WYK*cUoG(FbtAsL;fQO4rO$TOuf^ARfPFELJJ+1mPW83E^T6f3Oy25uw=bqn zk%338N?YGb5-SJ{G#5md3-z~d#=pDuo5I%hx8J(8b?y4wuvL64ks0+(Atz&ICrjDLy3r4VNZhHt9J2{MA#vT2$#(md^+@>3a*N%v(Kae0wRL4C|4) zXL8YMv|L<1r{2xy!s~Cowe@acb4;8+$DTPmH)9wkyrS+)mh7Z95@PckEE`V1D55gst$3>Xk`!Biiaa{qA#= zWY_UehBSXh+W1eKf)i$T!b-d6#BxTX`+jjD#Hu!>hoG@;*gAJMzap3K{aA=Bt_S<~ z-JbgC-eI_hCoCe$X$-chnzLxuRrA?qMfbjd=CF4@EWSF#9!0YrhpkebNYrRjA~c$H zB2c4ANw%n)8pNf54-H6ch>)57`+-g zqbDFnZ}0JG`cb&n?xe;*H^Q~bmH4+f7^sx~Qiy9mF$Q$dJsg0?*F4{*xndACmX~ow z`$upStca^Grt~*Jy6Wjt^%~jXt-)V;$pTB0^7kL-L;`OWXkuuCZ+efikFt)7en(B- zw>bPLyC3Ix8vkQXuEeC$lQjJjf{*ZXI>N8^_Wtr69}r)?OyB;36mC@Ym3ibgr2hZA zvJ}#Vw1W?b-t$*!-C60)rc(|Ip?$VD>!xG^i!4Pi(Ge=@{o=9Y?)ItbD}l15+@(r* z8S{09&?D=YH{neLVNjVD!?go1Pr`=x{PIu8fSE4g&XN8SnY`a0dp?SY5)KL literal 0 HcmV?d00001 diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 16d11aa854..92c90dd194 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -49,6 +49,8 @@ typedef struct { GArray *tables; uint32_t smbios_ep_addr; struct smbios_21_entry_point smbios_ep_table; + uint8_t *required_struct_types; + int required_struct_types_len; } test_data; #define ACPI_READ_FIELD(field, addr) \ @@ -334,7 +336,7 @@ static void test_acpi_tables(test_data *data) for (i = 0; i < tables_nr; i++) { AcpiSdtTable ssdt_table; - memset(&ssdt_table, 0 , sizeof(ssdt_table)); + memset(&ssdt_table, 0, sizeof(ssdt_table)); uint32_t addr = data->rsdt_tables_addr[i + 1]; /* fadt is first */ test_dst_table(&ssdt_table, addr); g_array_append_val(data->tables, ssdt_table); @@ -661,7 +663,6 @@ static void test_smbios_structs(test_data *data) uint32_t addr = ep_table->structure_table_address; int i, len, max_len = 0; uint8_t type, prv, crt; - uint8_t required_struct_types[] = {0, 1, 3, 4, 16, 17, 19, 32, 127}; /* walk the smbios tables */ for (i = 0; i < ep_table->number_of_structures; i++) { @@ -701,8 +702,8 @@ static void test_smbios_structs(test_data *data) g_assert_cmpuint(ep_table->max_structure_size, ==, max_len); /* required struct types must all be present */ - for (i = 0; i < ARRAY_SIZE(required_struct_types); i++) { - g_assert(test_bit(required_struct_types[i], struct_bitmap)); + for (i = 0; i < data->required_struct_types_len; i++) { + g_assert(test_bit(data->required_struct_types[i], struct_bitmap)); } } @@ -742,6 +743,10 @@ static void test_acpi_one(const char *params, test_data *data) g_free(args); } +static uint8_t base_required_struct_types[] = { + 0, 1, 3, 4, 16, 17, 19, 32, 127 +}; + static void test_acpi_piix4_tcg(void) { test_data data; @@ -751,6 +756,8 @@ static void test_acpi_piix4_tcg(void) */ memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; + data.required_struct_types = base_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one("-machine accel=tcg", &data); free_test_data(&data); } @@ -762,6 +769,8 @@ static void test_acpi_piix4_tcg_bridge(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".bridge"; + data.required_struct_types = base_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one("-machine accel=tcg -device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } @@ -772,6 +781,8 @@ static void test_acpi_q35_tcg(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; + data.required_struct_types = base_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one("-machine q35,accel=tcg", &data); free_test_data(&data); } @@ -783,11 +794,50 @@ static void test_acpi_q35_tcg_bridge(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".bridge"; + data.required_struct_types = base_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one("-machine q35,accel=tcg -device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } +static uint8_t ipmi_required_struct_types[] = { + 0, 1, 3, 4, 16, 17, 19, 32, 38, 127 +}; + +static void test_acpi_q35_tcg_ipmi(void) +{ + test_data data; + + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_Q35; + data.variant = ".ipmibt"; + data.required_struct_types = ipmi_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); + test_acpi_one("-machine q35,accel=tcg -device ipmi-bmc-sim,id=bmc0" + " -device isa-ipmi-bt,bmc=bmc0", + &data); + free_test_data(&data); +} + +static void test_acpi_piix4_tcg_ipmi(void) +{ + test_data data; + + /* Supplying -machine accel argument overrides the default (qtest). + * This is to make guest actually run. + */ + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_PC; + data.variant = ".ipmikcs"; + data.required_struct_types = ipmi_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); + test_acpi_one("-machine accel=tcg -device ipmi-bmc-sim,id=bmc0" + " -device isa-ipmi-kcs,irq=0,bmc=bmc0", + &data); + free_test_data(&data); +} + int main(int argc, char *argv[]) { const char *arch = qtest_get_arch(); @@ -804,6 +854,8 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/piix4/tcg/bridge", test_acpi_piix4_tcg_bridge); qtest_add_func("acpi/q35/tcg", test_acpi_q35_tcg); qtest_add_func("acpi/q35/tcg/bridge", test_acpi_q35_tcg_bridge); + qtest_add_func("acpi/piix4/tcg/ipmi", test_acpi_piix4_tcg_ipmi); + qtest_add_func("acpi/q35/tcg/ipmi", test_acpi_q35_tcg_ipmi); } ret = g_test_run(); boot_sector_cleanup(disk); From 8df1426e44176512be1b6456e90d100d1af907e1 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:21:57 +0800 Subject: [PATCH 05/34] pc-dimm: introduce get_vmstate_memory_region callback This callback returns the MemoryRegion that is the memory of dimm should be kept during live migration nvdimm device is different with pc-dimm as its memory includes not only the MemoryRegion directly mapping to guest's address space but also the memory used as label data Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 14 ++++++++++++-- include/hw/mem/pc-dimm.h | 5 ++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 6de2275986..249193a543 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -40,6 +40,8 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, int slot; MachineState *machine = MACHINE(qdev_get_machine()); PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm); Error *local_err = NULL; uint64_t existing_dimms_capacity = 0; uint64_t addr; @@ -105,7 +107,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, } memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); - vmstate_register_ram(mr, dev); + vmstate_register_ram(vmstate_mr, dev); numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); out: @@ -116,10 +118,12 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, MemoryRegion *mr) { PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm); numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node); memory_region_del_subregion(&hpms->mr, mr); - vmstate_unregister_ram(mr, dev); + vmstate_unregister_ram(vmstate_mr, dev); } static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque) @@ -424,6 +428,11 @@ static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm) return host_memory_backend_get_memory(dimm->hostmem, &error_abort); } +static MemoryRegion *pc_dimm_get_vmstate_memory_region(PCDIMMDevice *dimm) +{ + return host_memory_backend_get_memory(dimm->hostmem, &error_abort); +} + static void pc_dimm_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -434,6 +443,7 @@ static void pc_dimm_class_init(ObjectClass *oc, void *data) dc->desc = "DIMM memory module"; ddc->get_memory_region = pc_dimm_get_memory_region; + ddc->get_vmstate_memory_region = pc_dimm_get_vmstate_memory_region; } static TypeInfo pc_dimm_info = { diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 67e92d8f7b..1e483f2670 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -61,7 +61,9 @@ typedef struct PCDIMMDevice { * @realize: called after common dimm is realized so that the dimm based * devices get the chance to do specified operations. * @get_memory_region: returns #MemoryRegion associated with @dimm which - * is directly mapped into the physical address space of guest + * is directly mapped into the physical address space of guest. + * @get_vmstate_memory_region: returns #MemoryRegion which indicates the + * memory of @dimm should be kept during live migration. */ typedef struct PCDIMMDeviceClass { /* private */ @@ -70,6 +72,7 @@ typedef struct PCDIMMDeviceClass { /* public */ void (*realize)(PCDIMMDevice *dimm, Error **errp); MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); + MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm); } PCDIMMDeviceClass; /** From d6fb213a628c66b391d0ce704982bab7c14e559b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:21:58 +0800 Subject: [PATCH 06/34] nvdimm: support nvdimm label Introduce a parameter, 'label-size', which is the size of nvdimm label data area which is reserved at the end of backend memory. It is required at least 128k Two callbacks, read_label_data() and write_label_data(), are used to operate the label area Reviewed-by: Stefan Hajnoczi Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/nvdimm.c | 132 ++++++++++++++++++++++++++++++++++++++++ include/hw/mem/nvdimm.h | 55 ++++++++++++++++- 2 files changed, 186 insertions(+), 1 deletion(-) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 0a602f28ba..81896c0e84 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -23,20 +23,152 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/visitor.h" #include "hw/mem/nvdimm.h" +static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + uint64_t value = nvdimm->label_size; + + visit_type_size(v, name, &value, errp); +} + +static void nvdimm_set_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + Error *local_err = NULL; + uint64_t value; + + if (memory_region_size(&nvdimm->nvdimm_mr)) { + error_setg(&local_err, "cannot change property value"); + goto out; + } + + visit_type_size(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (value < MIN_NAMESPACE_LABEL_SIZE) { + error_setg(&local_err, "Property '%s.%s' (0x%" PRIx64 ") is required" + " at least 0x%lx", object_get_typename(obj), + name, value, MIN_NAMESPACE_LABEL_SIZE); + goto out; + } + + nvdimm->label_size = value; +out: + error_propagate(errp, local_err); +} + +static void nvdimm_init(Object *obj) +{ + object_property_add(obj, "label-size", "int", + nvdimm_get_label_size, nvdimm_set_label_size, NULL, + NULL, NULL); +} + +static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm) +{ + NVDIMMDevice *nvdimm = NVDIMM(dimm); + + return &nvdimm->nvdimm_mr; +} + +static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) +{ + MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp); + NVDIMMDevice *nvdimm = NVDIMM(dimm); + uint64_t align, pmem_size, size = memory_region_size(mr); + + align = memory_region_get_alignment(mr); + + pmem_size = size - nvdimm->label_size; + nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size; + pmem_size = QEMU_ALIGN_DOWN(pmem_size, align); + + if (size <= nvdimm->label_size || !pmem_size) { + HostMemoryBackend *hostmem = dimm->hostmem; + char *path = object_get_canonical_path_component(OBJECT(hostmem)); + + error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too " + "small to contain nvdimm label (0x%" PRIx64 ") and " + "aligned PMEM (0x%" PRIx64 ")", + path, memory_region_size(mr), nvdimm->label_size, align); + return; + } + + memory_region_init_alias(&nvdimm->nvdimm_mr, OBJECT(dimm), + "nvdimm-memory", mr, 0, pmem_size); + nvdimm->nvdimm_mr.align = align; +} + +/* + * the caller should check the input parameters before calling + * label read/write functions. + */ +static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size, + uint64_t offset) +{ + assert((nvdimm->label_size >= size + offset) && (offset + size > offset)); +} + +static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf, + uint64_t size, uint64_t offset) +{ + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + memcpy(buf, nvdimm->label_data + offset, size); +} + +static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf, + uint64_t size, uint64_t offset) +{ + MemoryRegion *mr; + PCDIMMDevice *dimm = PC_DIMM(nvdimm); + uint64_t backend_offset; + + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + memcpy(nvdimm->label_data + offset, buf, size); + + mr = host_memory_backend_get_memory(dimm->hostmem, &error_abort); + backend_offset = memory_region_size(mr) - nvdimm->label_size + offset; + memory_region_set_dirty(mr, backend_offset, size); +} + +static MemoryRegion *nvdimm_get_vmstate_memory_region(PCDIMMDevice *dimm) +{ + return host_memory_backend_get_memory(dimm->hostmem, &error_abort); +} + static void nvdimm_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); + PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); + NVDIMMClass *nvc = NVDIMM_CLASS(oc); /* nvdimm hotplug has not been supported yet. */ dc->hotpluggable = false; + + ddc->realize = nvdimm_realize; + ddc->get_memory_region = nvdimm_get_memory_region; + ddc->get_vmstate_memory_region = nvdimm_get_vmstate_memory_region; + + nvc->read_label_data = nvdimm_read_label_data; + nvc->write_label_data = nvdimm_write_label_data; } static TypeInfo nvdimm_info = { .name = TYPE_NVDIMM, .parent = TYPE_PC_DIMM, + .class_size = sizeof(NVDIMMClass), .class_init = nvdimm_class_init, + .instance_size = sizeof(NVDIMMDevice), + .instance_init = nvdimm_init, }; static void nvdimm_register_types(void) diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 60ee92b85a..1cfe9e01c4 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -34,7 +34,60 @@ } \ } while (0) -#define TYPE_NVDIMM "nvdimm" +/* + * The minimum label data size is required by NVDIMM Namespace + * specification, see the chapter 2 Namespaces: + * "NVDIMMs following the NVDIMM Block Mode Specification use an area + * at least 128KB in size, which holds around 1000 labels." + */ +#define MIN_NAMESPACE_LABEL_SIZE (128UL << 10) + +#define TYPE_NVDIMM "nvdimm" +#define NVDIMM(obj) OBJECT_CHECK(NVDIMMDevice, (obj), TYPE_NVDIMM) +#define NVDIMM_CLASS(oc) OBJECT_CLASS_CHECK(NVDIMMClass, (oc), TYPE_NVDIMM) +#define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \ + TYPE_NVDIMM) +struct NVDIMMDevice { + /* private */ + PCDIMMDevice parent_obj; + + /* public */ + + /* + * the size of label data in NVDIMM device which is presented to + * guest via __DSM "Get Namespace Label Size" function. + */ + uint64_t label_size; + + /* + * the address of label data which is read by __DSM "Get Namespace + * Label Data" function and written by __DSM "Set Namespace Label + * Data" function. + */ + void *label_data; + + /* + * it's the PMEM region in NVDIMM device, which is presented to + * guest via ACPI NFIT and _FIT method if NVDIMM hotplug is supported. + */ + MemoryRegion nvdimm_mr; +}; +typedef struct NVDIMMDevice NVDIMMDevice; + +struct NVDIMMClass { + /* private */ + PCDIMMDeviceClass parent_class; + + /* public */ + + /* read @size bytes from NVDIMM label data at @offset into @buf. */ + void (*read_label_data)(NVDIMMDevice *nvdimm, void *buf, + uint64_t size, uint64_t offset); + /* write @size bytes from @buf to NVDIMM label data at @offset. */ + void (*write_label_data)(NVDIMMDevice *nvdimm, const void *buf, + uint64_t size, uint64_t offset); +}; +typedef struct NVDIMMClass NVDIMMClass; #define NVDIMM_DSM_MEM_FILE "etc/acpi/nvdimm-mem" From b265f27c5a31c0b2d5017e99f52db45aa0559909 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:21:59 +0800 Subject: [PATCH 07/34] acpi: add aml_object_type Implement ObjectType which is used by NVDIMM _DSM method in later patch Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 8 ++++++++ include/hw/acpi/aml-build.h | 1 + 2 files changed, 9 insertions(+) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 874e473cac..c71fd16136 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1481,6 +1481,14 @@ Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target) target); } +/* ACPI 1.0b: 16.2.5.4 Type 2 Opcodes Encoding: DefObjectType */ +Aml *aml_object_type(Aml *object) +{ + Aml *var = aml_opcode(0x8E /* ObjectTypeOp */); + aml_append(var, object); + return var; +} + void build_header(BIOSLinker *linker, GArray *table_data, AcpiTableHeader *h, const char *sig, int len, uint8_t rev, diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 10c09ca29f..7a548e15f5 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -363,6 +363,7 @@ Aml *aml_refof(Aml *arg); Aml *aml_derefof(Aml *arg); Aml *aml_sizeof(Aml *arg); Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); +Aml *aml_object_type(Aml *object); void build_header(BIOSLinker *linker, GArray *table_data, From 052889b8e96d24625043cd5901ab91dd38aca49f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:00 +0800 Subject: [PATCH 08/34] acpi: add aml_call5 It will be used by NVDIMM ACPI Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/aml-build.c | 14 ++++++++++++++ include/hw/acpi/aml-build.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index c71fd16136..db3e914fb4 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -660,6 +660,20 @@ Aml *aml_call4(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4) return var; } +/* helper to call method with 5 arguments */ +Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, + Aml *arg5) +{ + Aml *var = aml_alloc(); + build_append_namestring(var->buf, "%s", method); + aml_append(var, arg1); + aml_append(var, arg2); + aml_append(var, arg3); + aml_append(var, arg4); + aml_append(var, arg5); + return var; +} + /* * ACPI 5.0: 6.4.3.8.1 GPIO Connection Descriptor * Type 1, Large Item Name 0xC diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 7a548e15f5..e7a1a4cefd 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -277,6 +277,8 @@ Aml *aml_call1(const char *method, Aml *arg1); Aml *aml_call2(const char *method, Aml *arg1, Aml *arg2); Aml *aml_call3(const char *method, Aml *arg1, Aml *arg2, Aml *arg3); Aml *aml_call4(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4); +Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4, + Aml *arg5); Aml *aml_gpio_int(AmlConsumerAndProducer con_and_pro, AmlLevelAndEdge edge_level, AmlActiveHighAndLow active_level, AmlShared shared, From 732b530c1bd064bdcc29975c0b78fc6de8c47e7f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:01 +0800 Subject: [PATCH 09/34] nvdimm acpi: set HDLE properly Now we pass HDLE to Qemu properly, use 0 for root device and use the handle for nvdimm devices Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index b4c22627df..14355f8c2e 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -490,7 +490,7 @@ static void nvdimm_build_common_dsm(Aml *dev) Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size; uint8_t byte_list[1]; - method = aml_method(NVDIMM_COMMON_DSM, 4, AML_SERIALIZED); + method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); function = aml_arg(2); dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR); @@ -516,11 +516,10 @@ static void nvdimm_build_common_dsm(Aml *dev) /* * The HDLE indicates the DSM function is issued from which device, - * it is not used at this time as no function is supported yet. - * Currently we make it always be 0 for all the devices and will set - * the appropriate value once real function is implemented. + * it reserves 0 for root device and is the handle for NVDIMM devices. + * See the comments in nvdimm_slot_to_handle(). */ - aml_append(method, aml_store(aml_int(0x0), aml_name("HDLE"))); + aml_append(method, aml_store(aml_arg(4), aml_name("HDLE"))); aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); @@ -542,13 +541,14 @@ static void nvdimm_build_common_dsm(Aml *dev) aml_append(dev, method); } -static void nvdimm_build_device_dsm(Aml *dev) +static void nvdimm_build_device_dsm(Aml *dev, uint32_t handle) { Aml *method; method = aml_method("_DSM", 4, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_call4(NVDIMM_COMMON_DSM, aml_arg(0), - aml_arg(1), aml_arg(2), aml_arg(3)))); + aml_append(method, aml_return(aml_call5(NVDIMM_COMMON_DSM, aml_arg(0), + aml_arg(1), aml_arg(2), aml_arg(3), + aml_int(handle)))); aml_append(dev, method); } @@ -573,7 +573,7 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) */ aml_append(nvdimm_dev, aml_name_decl("_ADR", aml_int(handle))); - nvdimm_build_device_dsm(nvdimm_dev); + nvdimm_build_device_dsm(nvdimm_dev, handle); aml_append(root_dev, nvdimm_dev); } } @@ -665,7 +665,9 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, aml_append(dev, field); nvdimm_build_common_dsm(dev); - nvdimm_build_device_dsm(dev); + + /* 0 is reserved for root device. */ + nvdimm_build_device_dsm(dev, 0); nvdimm_build_nvdimm_devices(device_list, dev); From 4568c948066737d116432dff8646c59f0ce67a38 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:02 +0800 Subject: [PATCH 10/34] nvdimm acpi: save arg3 of _DSM method Check if the input Arg3 is valid then store it into ARG3 if it is needed Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 14355f8c2e..95504e93f9 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -488,6 +488,7 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, static void nvdimm_build_common_dsm(Aml *dev) { Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size; + Aml *pckg, *pckg_index, *pckg_buf; uint8_t byte_list[1]; method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); @@ -523,6 +524,25 @@ static void nvdimm_build_common_dsm(Aml *dev) aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); + /* + * The fourth parameter (Arg3) of _DSM is a package which contains + * a buffer, the layout of the buffer is specified by UUID (Arg0), + * Revision ID (Arg1) and Function Index (Arg2) which are documented + * in the DSM Spec. + */ + pckg = aml_arg(3); + ifctx = aml_if(aml_and(aml_equal(aml_object_type(pckg), + aml_int(4 /* Package */)) /* It is a Package? */, + aml_equal(aml_sizeof(pckg), aml_int(1)) /* 1 element? */, + NULL)); + + pckg_index = aml_local(2); + pckg_buf = aml_local(3); + aml_append(ifctx, aml_store(aml_index(pckg, aml_int(0)), pckg_index)); + aml_append(ifctx, aml_store(aml_derefof(pckg_index), pckg_buf)); + aml_append(ifctx, aml_store(pckg_buf, aml_name("ARG3"))); + aml_append(method, ifctx); + /* * tell QEMU about the real address of DSM memory, then QEMU * gets the control and fills the result in DSM memory. From 90623ebf603168ca4ca73c7f017f926fd2f3ace2 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:03 +0800 Subject: [PATCH 11/34] nvdimm acpi: check UUID Check arg0 which indicates UUID to see if it is valid Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 95504e93f9..b01f2c6f4f 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -487,19 +487,39 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, static void nvdimm_build_common_dsm(Aml *dev) { - Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size; + Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *result_size; + Aml *elsectx, *unsupport, *unpatched, *expected_uuid, *uuid_invalid; Aml *pckg, *pckg_index, *pckg_buf; uint8_t byte_list[1]; method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); + uuid = aml_arg(0); function = aml_arg(2); + handle = aml_arg(4); dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR); /* * do not support any method if DSM memory address has not been * patched. */ - unpatched = aml_if(aml_equal(dsm_mem, aml_int(0x0))); + unpatched = aml_equal(dsm_mem, aml_int(0x0)); + + expected_uuid = aml_local(0); + + ifctx = aml_if(aml_equal(handle, aml_int(0x0))); + aml_append(ifctx, aml_store( + aml_touuid("2F10E7A4-9E91-11E4-89D3-123B93F75CBA") + /* UUID for NVDIMM Root Device */, expected_uuid)); + aml_append(method, ifctx); + elsectx = aml_else(); + aml_append(elsectx, aml_store( + aml_touuid("4309AC30-0D11-11E4-9191-0800200C9A66") + /* UUID for NVDIMM Devices */, expected_uuid)); + aml_append(method, elsectx); + + uuid_invalid = aml_lnot(aml_equal(uuid, expected_uuid)); + + unsupport = aml_if(aml_or(unpatched, uuid_invalid, NULL)); /* * function 0 is called to inquire what functions are supported by @@ -508,19 +528,19 @@ static void nvdimm_build_common_dsm(Aml *dev) ifctx = aml_if(aml_equal(function, aml_int(0))); byte_list[0] = 0 /* No function Supported */; aml_append(ifctx, aml_return(aml_buffer(1, byte_list))); - aml_append(unpatched, ifctx); + aml_append(unsupport, ifctx); /* No function is supported yet. */ byte_list[0] = 1 /* Not Supported */; - aml_append(unpatched, aml_return(aml_buffer(1, byte_list))); - aml_append(method, unpatched); + aml_append(unsupport, aml_return(aml_buffer(1, byte_list))); + aml_append(method, unsupport); /* * The HDLE indicates the DSM function is issued from which device, * it reserves 0 for root device and is the handle for NVDIMM devices. * See the comments in nvdimm_slot_to_handle(). */ - aml_append(method, aml_store(aml_arg(4), aml_name("HDLE"))); + aml_append(method, aml_store(handle, aml_name("HDLE"))); aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); From 189f4d56356f0301c9e61c7d35ec8c03c69e7133 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:04 +0800 Subject: [PATCH 12/34] nvdimm acpi: abstract the operations for root & nvdimm devices It separates the operations between root device and nvdimm devices in order to introducing label functions support for nvdimm device Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 74 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index b01f2c6f4f..07c95c1f1f 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -406,6 +406,55 @@ struct NvdimmDsmFuncNoPayloadOut { } QEMU_PACKED; typedef struct NvdimmDsmFuncNoPayloadOut NvdimmDsmFuncNoPayloadOut; +static void +nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr) +{ + NvdimmDsmFunc0Out func0 = { + .len = cpu_to_le32(sizeof(func0)), + .supported_func = cpu_to_le32(supported_func), + }; + cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof(func0)); +} + +static void +nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr) +{ + NvdimmDsmFuncNoPayloadOut out = { + .len = cpu_to_le32(sizeof(out)), + .func_ret_status = cpu_to_le32(func_ret_status), + }; + cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out)); +} + +static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) +{ + /* + * function 0 is called to inquire which functions are supported by + * OSPM + */ + if (!in->function) { + nvdimm_dsm_function0(0 /* No function supported other than + function 0 */, dsm_mem_addr); + return; + } + + /* No function except function 0 is supported yet. */ + nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); +} + +static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) +{ + /* See the comments in nvdimm_dsm_root(). */ + if (!in->function) { + nvdimm_dsm_function0(0 /* No function supported other than + function 0 */, dsm_mem_addr); + return; + } + + /* No function except function 0 is supported yet. */ + nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); +} + static uint64_t nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) { @@ -436,26 +485,15 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision, in->handle, in->function); - /* - * function 0 is called to inquire which functions are supported by - * OSPM - */ - if (in->function == 0) { - NvdimmDsmFunc0Out func0 = { - .len = cpu_to_le32(sizeof(func0)), - /* No function supported other than function 0 */ - .supported_func = cpu_to_le32(0), - }; - cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0); - } else { - /* No function except function 0 is supported yet. */ - NvdimmDsmFuncNoPayloadOut out = { - .len = cpu_to_le32(sizeof(out)), - .func_ret_status = cpu_to_le32(1) /* Not Supported */, - }; - cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out)); + /* Handle 0 is reserved for NVDIMM Root Device. */ + if (!in->handle) { + nvdimm_dsm_root(in, dsm_mem_addr); + goto exit; } + nvdimm_dsm_device(in, dsm_mem_addr); + +exit: g_free(in); } From d15fc53f8d564ba977f64df96b0114b58f36d154 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:05 +0800 Subject: [PATCH 13/34] nvdimm acpi: check revision Currently only revision 1 is supported Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 07c95c1f1f..8b89285587 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -485,6 +485,13 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision, in->handle, in->function); + if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) { + nvdimm_debug("Revision %#x is not supported, expect %#x.\n", + in->revision, 0x1); + nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); + goto exit; + } + /* Handle 0 is reserved for NVDIMM Root Device. */ if (!in->handle) { nvdimm_dsm_root(in, dsm_mem_addr); From 5797dcdc7ade30e8c4080d9282cd9e51b3566e14 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:06 +0800 Subject: [PATCH 14/34] nvdimm acpi: support Get Namespace Label Size function Function 4 is used to get Namespace label size Reviewed-by: Stefan Hajnoczi Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 130 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 3 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 8b89285587..4a25d8fadc 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -216,6 +216,26 @@ static uint32_t nvdimm_slot_to_dcr_index(int slot) return nvdimm_slot_to_spa_index(slot) + 1; } +static NVDIMMDevice *nvdimm_get_device_by_handle(uint32_t handle) +{ + NVDIMMDevice *nvdimm = NULL; + GSList *list, *device_list = nvdimm_get_plugged_device_list(); + + for (list = device_list; list; list = list->next) { + NVDIMMDevice *nvd = list->data; + int slot = object_property_get_int(OBJECT(nvd), PC_DIMM_SLOT_PROP, + NULL); + + if (nvdimm_slot_to_handle(slot) == handle) { + nvdimm = nvd; + break; + } + } + + g_slist_free(device_list); + return nvdimm; +} + /* ACPI 6.0: 5.2.25.1 System Physical Address Range Structure */ static void nvdimm_build_structure_spa(GArray *structures, DeviceState *dev) @@ -406,6 +426,35 @@ struct NvdimmDsmFuncNoPayloadOut { } QEMU_PACKED; typedef struct NvdimmDsmFuncNoPayloadOut NvdimmDsmFuncNoPayloadOut; +struct NvdimmFuncGetLabelSizeOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t func_ret_status; /* return status code. */ + uint32_t label_size; /* the size of label data area. */ + /* + * Maximum size of the namespace label data length supported by + * the platform in Get/Set Namespace Label Data functions. + */ + uint32_t max_xfer; +} QEMU_PACKED; +typedef struct NvdimmFuncGetLabelSizeOut NvdimmFuncGetLabelSizeOut; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > 4096); + +struct NvdimmFuncGetLabelDataOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t func_ret_status; /* return status code. */ + uint8_t out_buf[0]; /* the data got via Get Namesapce Label function. */ +} QEMU_PACKED; +typedef struct NvdimmFuncGetLabelDataOut NvdimmFuncGetLabelDataOut; + +struct NvdimmFuncSetLabelDataIn { + uint32_t offset; /* the offset in the namespace label data area. */ + uint32_t length; /* the size of data is to be written via the function. */ + uint8_t in_buf[0]; /* the data written to label data area. */ +} QEMU_PACKED; +typedef struct NvdimmFuncSetLabelDataIn NvdimmFuncSetLabelDataIn; + static void nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr) { @@ -442,16 +491,91 @@ static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); } +/* + * the max transfer size is the max size transferred by both a + * 'Get Namespace Label Data' function and a 'Set Namespace Label Data' + * function. + */ +static uint32_t nvdimm_get_max_xfer_label_size(void) +{ + uint32_t max_get_size, max_set_size, dsm_memory_size = 4096; + + /* + * the max data ACPI can read one time which is transferred by + * the response of 'Get Namespace Label Data' function. + */ + max_get_size = dsm_memory_size - sizeof(NvdimmFuncGetLabelDataOut); + + /* + * the max data ACPI can write one time which is transferred by + * 'Set Namespace Label Data' function. + */ + max_set_size = dsm_memory_size - offsetof(NvdimmDsmIn, arg3) - + sizeof(NvdimmFuncSetLabelDataIn); + + return MIN(max_get_size, max_set_size); +} + +/* + * DSM Spec Rev1 4.4 Get Namespace Label Size (Function Index 4). + * + * It gets the size of Namespace Label data area and the max data size + * that Get/Set Namespace Label Data functions can transfer. + */ +static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr) +{ + NvdimmFuncGetLabelSizeOut label_size_out = { + .len = cpu_to_le32(sizeof(label_size_out)), + }; + uint32_t label_size, mxfer; + + label_size = nvdimm->label_size; + mxfer = nvdimm_get_max_xfer_label_size(); + + nvdimm_debug("label_size %#x, max_xfer %#x.\n", label_size, mxfer); + + label_size_out.func_ret_status = cpu_to_le32(0 /* Success */); + label_size_out.label_size = cpu_to_le32(label_size); + label_size_out.max_xfer = cpu_to_le32(mxfer); + + cpu_physical_memory_write(dsm_mem_addr, &label_size_out, + sizeof(label_size_out)); +} + static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) { + NVDIMMDevice *nvdimm = nvdimm_get_device_by_handle(in->handle); + /* See the comments in nvdimm_dsm_root(). */ if (!in->function) { - nvdimm_dsm_function0(0 /* No function supported other than - function 0 */, dsm_mem_addr); + uint32_t supported_func = 0; + + if (nvdimm && nvdimm->label_size) { + supported_func |= 0x1 /* Bit 0 indicates whether there is + support for any functions other + than function 0. */ | + 1 << 4 /* Get Namespace Label Size */; + } + nvdimm_dsm_function0(supported_func, dsm_mem_addr); return; } - /* No function except function 0 is supported yet. */ + if (!nvdimm) { + nvdimm_dsm_no_payload(2 /* Non-Existing Memory Device */, + dsm_mem_addr); + return; + } + + /* Encode DSM function according to DSM Spec Rev1. */ + switch (in->function) { + case 4 /* Get Namespace Label Size */: + if (nvdimm->label_size) { + nvdimm_dsm_label_size(nvdimm, dsm_mem_addr); + return; + } + break; + } + nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); } From 2b9e57fc7f9d72841d1defb83f0fe79011065ae0 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:07 +0800 Subject: [PATCH 15/34] nvdimm acpi: support Get Namespace Label Data function Function 5 is used to get Namespace Label Data Reviewed-by: Stefan Hajnoczi Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 4a25d8fadc..388d42e165 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -440,6 +440,14 @@ struct NvdimmFuncGetLabelSizeOut { typedef struct NvdimmFuncGetLabelSizeOut NvdimmFuncGetLabelSizeOut; QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > 4096); +struct NvdimmFuncGetLabelDataIn { + uint32_t offset; /* the offset in the namespace label data area. */ + uint32_t length; /* the size of data is to be read via the function. */ +} QEMU_PACKED; +typedef struct NvdimmFuncGetLabelDataIn NvdimmFuncGetLabelDataIn; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataIn) + + offsetof(NvdimmDsmIn, arg3) > 4096); + struct NvdimmFuncGetLabelDataOut { /* the size of buffer filled by QEMU. */ uint32_t len; @@ -447,6 +455,7 @@ struct NvdimmFuncGetLabelDataOut { uint8_t out_buf[0]; /* the data got via Get Namesapce Label function. */ } QEMU_PACKED; typedef struct NvdimmFuncGetLabelDataOut NvdimmFuncGetLabelDataOut; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > 4096); struct NvdimmFuncSetLabelDataIn { uint32_t offset; /* the offset in the namespace label data area. */ @@ -542,6 +551,71 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr) sizeof(label_size_out)); } +static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm, + uint32_t offset, uint32_t length) +{ + uint32_t ret = 3 /* Invalid Input Parameters */; + + if (offset + length < offset) { + nvdimm_debug("offset %#x + length %#x is overflow.\n", offset, + length); + return ret; + } + + if (nvdimm->label_size < offset + length) { + nvdimm_debug("position %#x is beyond label data (len = %" PRIx64 ").\n", + offset + length, nvdimm->label_size); + return ret; + } + + if (length > nvdimm_get_max_xfer_label_size()) { + nvdimm_debug("length (%#x) is larger than max_xfer (%#x).\n", + length, nvdimm_get_max_xfer_label_size()); + return ret; + } + + return 0 /* Success */; +} + +/* + * DSM Spec Rev1 4.5 Get Namespace Label Data (Function Index 5). + */ +static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, + hwaddr dsm_mem_addr) +{ + NVDIMMClass *nvc = NVDIMM_GET_CLASS(nvdimm); + NvdimmFuncGetLabelDataIn *get_label_data; + NvdimmFuncGetLabelDataOut *get_label_data_out; + uint32_t status; + int size; + + get_label_data = (NvdimmFuncGetLabelDataIn *)in->arg3; + le32_to_cpus(&get_label_data->offset); + le32_to_cpus(&get_label_data->length); + + nvdimm_debug("Read Label Data: offset %#x length %#x.\n", + get_label_data->offset, get_label_data->length); + + status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset, + get_label_data->length); + if (status != 0 /* Success */) { + nvdimm_dsm_no_payload(status, dsm_mem_addr); + return; + } + + size = sizeof(*get_label_data_out) + get_label_data->length; + assert(size <= 4096); + get_label_data_out = g_malloc(size); + + get_label_data_out->len = cpu_to_le32(size); + get_label_data_out->func_ret_status = cpu_to_le32(0 /* Success */); + nvc->read_label_data(nvdimm, get_label_data_out->out_buf, + get_label_data->length, get_label_data->offset); + + cpu_physical_memory_write(dsm_mem_addr, get_label_data_out, size); + g_free(get_label_data_out); +} + static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) { NVDIMMDevice *nvdimm = nvdimm_get_device_by_handle(in->handle); @@ -554,7 +628,8 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) supported_func |= 0x1 /* Bit 0 indicates whether there is support for any functions other than function 0. */ | - 1 << 4 /* Get Namespace Label Size */; + 1 << 4 /* Get Namespace Label Size */ | + 1 << 5 /* Get Namespace Label Data */; } nvdimm_dsm_function0(supported_func, dsm_mem_addr); return; @@ -574,6 +649,12 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) return; } break; + case 5 /* Get Namespace Label Data */: + if (nvdimm->label_size) { + nvdimm_dsm_get_label_data(nvdimm, in, dsm_mem_addr); + return; + } + break; } nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); From 14e44198ffe6d96d3966c091eb97f2df46839519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:08 +0800 Subject: [PATCH 16/34] nvdimm acpi: support Set Namespace Label Data function Function 6 is used to set Namespace Label Data Reviewed-by: Stefan Hajnoczi Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/nvdimm.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 388d42e165..e486128aa1 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -463,6 +463,8 @@ struct NvdimmFuncSetLabelDataIn { uint8_t in_buf[0]; /* the data written to label data area. */ } QEMU_PACKED; typedef struct NvdimmFuncSetLabelDataIn NvdimmFuncSetLabelDataIn; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncSetLabelDataIn) + + offsetof(NvdimmDsmIn, arg3) > 4096); static void nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr) @@ -616,6 +618,39 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, g_free(get_label_data_out); } +/* + * DSM Spec Rev1 4.6 Set Namespace Label Data (Function Index 6). + */ +static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, + hwaddr dsm_mem_addr) +{ + NVDIMMClass *nvc = NVDIMM_GET_CLASS(nvdimm); + NvdimmFuncSetLabelDataIn *set_label_data; + uint32_t status; + + set_label_data = (NvdimmFuncSetLabelDataIn *)in->arg3; + + le32_to_cpus(&set_label_data->offset); + le32_to_cpus(&set_label_data->length); + + nvdimm_debug("Write Label Data: offset %#x length %#x.\n", + set_label_data->offset, set_label_data->length); + + status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset, + set_label_data->length); + if (status != 0 /* Success */) { + nvdimm_dsm_no_payload(status, dsm_mem_addr); + return; + } + + assert(sizeof(*in) + sizeof(*set_label_data) + set_label_data->length <= + 4096); + + nvc->write_label_data(nvdimm, set_label_data->in_buf, + set_label_data->length, set_label_data->offset); + nvdimm_dsm_no_payload(0 /* Success */, dsm_mem_addr); +} + static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) { NVDIMMDevice *nvdimm = nvdimm_get_device_by_handle(in->handle); @@ -629,7 +664,8 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) support for any functions other than function 0. */ | 1 << 4 /* Get Namespace Label Size */ | - 1 << 5 /* Get Namespace Label Data */; + 1 << 5 /* Get Namespace Label Data */ | + 1 << 6 /* Set Namespace Label Data */; } nvdimm_dsm_function0(supported_func, dsm_mem_addr); return; @@ -655,6 +691,12 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) return; } break; + case 0x6 /* Set Namespace Label Data */: + if (nvdimm->label_size) { + nvdimm_dsm_set_label_data(nvdimm, in, dsm_mem_addr); + return; + } + break; } nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); From 15b82b1dc59040245f16a24c16825efe8c389050 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Jun 2016 20:22:09 +0800 Subject: [PATCH 17/34] docs: add NVDIMM ACPI documentation It describes the basic concepts of NVDIMM ACPI and the interfaces between QEMU and the ACPI BIOS Signed-off-by: Xiao Guangrong Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/acpi_nvdimm.txt | 132 +++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 docs/specs/acpi_nvdimm.txt diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt new file mode 100644 index 0000000000..0fdd251fc0 --- /dev/null +++ b/docs/specs/acpi_nvdimm.txt @@ -0,0 +1,132 @@ +QEMU<->ACPI BIOS NVDIMM interface +--------------------------------- + +QEMU supports NVDIMM via ACPI. This document describes the basic concepts of +NVDIMM ACPI and the interface between QEMU and the ACPI BIOS. + +NVDIMM ACPI Background +---------------------- +NVDIMM is introduced in ACPI 6.0 which defines an NVDIMM root device under +_SB scope with a _HID of “ACPI0012”. For each NVDIMM present or intended +to be supported by platform, platform firmware also exposes an ACPI +Namespace Device under the root device. + +The NVDIMM child devices under the NVDIMM root device are defined with _ADR +corresponding to the NFIT device handle. The NVDIMM root device and the +NVDIMM devices can have device specific methods (_DSM) to provide additional +functions specific to a particular NVDIMM implementation. + +This is an example from ACPI 6.0, a platform contains one NVDIMM: + +Scope (\_SB){ + Device (NVDR) // Root device + { + Name (_HID, “ACPI0012”) + Method (_STA) {...} + Method (_FIT) {...} + Method (_DSM, ...) {...} + Device (NVD) + { + Name(_ADR, h) //where h is NFIT Device Handle for this NVDIMM + Method (_DSM, ...) {...} + } + } +} + +Method supported on both NVDIMM root device and NVDIMM device +_DSM (Device Specific Method) + It is a control method that enables devices to provide device specific + control functions that are consumed by the device driver. + The NVDIMM DSM specification can be found at: + http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf + + Arguments: + Arg0 – A Buffer containing a UUID (16 Bytes) + Arg1 – An Integer containing the Revision ID (4 Bytes) + Arg2 – An Integer containing the Function Index (4 Bytes) + Arg3 – A package containing parameters for the function specified by the + UUID, Revision ID, and Function Index + + Return Value: + If Function Index = 0, a Buffer containing a function index bitfield. + Otherwise, the return value and type depends on the UUID, revision ID + and function index which are described in the DSM specification. + +Methods on NVDIMM ROOT Device +_FIT(Firmware Interface Table) + It evaluates to a buffer returning data in the format of a series of NFIT + Type Structure. + + Arguments: None + + Return Value: + A Buffer containing a list of NFIT Type structure entries. + + The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 + NVDIMM Firmware Interface Table (NFIT). + +QEMU NVDIMM Implemention +======================== +QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page +for NVDIMM ACPI. + +Memory: + QEMU uses BIOS Linker/loader feature to ask BIOS to allocate a memory + page and dynamically patch its into a int32 object named "MEMA" in ACPI. + + This page is RAM-based and it is used to transfer data between _DSM + method and QEMU. If ACPI has control, this pages is owned by ACPI which + writes _DSM input data to it, otherwise, it is owned by QEMU which + emulates _DSM access and writes the output data to it. + + ACPI writes _DSM Input Data (based on the offset in the page): + [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle, 0 is reserved for NVDIMM + Root device. + [0x4 - 0x7]: 4 bytes, Revision ID, that is the Arg1 of _DSM method. + [0x8 - 0xB]: 4 bytes. Function Index, that is the Arg2 of _DSM method. + [0xC - 0xFFF]: 4084 bytes, the Arg3 of _DSM method. + + QEMU Writes Output Data (based on the offset in the page): + [0x0 - 0x3]: 4 bytes, the length of result + [0x4 - 0xFFF]: 4092 bytes, the DSM result filled by QEMU + +IO Port 0x0a18 - 0xa1b: + ACPI writes the address of the memory page allocated by BIOS to this + port then QEMU gets the control and fills the result in the memory page. + + write Access: + [0x0a18 - 0xa1b]: 4 bytes, the address of the memory page allocated + by BIOS. + +_DSM process diagram: +--------------------- +"MEMA" indicates the address of memory page allocated by BIOS. + + +----------------------+   +-----------------------+ + |   1. OSPM   |      | 2. OSPM | + | save _DSM input data | | write "MEMA" to | Exit to QEMU + | to the page +----->| IO port 0x0a18 +------------+ + | indicated by "MEMA" | | | | + +----------------------+ +-----------------------+ | +  | +  v + +------------- ----+ +-----------+ +------------------+--------+ + | 5 QEMU | | 4 QEMU | | 3. QEMU | + | write _DSM result | | emulate | | get _DSM input data from | + | to the page +<------+ _DSM +<-----+ the page indicated by the | + | | | | | value from the IO port | + +--------+-----------+ +-----------+ +---------------------------+ + | + | Enter Guest + | + v + +--------------------------+ +--------------+ + | 6 OSPM | | 7 OSPM | + | result size is returned | | _DSM return | + | by reading DSM +----->+ | + | result from the page | | | + +--------------------------+ +--------------+ + + _FIT implementation + ------------------- + TODO (will fill it when nvdimm hotplug is introduced) From aa1dd39ca3074a3bc4ec4c0543c0588ed4dc5472 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Sat, 18 Jun 2016 16:42:05 -0400 Subject: [PATCH 18/34] i386: pci-assign: Fix MSI-X table size The current code creates a whole page mmio region for the MSI-X table size. However, the page containing the MSI-X table may contain other registers not related to MSI-X. Creating an mmio region for the whole page masks such registers and may break drivers in the guest OS. Since maximal number of entries is known, use that instead to deduce the table size when setting up the mmio region. Signed-off-by: Ido Yariv Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/kvm/pci-assign.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index f9c901471d..98997d167c 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -36,8 +36,6 @@ #include "kvm_i386.h" #include "hw/pci/pci-assign.h" -#define MSIX_PAGE_SIZE 0x1000 - /* From linux/ioport.h */ #define IORESOURCE_IO 0x00000100 /* Resource type */ #define IORESOURCE_MEM 0x00000200 @@ -122,6 +120,7 @@ typedef struct AssignedDevice { int *msi_virq; MSIXTableEntry *msix_table; hwaddr msix_table_addr; + uint16_t msix_table_size; uint16_t msix_max; MemoryRegion mmio; char *configfd_name; @@ -1310,6 +1309,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK; msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK; dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + dev->msix_table_size = msix_max * sizeof(MSIXTableEntry); dev->msix_max = msix_max; } @@ -1633,7 +1633,7 @@ static void assigned_dev_msix_reset(AssignedDevice *dev) return; } - memset(dev->msix_table, 0, MSIX_PAGE_SIZE); + memset(dev->msix_table, 0, dev->msix_table_size); for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) { entry->ctrl = cpu_to_le32(0x1); /* Masked */ @@ -1642,8 +1642,8 @@ static void assigned_dev_msix_reset(AssignedDevice *dev) static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) { - dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); + dev->msix_table = mmap(NULL, dev->msix_table_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); if (dev->msix_table == MAP_FAILED) { error_setg_errno(errp, errno, "failed to allocate msix_table"); dev->msix_table = NULL; @@ -1653,7 +1653,7 @@ static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) assigned_dev_msix_reset(dev); memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops, - dev, "assigned-dev-msix", MSIX_PAGE_SIZE); + dev, "assigned-dev-msix", dev->msix_table_size); } static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) @@ -1662,7 +1662,7 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) return; } - if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) { + if (munmap(dev->msix_table, dev->msix_table_size) == -1) { error_report("error unmapping msix_table! %s", strerror(errno)); } dev->msix_table = NULL; From abd49bc2ed2f06bc1ea979b17e79e2fc52360de9 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 2 May 2016 11:42:59 +0200 Subject: [PATCH 19/34] docs: update ACPI CPU hotplug spec with new protocol Add description of new CPU hotplug interface. To switch from from legacy mode into new mode use fact that write accesses into CPU present bitmap were never used before and were ignored by QEMU. So use it to as a way to switch from legacy mode. That way pc/q35 machine starts in legacy mode and QEMU generated ACPI tables will switch to new CPU hotplug interface during runtime. In case QEMU is started with legacy BIOS (that doesn't support QEMU generated ACPI tables), legacy CPU hotplug will remain active and could be used by BIOS built in ACPI tables for CPU hotplug. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/acpi_cpu_hotplug.txt | 94 ++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 12 deletions(-) diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt index 340b751a95..ee219c8358 100644 --- a/docs/specs/acpi_cpu_hotplug.txt +++ b/docs/specs/acpi_cpu_hotplug.txt @@ -4,21 +4,91 @@ QEMU<->ACPI BIOS CPU hotplug interface QEMU supports CPU hotplug via ACPI. This document describes the interface between QEMU and the ACPI BIOS. -ACPI GPE block (IO ports 0xafe0-0xafe3, byte access): ------------------------------------------ - -Generic ACPI GPE block. Bit 2 (GPE.2) used to notify CPU -hot-add/remove event to ACPI BIOS, via SCI interrupt. +ACPI BIOS GPE.2 handler is dedicated for notifying OS about CPU hot-add +and hot-remove events. +============================================ +Legacy ACPI CPU hotplug interface registers: +-------------------------------------------- CPU present bitmap for: ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) + One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. + The first DWORD in bitmap is used in write mode to switch from legacy + to new CPU hotplug interface, write 0 into it to do switch. --------------------------------------------------------------- -One bit per CPU. Bit position reflects corresponding CPU APIC ID. -Read-only. +QEMU sets corresponding CPU bit on hot-add event and issues SCI +with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler +to notify OS about CPU hot-add events. CPU hot-remove isn't supported. -CPU hot-add/remove notification: ------------------------------------------------------ -QEMU sets/clears corresponding CPU bit on hot-add/remove event. -CPU present map read by ACPI BIOS GPE.2 handler to notify OS of CPU -hot-(un)plug events. +===================================== +ACPI CPU hotplug interface registers: +------------------------------------- +Register block base address: + ICH9-LPC IO port 0x0cd8 + PIIX-PM IO port 0xaf00 +Register block size: + ACPI_CPU_HOTPLUG_REG_LEN = 12 + +read access: + offset: + [0x0-0x3] reserved + [0x4] CPU device status fields: (1 byte access) + bits: + 0: Device is enabled and may be used by guest + 1: Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 0 is set. + 2: Device remove event, used to distinguish device for which + no device eject request to OSPM was issued. + 3-7: reserved and should be ignored by OSPM + [0x5-0x7] reserved + [0x8] Command data: (DWORD access) + in case of error or unsupported command reads is 0xFFFFFFFF + current 'Command field' value: + 0: returns PXM value corresponding to device + +write access: + offset: + [0x0-0x3] CPU selector: (DWORD access) + selects active CPU device. All following accesses to other + registers will read/store data from/to selected CPU. + [0x4] CPU device control fields: (1 byte access) + bits: + 0: reserved, OSPM must clear it before writing to register. + 1: if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected CPU device + 2: if set to 1 clears device remove event, set by OSPM + after it has emitted device eject request for the + selected CPU device + 3: if set to 1 initiates device eject, set by OSPM when it + triggers CPU device removal and calls _EJ0 method + 4-7: reserved, OSPM must clear them before writing to register + [0x5] Command field: (1 byte access) + value: + 0: selects a CPU device with inserting/removing events and + following reads from 'Command data' register return + selected CPU (CPU selector value). If no CPU with events + found, the current CPU selector doesn't change and + corresponding insert/remove event flags are not set. + 1: following writes to 'Command data' register set OST event + register in QEMU + 2: following writes to 'Command data' register set OST status + register in QEMU + other values: reserved + [0x6-0x7] reserved + [0x8] Command data: (DWORD access) + current 'Command field' value: + 0: OSPM reads value of CPU selector + 1: stores value into OST event register + 2: stores value into OST status register, triggers + ACPI_DEVICE_OST QMP event from QEMU to external applications + with current values of OST event and status registers. + other values: reserved + +Selecting CPU device beyond possible range has no effect on platform: + - write accesses to CPU hot-plug registers not documented above are + ignored + - read accesses to CPU hot-plug registers not documented above return + all bits set to 0. From 16bcab97eb9fc2d5f15e5c9ff6369f9d1d120b49 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 11 Apr 2016 17:25:54 +0200 Subject: [PATCH 20/34] pc: piix4/ich9: add 'cpu-hotplug-legacy' property It will be used to select which hotplug call-back is called and for switching from legacy mode into new one. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ich9.c | 23 ++++++++++++++++++++++- hw/acpi/piix4.c | 24 +++++++++++++++++++++++- include/hw/acpi/ich9.h | 1 + 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 853c9c4eb7..ed16940601 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -306,6 +306,21 @@ static void ich9_pm_set_memory_hotplug_support(Object *obj, bool value, s->pm.acpi_memory_hotplug.is_enabled = value; } +static bool ich9_pm_get_cpu_hotplug_legacy(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.cpu_hotplug_legacy; +} + +static void ich9_pm_set_cpu_hotplug_legacy(Object *obj, bool value, + Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.cpu_hotplug_legacy = value; +} + static void ich9_pm_get_disable_s3(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -397,6 +412,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; pm->disable_s3 = 0; pm->disable_s4 = 0; pm->s4_val = 2; @@ -412,6 +428,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_memory_hotplug_support, ich9_pm_set_memory_hotplug_support, NULL); + object_property_add_bool(obj, "cpu-hotplug-legacy", + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy, + NULL); object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, @@ -439,7 +459,8 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + } else if (lpc->pm.cpu_hotplug_legacy && + object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { legacy_acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.gpe_cpu, dev, errp); } else { error_setg(errp, "acpi: device plug request for not supported device" diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index c48cb1b91a..9ae3964099 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -86,6 +86,7 @@ typedef struct PIIX4PMState { uint8_t disable_s4; uint8_t s4_val; + bool cpu_hotplug_legacy; AcpiCpuHotplug gpe_cpu; MemHotplugState acpi_memory_hotplug; @@ -351,7 +352,8 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + } else if (s->cpu_hotplug_legacy && + object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { legacy_acpi_cpu_plug_cb(hotplug_dev, &s->gpe_cpu, dev, errp); } else { error_setg(errp, "acpi: device plug request for not supported device" @@ -560,6 +562,21 @@ static const MemoryRegionOps piix4_gpe_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; + +static bool piix4_get_cpu_hotplug_legacy(Object *obj, Error **errp) +{ + PIIX4PMState *s = PIIX4_PM(obj); + + return s->cpu_hotplug_legacy; +} + +static void piix4_set_cpu_hotplug_legacy(Object *obj, bool value, Error **errp) +{ + PIIX4PMState *s = PIIX4_PM(obj); + + s->cpu_hotplug_legacy = value; +} + static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, PCIBus *bus, PIIX4PMState *s) { @@ -570,6 +587,11 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent, s->use_acpi_pci_hotplug); + s->cpu_hotplug_legacy = true; + object_property_add_bool(OBJECT(s), "cpu-hotplug-legacy", + piix4_get_cpu_hotplug_legacy, + piix4_set_cpu_hotplug_legacy, + NULL); legacy_acpi_cpu_hotplug_init(parent, OBJECT(s), &s->gpe_cpu, PIIX4_CPU_HOTPLUG_IO_BASE); diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index bbd657c59b..e29a8566d8 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -48,6 +48,7 @@ typedef struct ICH9LPCPMRegs { uint32_t pm_io_base; Notifier powerdown_notifier; + bool cpu_hotplug_legacy; AcpiCpuHotplug gpe_cpu; MemHotplugState acpi_memory_hotplug; From 5e1b5d93887b52eede156f846b6c4c5c8bbcfcdb Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 14 Jun 2016 16:02:06 +0200 Subject: [PATCH 21/34] acpi: cpuhp: add CPU devices AML with _STA method it adds CPU objects to DSDT with _STA method and QEMU side of CPU hotplug interface initialization with registers sufficient to handle _STA requests, including necessary hotplug callbacks in piix4,ich9 code. Hot-(un)plug hw/acpi parts will be added by corresponding follow up patches. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/Makefile.objs | 1 + hw/acpi/cpu.c | 240 +++++++++++++++++++++++++++++++++++++++++ hw/acpi/ich9.c | 9 +- hw/acpi/piix4.c | 11 +- hw/acpi/trace-events | 5 + include/hw/acpi/cpu.h | 51 +++++++++ include/hw/acpi/ich9.h | 2 + 7 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 hw/acpi/cpu.c create mode 100644 include/hw/acpi/cpu.h diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 1a48f61676..4b7da6639f 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -2,6 +2,7 @@ common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o +common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c new file mode 100644 index 0000000000..d99002c0ef --- /dev/null +++ b/hw/acpi/cpu.c @@ -0,0 +1,240 @@ +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "hw/acpi/cpu.h" +#include "qapi/error.h" +#include "trace.h" + +#define ACPI_CPU_HOTPLUG_REG_LEN 12 +#define ACPI_CPU_SELECTOR_OFFSET_WR 0 +#define ACPI_CPU_FLAGS_OFFSET_RW 4 + +static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val = 0; + CPUHotplugState *cpu_st = opaque; + AcpiCpuStatus *cdev; + + if (cpu_st->selector >= cpu_st->dev_count) { + return val; + } + + cdev = &cpu_st->devs[cpu_st->selector]; + switch (addr) { + case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ + val |= cdev->cpu ? 1 : 0; + trace_cpuhp_acpi_read_flags(cpu_st->selector, val); + break; + default: + break; + } + return val; +} + +static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + CPUHotplugState *cpu_st = opaque; + + assert(cpu_st->dev_count); + + if (addr) { + if (cpu_st->selector >= cpu_st->dev_count) { + trace_cpuhp_acpi_invalid_idx_selected(cpu_st->selector); + return; + } + } + + switch (addr) { + case ACPI_CPU_SELECTOR_OFFSET_WR: /* current CPU selector */ + cpu_st->selector = data; + trace_cpuhp_acpi_write_idx(cpu_st->selector); + break; + default: + break; + } +} + +static const MemoryRegionOps cpu_hotplug_ops = { + .read = cpu_hotplug_rd, + .write = cpu_hotplug_wr, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + CPUHotplugState *state, hwaddr base_addr) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUArchIdList *id_list; + int i; + + assert(mc->possible_cpu_arch_ids); + id_list = mc->possible_cpu_arch_ids(machine); + state->dev_count = id_list->len; + state->devs = g_new0(typeof(*state->devs), state->dev_count); + for (i = 0; i < id_list->len; i++) { + state->devs[i].cpu = id_list->cpus[i].cpu; + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + g_free(id_list); + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, + "acpi-mem-hotplug", ACPI_CPU_HOTPLUG_REG_LEN); + memory_region_add_subregion(as, base_addr, &state->ctrl_reg); +} + +static AcpiCpuStatus *get_cpu_status(CPUHotplugState *cpu_st, DeviceState *dev) +{ + CPUClass *k = CPU_GET_CLASS(dev); + uint64_t cpu_arch_id = k->get_arch_id(CPU(dev)); + int i; + + for (i = 0; i < cpu_st->dev_count; i++) { + if (cpu_arch_id == cpu_st->devs[i].arch_id) { + return &cpu_st->devs[i]; + } + } + return NULL; +} + +void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, DeviceState *dev, Error **errp) +{ + AcpiCpuStatus *cdev; + + cdev = get_cpu_status(cpu_st, dev); + if (!cdev) { + return; + } + + cdev->cpu = CPU(dev); +} + +const VMStateDescription vmstate_cpu_hotplug = { + .name = "CPU hotplug state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(selector, CPUHotplugState), + VMSTATE_END_OF_LIST() + } +}; + +#define CPU_NAME_FMT "C%.03X" +#define CPUHP_RES_DEVICE "PRES" +#define CPU_LOCK "CPLK" +#define CPU_STS_METHOD "CSTA" + +#define CPU_ENABLED "CPEN" +#define CPU_SELECTOR "CSEL" + +void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + hwaddr io_base, + const char *res_root) +{ + Aml *ifctx; + Aml *field; + Aml *method; + Aml *cpu_ctrl_dev; + Aml *cpus_dev; + Aml *zero = aml_int(0); + Aml *one = aml_int(1); + Aml *sb_scope = aml_scope("_SB"); + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); + char *cphp_res_path = g_strdup_printf("%s." CPUHP_RES_DEVICE, res_root); + + cpu_ctrl_dev = aml_device("%s", cphp_res_path); + { + Aml *crs; + + aml_append(cpu_ctrl_dev, + aml_name_decl("_HID", aml_eisaid("PNP0A06"))); + aml_append(cpu_ctrl_dev, + aml_name_decl("_UID", aml_string("CPU Hotplug resources"))); + aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, + ACPI_CPU_HOTPLUG_REG_LEN)); + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); + + /* declare CPU hotplug MMIO region with related access fields */ + aml_append(cpu_ctrl_dev, + aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), + ACPI_CPU_HOTPLUG_REG_LEN)); + + field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, + AML_WRITE_AS_ZEROS); + aml_append(field, aml_reserved_field(ACPI_CPU_FLAGS_OFFSET_RW * 8)); + /* 1 if enabled, read only */ + aml_append(field, aml_named_field(CPU_ENABLED, 1)); + aml_append(cpu_ctrl_dev, field); + + field = aml_field("PRST", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); + /* CPU selector, write only */ + aml_append(field, aml_named_field(CPU_SELECTOR, 32)); + aml_append(cpu_ctrl_dev, field); + + } + aml_append(sb_scope, cpu_ctrl_dev); + + cpus_dev = aml_device("\\_SB.CPUS"); + { + int i; + Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); + Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); + Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); + + aml_append(cpus_dev, aml_name_decl("_HID", aml_string("ACPI0010"))); + aml_append(cpus_dev, aml_name_decl("_CID", aml_eisaid("PNP0A05"))); + + method = aml_method(CPU_STS_METHOD, 1, AML_SERIALIZED); + { + Aml *idx = aml_arg(0); + Aml *sta = aml_local(0); + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(zero, sta)); + ifctx = aml_if(aml_equal(is_enabled, one)); + { + aml_append(ifctx, aml_store(aml_int(0xF), sta)); + } + aml_append(method, ifctx); + aml_append(method, aml_release(ctrl_lock)); + aml_append(method, aml_return(sta)); + } + aml_append(cpus_dev, method); + + /* build Processor object for each processor */ + for (i = 0; i < arch_ids->len; i++) { + Aml *dev; + Aml *uid = aml_int(i); + int arch_id = arch_ids->cpus[i].arch_id; + + if (opts.apci_1_compatible && arch_id < 255) { + dev = aml_processor(i, 0, 0, CPU_NAME_FMT, i); + } else { + dev = aml_device(CPU_NAME_FMT, i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", uid)); + } + + method = aml_method("_STA", 0, AML_SERIALIZED); + aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); + aml_append(dev, method); + + aml_append(cpus_dev, dev); + } + } + aml_append(sb_scope, cpus_dev); + aml_append(table, sb_scope); + + g_free(cphp_res_path); + g_free(arch_ids); +} diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index ed16940601..9a81da82d3 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -459,9 +459,12 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug, dev, errp); - } else if (lpc->pm.cpu_hotplug_legacy && - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { - legacy_acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.gpe_cpu, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + if (lpc->pm.cpu_hotplug_legacy) { + legacy_acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.gpe_cpu, dev, errp); + } else { + acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); + } } else { error_setg(errp, "acpi: device plug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 9ae3964099..6351d2ebb2 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -34,6 +34,7 @@ #include "hw/acpi/piix4.h" #include "hw/acpi/pcihp.h" #include "hw/acpi/cpu_hotplug.h" +#include "hw/acpi/cpu.h" #include "hw/hotplug.h" #include "hw/mem/pc-dimm.h" #include "hw/acpi/memory_hotplug.h" @@ -88,6 +89,7 @@ typedef struct PIIX4PMState { bool cpu_hotplug_legacy; AcpiCpuHotplug gpe_cpu; + CPUHotplugState cpuhp_state; MemHotplugState acpi_memory_hotplug; } PIIX4PMState; @@ -352,9 +354,12 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp); - } else if (s->cpu_hotplug_legacy && - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { - legacy_acpi_cpu_plug_cb(hotplug_dev, &s->gpe_cpu, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + if (s->cpu_hotplug_legacy) { + legacy_acpi_cpu_plug_cb(hotplug_dev, &s->gpe_cpu, dev, errp); + } else { + acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } } else { error_setg(errp, "acpi: device plug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index e95b2183ac..940467ba14 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -16,3 +16,8 @@ mhp_acpi_clear_insert_evt(uint32_t slot) "slot[0x%"PRIx32"] clear insert event" mhp_acpi_clear_remove_evt(uint32_t slot) "slot[0x%"PRIx32"] clear remove event" mhp_acpi_pc_dimm_deleted(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm deleted" mhp_acpi_pc_dimm_delete_failed(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm delete failed" + +# hw/acpi/cpu.c +cpuhp_acpi_invalid_idx_selected(uint32_t idx) "0x%"PRIx32 +cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"PRIx8 +cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32 diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h new file mode 100644 index 0000000000..f345447af3 --- /dev/null +++ b/include/hw/acpi/cpu.h @@ -0,0 +1,51 @@ +/* + * QEMU ACPI hotplug utilities + * + * Copyright (C) 2016 Red Hat Inc + * + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ACPI_CPU_H +#define ACPI_CPU_H + +#include "hw/qdev-core.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/hotplug.h" + +typedef struct AcpiCpuStatus { + struct CPUState *cpu; + uint64_t arch_id; +} AcpiCpuStatus; + +typedef struct CPUHotplugState { + MemoryRegion ctrl_reg; + uint32_t selector; + uint32_t dev_count; + AcpiCpuStatus *devs; +} CPUHotplugState; + +void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, DeviceState *dev, Error **errp); + +void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + CPUHotplugState *state, hwaddr base_addr); + +typedef struct CPUHotplugFeatures { + bool apci_1_compatible; +} CPUHotplugFeatures; + +void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + hwaddr io_base, + const char *res_root); + +extern const VMStateDescription vmstate_cpu_hotplug; +#define VMSTATE_CPU_HOTPLUG(cpuhp, state) \ + VMSTATE_STRUCT(cpuhp, state, 1, \ + vmstate_cpu_hotplug, CPUHotplugState) + +#endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index e29a8566d8..a352c94fde 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -23,6 +23,7 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" +#include "hw/acpi/cpu.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/tco.h" @@ -50,6 +51,7 @@ typedef struct ICH9LPCPMRegs { bool cpu_hotplug_legacy; AcpiCpuHotplug gpe_cpu; + CPUHotplugState cpuhp_state; MemHotplugState acpi_memory_hotplug; From ac35f13ba8f80533b21016ced01aa55891952251 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 20 Apr 2016 11:28:57 +0200 Subject: [PATCH 22/34] pc: acpi: introduce AcpiDeviceIfClass.madt_cpu hook Add madt_cpu callback to AcpiDeviceIfClass and use it for generating LAPIC MADT entries for CPUs. Later it will be used for generating x2APIC entries in case of more than 255 CPUs and also would be reused by ARM target when ACPI CPU hotplug is introduced there. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/piix4.c | 1 + hw/i386/acpi-build.c | 45 +++++++++++++++++----------- hw/isa/lpc_ich9.c | 1 + include/hw/acpi/acpi_dev_interface.h | 7 +++++ include/hw/i386/pc.h | 5 ++++ stubs/Makefile.objs | 1 + stubs/pc_madt_cpu_entry.c | 7 +++++ 7 files changed, 49 insertions(+), 18 deletions(-) create mode 100644 stubs/pc_madt_cpu_entry.c diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 6351d2ebb2..6d24cb535b 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -658,6 +658,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) hc->unplug = piix4_device_unplug_cb; adevc->ospm_status = piix4_ospm_status; adevc->send_event = piix4_send_gpe; + adevc->madt_cpu = pc_madt_cpu_entry; } static const TypeInfo piix4_pm_info = { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b3dc1df25a..e35a446a9d 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -329,12 +329,38 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, (void *)fadt, "FACP", sizeof(*fadt), 1, oem_id, oem_table_id); } +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + CPUArchIdList *apic_ids, GArray *entry) +{ + int apic_id; + AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); + + apic_id = apic_ids->cpus[uid].arch_id; + apic->type = ACPI_APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = uid; + apic->local_apic_id = apic_id; + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + /* ACPI spec says that LAPIC entry for non present + * CPU may be omitted from MADT or it must be marked + * as disabled. However omitting non present CPU from + * MADT breaks hotplug on linux. So possible CPUs + * should be put in MADT but kept disabled. + */ + apic->flags = cpu_to_le32(0); + } +} + static void build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) { MachineClass *mc = MACHINE_GET_CLASS(pcms); CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); int madt_start = table_data->len; + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); AcpiMultipleApicTable *madt; AcpiMadtIoApic *io_apic; @@ -347,24 +373,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) madt->flags = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { - AcpiMadtProcessorApic *apic = acpi_data_push(table_data, sizeof *apic); - int apic_id = apic_ids->cpus[i].arch_id; - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = i; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[i].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - /* ACPI spec says that LAPIC entry for non present - * CPU may be omitted from MADT or it must be marked - * as disabled. However omitting non present CPU from - * MADT breaks hotplug on linux. So possible CPUs - * should be put in MADT but kept disabled. - */ - apic->flags = cpu_to_le32(0); - } + adevc->madt_cpu(adev, i, apic_ids, table_data); } g_free(apic_ids); diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 213741bc21..c1a4f1b34c 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -714,6 +714,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) hc->unplug = ich9_pm_device_unplug_cb; adevc->ospm_status = ich9_pm_ospm_status; adevc->send_event = ich9_send_gpe; + adevc->madt_cpu = pc_madt_cpu_entry; } static const TypeInfo ich9_lpc_info = { diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index a0c4a336f2..da4ef7fbd3 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -3,6 +3,7 @@ #include "qom/object.h" #include "qapi-types.h" +#include "hw/boards.h" /* These values are part of guest ABI, and can not be changed */ typedef enum { @@ -37,6 +38,10 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event); * ospm_status: returns status of ACPI device objects, reported * via _OST method if device supports it. * send_event: inject a specified event into guest + * madt_cpu: fills @entry with Interrupt Controller Structure + * for CPU indexed by @uid in @apic_ids array, + * returned structure types are: + * 0 - Local APIC, 9 - Local x2APIC, 0xB - GICC * * Interface is designed for providing unified interface * to generic ACPI functionality that could be used without @@ -50,5 +55,7 @@ typedef struct AcpiDeviceIfClass { /* */ void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); + void (*madt_cpu)(AcpiDeviceIf *adev, int uid, + CPUArchIdList *apic_ids, GArray *entry); } AcpiDeviceIfClass; #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 49566c89d4..9e239297e8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -17,6 +17,7 @@ #include "hw/compat.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" +#include "hw/acpi/acpi_dev_interface.h" #define HPET_INTCAP "hpet-intcap" @@ -345,6 +346,10 @@ void pc_system_firmware_init(MemoryRegion *rom_memory, /* pvpanic.c */ uint16_t pvpanic_port(void); +/* acpi-build.c */ +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + CPUArchIdList *apic_ids, GArray *entry); + /* e820 types */ #define E820_RAM 1 #define E820_RESERVED 2 diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index b829ee6e1a..7cdcad4fdb 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -43,3 +43,4 @@ stub-obj-y += vhost.o stub-obj-y += iohandler.o stub-obj-y += smbios_type_38.o stub-obj-y += ipmi.o +stub-obj-y += pc_madt_cpu_entry.o diff --git a/stubs/pc_madt_cpu_entry.c b/stubs/pc_madt_cpu_entry.c new file mode 100644 index 0000000000..427e772868 --- /dev/null +++ b/stubs/pc_madt_cpu_entry.c @@ -0,0 +1,7 @@ +#include "qemu/osdep.h" +#include "hw/i386/pc.h" + +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + CPUArchIdList *apic_ids, GArray *entry) +{ +} From d2238cb6781d7bcbbf8ddf4a1f8486838b80c7bb Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 14 Jun 2016 16:13:32 +0200 Subject: [PATCH 23/34] acpi: cpuhp: implement hot-add parts of CPU hotplug interface it adds hw registers needed for handling CPU hot-add and corresponding AML methods to handle hot-add events on guest side. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 150 +++++++++++++++++++++++++++++++++++++++++- hw/acpi/trace-events | 4 ++ include/hw/acpi/cpu.h | 5 +- 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index d99002c0ef..811be8a2d3 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -7,6 +7,13 @@ #define ACPI_CPU_HOTPLUG_REG_LEN 12 #define ACPI_CPU_SELECTOR_OFFSET_WR 0 #define ACPI_CPU_FLAGS_OFFSET_RW 4 +#define ACPI_CPU_CMD_OFFSET_WR 5 +#define ACPI_CPU_CMD_DATA_OFFSET_RW 8 + +enum { + CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0, + CPHP_CMD_MAX +}; static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) { @@ -22,8 +29,19 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) switch (addr) { case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ val |= cdev->cpu ? 1 : 0; + val |= cdev->is_inserting ? 2 : 0; trace_cpuhp_acpi_read_flags(cpu_st->selector, val); break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: + switch (cpu_st->command) { + case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD: + val = cpu_st->selector; + break; + default: + break; + } + trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val); + break; default: break; } @@ -34,6 +52,7 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { CPUHotplugState *cpu_st = opaque; + AcpiCpuStatus *cdev; assert(cpu_st->dev_count); @@ -49,6 +68,33 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, cpu_st->selector = data; trace_cpuhp_acpi_write_idx(cpu_st->selector); break; + case ACPI_CPU_FLAGS_OFFSET_RW: /* set is_* fields */ + cdev = &cpu_st->devs[cpu_st->selector]; + if (data & 2) { /* clear insert event */ + cdev->is_inserting = false; + trace_cpuhp_acpi_clear_inserting_evt(cpu_st->selector); + } + break; + case ACPI_CPU_CMD_OFFSET_WR: + trace_cpuhp_acpi_write_cmd(cpu_st->selector, data); + if (data < CPHP_CMD_MAX) { + cpu_st->command = data; + if (cpu_st->command == CPHP_GET_NEXT_CPU_WITH_EVENT_CMD) { + uint32_t iter = cpu_st->selector; + + do { + cdev = &cpu_st->devs[iter]; + if (cdev->is_inserting) { + cpu_st->selector = iter; + trace_cpuhp_acpi_cpu_has_events(cpu_st->selector, + cdev->is_inserting); + break; + } + iter = iter + 1 < cpu_st->dev_count ? iter + 1 : 0; + } while (iter != cpu_st->selector); + } + } + break; default: break; } @@ -111,8 +157,23 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, } cdev->cpu = CPU(dev); + if (dev->hotplugged) { + cdev->is_inserting = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); + } } +static const VMStateDescription vmstate_cpuhp_sts = { + .name = "CPU hotplug device state", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_cpu_hotplug = { .name = "CPU hotplug state", .version_id = 1, @@ -120,6 +181,9 @@ const VMStateDescription vmstate_cpu_hotplug = { .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(selector, CPUHotplugState), + VMSTATE_UINT8(command, CPUHotplugState), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(devs, CPUHotplugState, dev_count, + vmstate_cpuhp_sts, AcpiCpuStatus), VMSTATE_END_OF_LIST() } }; @@ -128,13 +192,19 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPUHP_RES_DEVICE "PRES" #define CPU_LOCK "CPLK" #define CPU_STS_METHOD "CSTA" +#define CPU_SCAN_METHOD "CSCN" +#define CPU_NOTIFY_METHOD "CTFY" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" +#define CPU_COMMAND "CCMD" +#define CPU_DATA "CDAT" +#define CPU_INSERT_EVENT "CINS" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, hwaddr io_base, - const char *res_root) + const char *res_root, + const char *event_handler_method) { Aml *ifctx; Aml *field; @@ -147,6 +217,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, MachineClass *mc = MACHINE_GET_CLASS(machine); CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine); char *cphp_res_path = g_strdup_printf("%s." CPUHP_RES_DEVICE, res_root); + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj); cpu_ctrl_dev = aml_device("%s", cphp_res_path); { @@ -173,11 +246,18 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(field, aml_reserved_field(ACPI_CPU_FLAGS_OFFSET_RW * 8)); /* 1 if enabled, read only */ aml_append(field, aml_named_field(CPU_ENABLED, 1)); + /* (read) 1 if has a insert event. (write) 1 to clear event */ + aml_append(field, aml_named_field(CPU_INSERT_EVENT, 1)); + aml_append(field, aml_reserved_field(6)); + aml_append(field, aml_named_field(CPU_COMMAND, 8)); aml_append(cpu_ctrl_dev, field); field = aml_field("PRST", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); /* CPU selector, write only */ aml_append(field, aml_named_field(CPU_SELECTOR, 32)); + /* flags + cmd + 2byte align */ + aml_append(field, aml_reserved_field(4 * 8)); + aml_append(field, aml_named_field(CPU_DATA, 32)); aml_append(cpu_ctrl_dev, field); } @@ -189,10 +269,27 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); + Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); + Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); + Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); aml_append(cpus_dev, aml_name_decl("_HID", aml_string("ACPI0010"))); aml_append(cpus_dev, aml_name_decl("_CID", aml_eisaid("PNP0A05"))); + method = aml_method(CPU_NOTIFY_METHOD, 2, AML_NOTSERIALIZED); + for (i = 0; i < arch_ids->len; i++) { + Aml *cpu = aml_name(CPU_NAME_FMT, i); + Aml *uid = aml_arg(0); + Aml *event = aml_arg(1); + + ifctx = aml_if(aml_equal(uid, aml_int(i))); + { + aml_append(ifctx, aml_notify(cpu, event)); + } + aml_append(method, ifctx); + } + aml_append(cpus_dev, method); + method = aml_method(CPU_STS_METHOD, 1, AML_SERIALIZED); { Aml *idx = aml_arg(0); @@ -211,10 +308,41 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, } aml_append(cpus_dev, method); + method = aml_method(CPU_SCAN_METHOD, 0, AML_SERIALIZED); + { + Aml *while_ctx; + Aml *has_event = aml_local(0); + Aml *dev_chk = aml_int(1); + Aml *next_cpu_cmd = aml_int(CPHP_GET_NEXT_CPU_WITH_EVENT_CMD); + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(one, has_event)); + while_ctx = aml_while(aml_equal(has_event, one)); + { + /* clear loop exit condition, ins_evt check + * will set it to 1 while next_cpu_cmd returns a CPU + * with events */ + aml_append(while_ctx, aml_store(zero, has_event)); + aml_append(while_ctx, aml_store(next_cpu_cmd, cpu_cmd)); + ifctx = aml_if(aml_equal(ins_evt, one)); + { + aml_append(ifctx, + aml_call2(CPU_NOTIFY_METHOD, cpu_data, dev_chk)); + aml_append(ifctx, aml_store(one, ins_evt)); + aml_append(ifctx, aml_store(one, has_event)); + } + aml_append(while_ctx, ifctx); + } + aml_append(method, while_ctx); + aml_append(method, aml_release(ctrl_lock)); + } + aml_append(cpus_dev, method); + /* build Processor object for each processor */ for (i = 0; i < arch_ids->len; i++) { Aml *dev; Aml *uid = aml_int(i); + GArray *madt_buf = g_array_new(0, 1, 1); int arch_id = arch_ids->cpus[i].arch_id; if (opts.apci_1_compatible && arch_id < 255) { @@ -229,12 +357,32 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); aml_append(dev, method); + /* build _MAT object */ + assert(adevc && adevc->madt_cpu); + adevc->madt_cpu(adev, i, arch_ids, madt_buf); + switch (madt_buf->data[0]) { + case ACPI_APIC_PROCESSOR: { + AcpiMadtProcessorApic *apic = (void *)madt_buf->data; + apic->flags = cpu_to_le32(1); + break; + } + default: + assert(0); + } + aml_append(dev, aml_name_decl("_MAT", + aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); + g_array_free(madt_buf, true); + aml_append(cpus_dev, dev); } } aml_append(sb_scope, cpus_dev); aml_append(table, sb_scope); + method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); + aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); + aml_append(table, method); + g_free(cphp_res_path); g_free(arch_ids); } diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 940467ba14..3043769044 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -21,3 +21,7 @@ mhp_acpi_pc_dimm_delete_failed(uint32_t slot) "slot[0x%"PRIx32"] pc-dimm delete cpuhp_acpi_invalid_idx_selected(uint32_t idx) "0x%"PRIx32 cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"PRIx8 cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32 +cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8 +cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32 +cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins) "idx[0x%"PRIx32"] inserting: %d" +cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]" diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index f345447af3..55c3166c0f 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -20,11 +20,13 @@ typedef struct AcpiCpuStatus { struct CPUState *cpu; uint64_t arch_id; + bool is_inserting; } AcpiCpuStatus; typedef struct CPUHotplugState { MemoryRegion ctrl_reg; uint32_t selector; + uint8_t command; uint32_t dev_count; AcpiCpuStatus *devs; } CPUHotplugState; @@ -41,7 +43,8 @@ typedef struct CPUHotplugFeatures { void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, hwaddr io_base, - const char *res_root); + const char *res_root, + const char *event_handler_method); extern const VMStateDescription vmstate_cpu_hotplug; #define VMSTATE_CPU_HOTPLUG(cpuhp, state) \ From 8872c25a26ccc2c2f62903b023212bbc0a4f5d39 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 14 Jun 2016 16:14:02 +0200 Subject: [PATCH 24/34] acpi: cpuhp: implement hot-remove parts of CPU hotplug interface it adds hw registers needed for handling CPU hot-remove and corresponding AML methods to request and eject a CPU with necessary hotplug callbacks in pc,piix4,ich9 code. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 90 +++++++++++++++++++++++++++++++++++++++++-- hw/acpi/ich9.c | 7 ++++ hw/acpi/piix4.c | 6 +++ hw/acpi/trace-events | 5 ++- hw/i386/pc.c | 47 ++++++++++++++++++++++ include/hw/acpi/cpu.h | 8 ++++ 6 files changed, 158 insertions(+), 5 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 811be8a2d3..483b80810e 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -30,6 +30,7 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ val |= cdev->cpu ? 1 : 0; val |= cdev->is_inserting ? 2 : 0; + val |= cdev->is_removing ? 4 : 0; trace_cpuhp_acpi_read_flags(cpu_st->selector, val); break; case ACPI_CPU_CMD_DATA_OFFSET_RW: @@ -73,6 +74,22 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, if (data & 2) { /* clear insert event */ cdev->is_inserting = false; trace_cpuhp_acpi_clear_inserting_evt(cpu_st->selector); + } else if (data & 4) { /* clear remove event */ + cdev->is_removing = false; + trace_cpuhp_acpi_clear_remove_evt(cpu_st->selector); + } else if (data & 8) { + DeviceState *dev = NULL; + HotplugHandler *hotplug_ctrl = NULL; + + if (!cdev->cpu) { + trace_cpuhp_acpi_ejecting_invalid_cpu(cpu_st->selector); + break; + } + + trace_cpuhp_acpi_ejecting_cpu(cpu_st->selector); + dev = DEVICE(cdev->cpu); + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, NULL); } break; case ACPI_CPU_CMD_OFFSET_WR: @@ -84,10 +101,10 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, do { cdev = &cpu_st->devs[iter]; - if (cdev->is_inserting) { + if (cdev->is_inserting || cdev->is_removing) { cpu_st->selector = iter; trace_cpuhp_acpi_cpu_has_events(cpu_st->selector, - cdev->is_inserting); + cdev->is_inserting, cdev->is_removing); break; } iter = iter + 1 < cpu_st->dev_count ? iter + 1 : 0; @@ -163,6 +180,34 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, } } +void acpi_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + AcpiCpuStatus *cdev; + + cdev = get_cpu_status(cpu_st, dev); + if (!cdev) { + return; + } + + cdev->is_removing = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); +} + +void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + AcpiCpuStatus *cdev; + + cdev = get_cpu_status(cpu_st, dev); + if (!cdev) { + return; + } + + cdev->cpu = NULL; +} + static const VMStateDescription vmstate_cpuhp_sts = { .name = "CPU hotplug device state", .version_id = 1, @@ -170,6 +215,7 @@ static const VMStateDescription vmstate_cpuhp_sts = { .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_BOOL(is_removing, AcpiCpuStatus), VMSTATE_END_OF_LIST() } }; @@ -194,12 +240,15 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_STS_METHOD "CSTA" #define CPU_SCAN_METHOD "CSCN" #define CPU_NOTIFY_METHOD "CTFY" +#define CPU_EJECT_METHOD "CEJ0" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" #define CPU_COMMAND "CCMD" #define CPU_DATA "CDAT" #define CPU_INSERT_EVENT "CINS" +#define CPU_REMOVE_EVENT "CRMV" +#define CPU_EJECT_EVENT "CEJ0" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, hwaddr io_base, @@ -248,7 +297,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(field, aml_named_field(CPU_ENABLED, 1)); /* (read) 1 if has a insert event. (write) 1 to clear event */ aml_append(field, aml_named_field(CPU_INSERT_EVENT, 1)); - aml_append(field, aml_reserved_field(6)); + /* (read) 1 if has a remove event. (write) 1 to clear event */ + aml_append(field, aml_named_field(CPU_REMOVE_EVENT, 1)); + /* initiates device eject, write only */ + aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); + aml_append(field, aml_reserved_field(4)); aml_append(field, aml_named_field(CPU_COMMAND, 8)); aml_append(cpu_ctrl_dev, field); @@ -272,6 +325,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); + Aml *rm_evt = aml_name("%s.%s", cphp_res_path, CPU_REMOVE_EVENT); + Aml *ej_evt = aml_name("%s.%s", cphp_res_path, CPU_EJECT_EVENT); aml_append(cpus_dev, aml_name_decl("_HID", aml_string("ACPI0010"))); aml_append(cpus_dev, aml_name_decl("_CID", aml_eisaid("PNP0A05"))); @@ -308,18 +363,31 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, } aml_append(cpus_dev, method); + method = aml_method(CPU_EJECT_METHOD, 1, AML_SERIALIZED); + { + Aml *idx = aml_arg(0); + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(one, ej_evt)); + aml_append(method, aml_release(ctrl_lock)); + } + aml_append(cpus_dev, method); + method = aml_method(CPU_SCAN_METHOD, 0, AML_SERIALIZED); { + Aml *else_ctx; Aml *while_ctx; Aml *has_event = aml_local(0); Aml *dev_chk = aml_int(1); + Aml *eject_req = aml_int(3); Aml *next_cpu_cmd = aml_int(CPHP_GET_NEXT_CPU_WITH_EVENT_CMD); aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); aml_append(method, aml_store(one, has_event)); while_ctx = aml_while(aml_equal(has_event, one)); { - /* clear loop exit condition, ins_evt check + /* clear loop exit condition, ins_evt/rm_evt checks * will set it to 1 while next_cpu_cmd returns a CPU * with events */ aml_append(while_ctx, aml_store(zero, has_event)); @@ -332,6 +400,16 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(ifctx, aml_store(one, has_event)); } aml_append(while_ctx, ifctx); + else_ctx = aml_else(); + ifctx = aml_if(aml_equal(rm_evt, one)); + { + aml_append(ifctx, + aml_call2(CPU_NOTIFY_METHOD, cpu_data, eject_req)); + aml_append(ifctx, aml_store(one, rm_evt)); + aml_append(ifctx, aml_store(one, has_event)); + } + aml_append(else_ctx, ifctx); + aml_append(while_ctx, else_ctx); } aml_append(method, while_ctx); aml_append(method, aml_release(ctrl_lock)); @@ -373,6 +451,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); g_array_free(madt_buf, true); + method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); + aml_append(method, aml_call1(CPU_EJECT_METHOD, uid)); + aml_append(dev, method); + aml_append(cpus_dev, dev); } } diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 9a81da82d3..0abe2ce5ee 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -481,6 +481,10 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_memory_unplug_request_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && + !lpc->pm.cpu_hotplug_legacy) { + acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, + dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -495,6 +499,9 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (lpc->pm.acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&lpc->pm.acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && + !lpc->pm.cpu_hotplug_legacy) { + acpi_cpu_unplug_cb(&lpc->pm.cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 6d24cb535b..8cdc1da9ac 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -378,6 +378,9 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && + !s->cpu_hotplug_legacy) { + acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -392,6 +395,9 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, if (s->acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&s->acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && + !s->cpu_hotplug_legacy) { + acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 3043769044..c1279b1547 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -23,5 +23,8 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%" cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32 cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8 cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32 -cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins) "idx[0x%"PRIx32"] inserting: %d" +cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d" cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]" +cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]" +cpuhp_acpi_ejecting_invalid_cpu(uint32_t idx) "0x%"PRIx32 +cpuhp_acpi_ejecting_cpu(uint32_t idx) "0x%"PRIx32 diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7198ed533c..dbfba5cad1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1707,6 +1707,49 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, out: error_propagate(errp, local_err); } +static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + + if (local_err) { + goto out; + } + + out: + error_propagate(errp, local_err); + +} + +static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + + if (local_err) { + goto out; + } + + /* + * TODO: enable unplug once generic CPU remove bits land + * for now guest will be able to eject CPU ACPI wise but + * it will come back again on machine reset. + */ + /* object_unparent(OBJECT(dev)); */ + + out: + error_propagate(errp, local_err); +} static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) @@ -1723,6 +1766,8 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { pc_dimm_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + pc_cpu_unplug_request_cb(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -1734,6 +1779,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { pc_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + pc_cpu_unplug_cb(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index 55c3166c0f..f334221d85 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -21,6 +21,7 @@ typedef struct AcpiCpuStatus { struct CPUState *cpu; uint64_t arch_id; bool is_inserting; + bool is_removing; } AcpiCpuStatus; typedef struct CPUHotplugState { @@ -34,6 +35,13 @@ typedef struct CPUHotplugState { void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, CPUHotplugState *cpu_st, DeviceState *dev, Error **errp); +void acpi_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp); + +void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp); + void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, CPUHotplugState *state, hwaddr base_addr); From 76623d00ae578335f6c9f31cd13fc5bf1931dbc3 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 22 Apr 2016 19:06:36 +0200 Subject: [PATCH 25/34] acpi: cpuhp: add cpu._OST handling it adds HW and AML parts for CPU_Device._OST method handling to allow OSPM reports status of hot-(un)plug operation. And extends QMP command query-acpi-ospm-status to report CPU's OST info along with already reported PC-DIMM devices. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 82 +++++++++++++++++++++++++++++++++++++++++++ hw/acpi/ich9.c | 3 ++ hw/acpi/piix4.c | 3 ++ hw/acpi/trace-events | 2 ++ include/hw/acpi/cpu.h | 4 +++ qapi-schema.json | 3 +- 6 files changed, 96 insertions(+), 1 deletion(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 483b80810e..401ac0dab2 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -2,6 +2,7 @@ #include "hw/boards.h" #include "hw/acpi/cpu.h" #include "qapi/error.h" +#include "qapi-event.h" #include "trace.h" #define ACPI_CPU_HOTPLUG_REG_LEN 12 @@ -12,9 +13,42 @@ enum { CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0, + CPHP_OST_EVENT_CMD = 1, + CPHP_OST_STATUS_CMD = 2, CPHP_CMD_MAX }; +static ACPIOSTInfo *acpi_cpu_device_status(int idx, AcpiCpuStatus *cdev) +{ + ACPIOSTInfo *info = g_new0(ACPIOSTInfo, 1); + + info->slot_type = ACPI_SLOT_TYPE_CPU; + info->slot = g_strdup_printf("%d", idx); + info->source = cdev->ost_event; + info->status = cdev->ost_status; + if (cdev->cpu) { + DeviceState *dev = DEVICE(cdev->cpu); + if (dev->id) { + info->device = g_strdup(dev->id); + info->has_device = true; + } + } + return info; +} + +void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) +{ + int i; + + for (i = 0; i < cpu_st->dev_count; i++) { + ACPIOSTInfoList *elem = g_new0(ACPIOSTInfoList, 1); + elem->value = acpi_cpu_device_status(i, &cpu_st->devs[i]); + elem->next = NULL; + **list = elem; + *list = &elem->next; + } +} + static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) { uint64_t val = 0; @@ -54,6 +88,7 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, { CPUHotplugState *cpu_st = opaque; AcpiCpuStatus *cdev; + ACPIOSTInfo *info; assert(cpu_st->dev_count); @@ -112,6 +147,28 @@ static void cpu_hotplug_wr(void *opaque, hwaddr addr, uint64_t data, } } break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: + switch (cpu_st->command) { + case CPHP_OST_EVENT_CMD: { + cdev = &cpu_st->devs[cpu_st->selector]; + cdev->ost_event = data; + trace_cpuhp_acpi_write_ost_ev(cpu_st->selector, cdev->ost_event); + break; + } + case CPHP_OST_STATUS_CMD: { + cdev = &cpu_st->devs[cpu_st->selector]; + cdev->ost_status = data; + info = acpi_cpu_device_status(cpu_st->selector, cdev); + qapi_event_send_acpi_device_ost(info, &error_abort); + qapi_free_ACPIOSTInfo(info); + trace_cpuhp_acpi_write_ost_status(cpu_st->selector, + cdev->ost_status); + break; + } + default: + break; + } + break; default: break; } @@ -216,6 +273,8 @@ static const VMStateDescription vmstate_cpuhp_sts = { .fields = (VMStateField[]) { VMSTATE_BOOL(is_inserting, AcpiCpuStatus), VMSTATE_BOOL(is_removing, AcpiCpuStatus), + VMSTATE_UINT32(ost_event, AcpiCpuStatus), + VMSTATE_UINT32(ost_status, AcpiCpuStatus), VMSTATE_END_OF_LIST() } }; @@ -241,6 +300,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_SCAN_METHOD "CSCN" #define CPU_NOTIFY_METHOD "CTFY" #define CPU_EJECT_METHOD "CEJ0" +#define CPU_OST_METHOD "COST" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" @@ -416,6 +476,22 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, } aml_append(cpus_dev, method); + method = aml_method(CPU_OST_METHOD, 4, AML_SERIALIZED); + { + Aml *uid = aml_arg(0); + Aml *ev_cmd = aml_int(CPHP_OST_EVENT_CMD); + Aml *st_cmd = aml_int(CPHP_OST_STATUS_CMD); + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(uid, cpu_selector)); + aml_append(method, aml_store(ev_cmd, cpu_cmd)); + aml_append(method, aml_store(aml_arg(1), cpu_data)); + aml_append(method, aml_store(st_cmd, cpu_cmd)); + aml_append(method, aml_store(aml_arg(2), cpu_data)); + aml_append(method, aml_release(ctrl_lock)); + } + aml_append(cpus_dev, method); + /* build Processor object for each processor */ for (i = 0; i < arch_ids->len; i++) { Aml *dev; @@ -455,6 +531,12 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(method, aml_call1(CPU_EJECT_METHOD, uid)); aml_append(dev, method); + method = aml_method("_OST", 3, AML_SERIALIZED); + aml_append(method, + aml_call4(CPU_OST_METHOD, uid, aml_arg(0), + aml_arg(1), aml_arg(2)) + ); + aml_append(dev, method); aml_append(cpus_dev, dev); } } diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 0abe2ce5ee..d12cc62381 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -513,4 +513,7 @@ void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) ICH9LPCState *s = ICH9_LPC_DEVICE(adev); acpi_memory_ospm_status(&s->pm.acpi_memory_hotplug, list); + if (!s->pm.cpu_hotplug_legacy) { + acpi_cpu_ospm_status(&s->pm.cpuhp_state, list); + } } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 8cdc1da9ac..858244a661 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -616,6 +616,9 @@ static void piix4_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) PIIX4PMState *s = PIIX4_PM(adev); acpi_memory_ospm_status(&s->acpi_memory_hotplug, list); + if (!s->cpu_hotplug_legacy) { + acpi_cpu_ospm_status(&s->cpuhp_state, list); + } } static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index c1279b1547..5aa3ba67c8 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -28,3 +28,5 @@ cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]" cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]" cpuhp_acpi_ejecting_invalid_cpu(uint32_t idx) "0x%"PRIx32 cpuhp_acpi_ejecting_cpu(uint32_t idx) "0x%"PRIx32 +cpuhp_acpi_write_ost_ev(uint32_t slot, uint32_t ev) "idx[0x%"PRIx32"] OST EVENT: 0x%"PRIx32 +cpuhp_acpi_write_ost_status(uint32_t slot, uint32_t st) "idx[0x%"PRIx32"] OST STATUS: 0x%"PRIx32 diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index f334221d85..980a83c4b8 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -22,6 +22,8 @@ typedef struct AcpiCpuStatus { uint64_t arch_id; bool is_inserting; bool is_removing; + uint32_t ost_event; + uint32_t ost_status; } AcpiCpuStatus; typedef struct CPUHotplugState { @@ -54,6 +56,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, const char *res_root, const char *event_handler_method); +void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + extern const VMStateDescription vmstate_cpu_hotplug; #define VMSTATE_CPU_HOTPLUG(cpuhp, state) \ VMSTATE_STRUCT(cpuhp, state, 1, \ diff --git a/qapi-schema.json b/qapi-schema.json index 0964eece6d..84b6708125 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4079,8 +4079,9 @@ ## @ACPISlotType # # @DIMM: memory slot +# @CPU: logical CPU slot (since 2.7) # -{ 'enum': 'ACPISlotType', 'data': [ 'DIMM' ] } +{ 'enum': 'ACPISlotType', 'data': [ 'DIMM', 'CPU' ] } ## @ACPIOSTInfo # From 679dd1a957df418453efdd3ed2914dba5cd73773 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 15 Jun 2016 11:25:23 +0200 Subject: [PATCH 26/34] pc: use new CPU hotplug interface since 2.7 machine type For compatibility reasons PC/Q35 will start with legacy CPU hotplug interface by default but with new CPU hotplug AML code since 2.7 machine type. That way legacy firmware that doesn't use QEMU generated ACPI tables will be able to continue using legacy CPU hotplug interface. While new machine type, with firmware supporting QEMU provided ACPI tables, will generate new CPU hotplug AML, which will switch to new CPU hotplug interface when guest OS executes its _INI method on ACPI tables loading. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 9 +++++++++ hw/acpi/cpu_hotplug.c | 21 ++++++++++++++++++++- hw/acpi/ich9.c | 33 +++++++++++++++++++++++++++++++++ hw/acpi/piix4.c | 32 ++++++++++++++++++++++++++++++++ hw/i386/acpi-build.c | 12 +++++++++++- hw/i386/pc_piix.c | 2 ++ hw/i386/pc_q35.c | 2 ++ include/hw/acpi/cpu.h | 1 + include/hw/acpi/cpu_hotplug.h | 6 ++++++ include/hw/i386/pc.h | 2 ++ 10 files changed, 118 insertions(+), 2 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 401ac0dab2..c13b65c2c9 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -373,6 +373,15 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(field, aml_named_field(CPU_DATA, 32)); aml_append(cpu_ctrl_dev, field); + if (opts.has_legacy_cphp) { + method = aml_method("_INI", 0, AML_SERIALIZED); + /* switch off legacy CPU hotplug HW and use new one, + * on reboot system is in new mode and writing 0 + * in CPU_SELECTOR selects BSP, which is NOP at + * the time _INI is called */ + aml_append(method, aml_store(zero, aml_name(CPU_SELECTOR))); + aml_append(cpu_ctrl_dev, method); + } } aml_append(sb_scope, cpu_ctrl_dev); diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index fe75bd9ac9..e19d902063 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -34,7 +34,15 @@ static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned int size) static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { - /* TODO: implement VCPU removal on guest signal that CPU can be removed */ + /* firmware never used to write in CPU present bitmap so use + this fact as means to switch QEMU into modern CPU hotplug + mode by writing 0 at the beginning of legacy CPU bitmap + */ + if (addr == 0 && data == 0) { + AcpiCpuHotplug *cpus = opaque; + object_property_set_bool(cpus->device, false, "cpu-hotplug-legacy", + &error_abort); + } } static const MemoryRegionOps AcpiCpuHotplug_ops = { @@ -83,6 +91,17 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, memory_region_init_io(&gpe_cpu->io, owner, &AcpiCpuHotplug_ops, gpe_cpu, "acpi-cpu-hotplug", ACPI_GPE_PROC_LEN); memory_region_add_subregion(parent, base, &gpe_cpu->io); + gpe_cpu->device = owner; +} + +void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, + CPUHotplugState *cpuhp_state, + uint16_t io_port) +{ + MemoryRegion *parent = pci_address_space_io(PCI_DEVICE(gpe_cpu->device)); + + memory_region_del_subregion(parent, &gpe_cpu->io); + cpu_hotplug_hw_init(parent, gpe_cpu->device, cpuhp_state, io_port); } void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index d12cc62381..e5a3c18e52 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -189,6 +189,33 @@ static const VMStateDescription vmstate_tco_io_state = { } }; +static bool vmstate_test_use_cpuhp(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + return !s->cpu_hotplug_legacy; +} + +static int vmstate_cpuhp_pre_load(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + Object *obj = OBJECT(s->gpe_cpu.device); + object_property_set_bool(obj, false, "cpu-hotplug-legacy", &error_abort); + return 0; +} + +static const VMStateDescription vmstate_cpuhp_state = { + .name = "ich9_pm/cpuhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .needed = vmstate_test_use_cpuhp, + .pre_load = vmstate_cpuhp_pre_load, + .fields = (VMStateField[]) { + VMSTATE_CPU_HOTPLUG(cpuhp_state, ICH9LPCPMRegs), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -209,6 +236,7 @@ const VMStateDescription vmstate_ich9_pm = { .subsections = (const VMStateDescription*[]) { &vmstate_memhp_state, &vmstate_tco_io_state, + &vmstate_cpuhp_state, NULL } }; @@ -318,6 +346,11 @@ static void ich9_pm_set_cpu_hotplug_legacy(Object *obj, bool value, { ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + assert(!value); + if (s->pm.cpu_hotplug_legacy && value == false) { + acpi_switch_to_modern_cphp(&s->pm.gpe_cpu, &s->pm.cpuhp_state, + ICH9_CPU_HOTPLUG_IO_BASE); + } s->pm.cpu_hotplug_legacy = value; } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 858244a661..2adc246b00 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -276,6 +276,32 @@ static const VMStateDescription vmstate_memhp_state = { } }; +static bool vmstate_test_use_cpuhp(void *opaque) +{ + PIIX4PMState *s = opaque; + return !s->cpu_hotplug_legacy; +} + +static int vmstate_cpuhp_pre_load(void *opaque) +{ + Object *obj = OBJECT(opaque); + object_property_set_bool(obj, false, "cpu-hotplug-legacy", &error_abort); + return 0; +} + +static const VMStateDescription vmstate_cpuhp_state = { + .name = "piix4_pm/cpuhp", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .needed = vmstate_test_use_cpuhp, + .pre_load = vmstate_cpuhp_pre_load, + .fields = (VMStateField[]) { + VMSTATE_CPU_HOTPLUG(cpuhp_state, PIIX4PMState), + VMSTATE_END_OF_LIST() + } +}; + /* qemu-kvm 1.2 uses version 3 but advertised as 2 * To support incoming qemu-kvm 1.2 migration, change version_id * and minimum_version_id to 2 below (which breaks migration from @@ -310,6 +336,7 @@ static const VMStateDescription vmstate_acpi = { }, .subsections = (const VMStateDescription*[]) { &vmstate_memhp_state, + &vmstate_cpuhp_state, NULL } }; @@ -585,6 +612,11 @@ static void piix4_set_cpu_hotplug_legacy(Object *obj, bool value, Error **errp) { PIIX4PMState *s = PIIX4_PM(obj); + assert(!value); + if (s->cpu_hotplug_legacy && value == false) { + acpi_switch_to_modern_cphp(&s->gpe_cpu, &s->cpuhp_state, + PIIX4_CPU_HOTPLUG_IO_BASE); + } s->cpu_hotplug_legacy = value; } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e35a446a9d..20e5b4948e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -33,6 +33,7 @@ #include "hw/timer/hpet.h" #include "hw/acpi/acpi-defs.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/cpu.h" #include "hw/nvram/fw_cfg.h" #include "hw/acpi/bios-linker-loader.h" #include "hw/loader.h" @@ -1895,6 +1896,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); PCMachineState *pcms = PC_MACHINE(machine); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); uint32_t nr_mem = machine->ram_slots; int root_bus_limit = 0xFF; PCIBus *bus = NULL; @@ -1950,7 +1952,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_q35_pci0_int(dsdt); } - build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base); + if (pcmc->legacy_cpu_hotplug) { + build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base); + } else { + CPUHotplugFeatures opts = { + .apci_1_compatible = true, .has_legacy_cphp = true + }; + build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, + "\\_SB.PCI0", "\\_GPE._E02"); + } build_memory_hotplug_aml(dsdt, nr_mem, pm->mem_hp_io_base, pm->mem_hp_io_len); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 53bc968bd0..c7d70af253 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -445,9 +445,11 @@ DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, static void pc_i440fx_2_6_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); m->is_default = 0; m->alias = NULL; + pcmc->legacy_cpu_hotplug = true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index e4b541f7b2..97a8835eea 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -294,8 +294,10 @@ DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, static void pc_q35_2_6_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); m->alias = NULL; + pcmc->legacy_cpu_hotplug = true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index 980a83c4b8..89ce172941 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -49,6 +49,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, typedef struct CPUHotplugFeatures { bool apci_1_compatible; + bool has_legacy_cphp; } CPUHotplugFeatures; void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h index 6fef67ec14..b995ef2ebd 100644 --- a/include/hw/acpi/cpu_hotplug.h +++ b/include/hw/acpi/cpu_hotplug.h @@ -16,8 +16,10 @@ #include "hw/acpi/pc-hotplug.h" #include "hw/acpi/aml-build.h" #include "hw/hotplug.h" +#include "hw/acpi/cpu.h" typedef struct AcpiCpuHotplug { + Object *device; MemoryRegion io; uint8_t sts[ACPI_GPE_PROC_LEN]; } AcpiCpuHotplug; @@ -28,6 +30,10 @@ void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, AcpiCpuHotplug *gpe_cpu, uint16_t base); +void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, + CPUHotplugState *cpuhp_state, + uint16_t io_port); + void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, uint16_t io_base); #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9e239297e8..884224ed19 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -137,6 +137,8 @@ struct PCMachineClass { /* TSC rate migration: */ bool save_tsc_khz; + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; }; #define TYPE_PC_MACHINE "generic-pc-machine" From d8d69e1f51e493e21fa75ce80cfacd002bf8a895 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 24 Jun 2016 07:45:28 +0300 Subject: [PATCH 27/34] acpi-test-data: update expected switched to new cpu hotplug interface, aml changed. Signed-off-by: Michael S. Tsirkin --- tests/acpi-test-data/pc/DSDT | Bin 5503 -> 6008 bytes tests/acpi-test-data/pc/DSDT.bridge | Bin 7362 -> 7867 bytes tests/acpi-test-data/pc/DSDT.ipmikcs | Bin 5575 -> 6080 bytes tests/acpi-test-data/q35/DSDT | Bin 8265 -> 8770 bytes tests/acpi-test-data/q35/DSDT.bridge | Bin 8282 -> 8787 bytes tests/acpi-test-data/q35/DSDT.ipmibt | Bin 8340 -> 8845 bytes 6 files changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT index 8b4f1a09b87f8361fb572022f69d304ddeeace99..8053d711058c0f9541d6d97690819f9de697751c 100644 GIT binary patch delta 907 zcmb7D&2G~`5Z-NEx>>tLsVpJEfk-_dA(Sp$dNyM>F^y9z>ms3zWFM2B_DE0ekHib) z1ya;|gm@7iqz}L?%otSBO)m9mJv;h-X1-m${oRwXj*G7^7~}UpJ7S@oG++%K1@K! zW*r+~K)FJBvk0*UGEty|6bdSkhh&#}%t!yKtSWNcSn4?tOg+cdf)4&9?wWVOC`h&Q zk%UNsc2+gkwt^LEk0H2I&OWNc3b%uMvm`~9B?K45iL M-6|{k(W(pc2VMhaIRF3v diff --git a/tests/acpi-test-data/pc/DSDT.bridge b/tests/acpi-test-data/pc/DSDT.bridge index 0d09b5cc61114b68fee0f14729732786854b19fe..850e71a973e52cc5e546fdd2757f0e089fed7192 100644 GIT binary patch delta 907 zcmb7D&2G~`5Z-NEx>>tLp*~RwBK5!pwF?reIB+mylbFWA%DPBsBiYBKr@i*%{z$w) zULZj4QPpE_eFEO7%($qcn_Tc|Jv;h-X1=|BfAc}cIxcQrFvj0q$&-yvdQj?*r8x&V z-fz)yb|m&{!yz9WGEu@vcQ&Q$akgL!9-J_9nvZnBeYTK+IoqY5h;<=Ph8)tN<}k{> za!5Wa&OCrD7Ut|3HMKw|gD!T)QPB;9G99MEGAf$$!RzrJQA2*DMcGf|dNYDNRqBl* z327WezvX(^f#TD*wYi*4*mqD$O~d{ZK>D`X9q)StkVC5SQKCuw-JPAdWn{Cgnm?bp z7bM@xr^)vEk6-V)wxu`hg zdCyz2)GPxB=CNve$ z*}wCroh`{l&vv@PnU!m9m0FM8A`W4tJadtF7_G5l&4)R~e2Gg+8747{ijFS@6Zm@Y sKJ-h4e>+5WZt2h?Blr~m)} delta 383 zcmYLFK~KUk82!3|ECqu|V~FuU_yJ@wdhlQ(mSHjjHg*>?&AKfl^VW+8;DCxz&Q9+` zcPGZ9H%|U6yJ=z%eSPis@?PK9;pzZ)dDmI(0HB8tJSt7ChR2URvQJsRqsiHvva>-1 zLe|RI00Z*n%j>z1HIR`49i&iDfYc}3lyL|BtCFHfadn}mKrm&Nt+_4yP3$#x&M8Q> zdqW9<1o74@zKSR1#Rq?C0e*rf0+(PMpkPr^*Tb<;ZfbrKAPAGzbL@vQ%o7Soa2%q6 z)4LF8VdRMQ8{yuc7TWPksU2Tc>(eDHQF8?Dt#Woy8J4)|-c@pvWjWe#XM2>H=I<}b zy4UYGkY-H)kc!8hPR6(c>dCTAq=;ohL~5CzrV>Q(nCY*LthG%YA+mMZ_D-1PS>`Ta PbynRfEBe_ezt?{Ns(5C) diff --git a/tests/acpi-test-data/pc/DSDT.ipmikcs b/tests/acpi-test-data/pc/DSDT.ipmikcs index f10cd9e296c942b66d6f2404f1a95fe5cc4a6796..8ac48afb6a672a6d70e13539a17614b465d648d6 100644 GIT binary patch delta 907 zcmb7DKX21O6u)a+xSZXhRF;rniPV7s)Er%y&9g~NTR+@de7n$~P+S8C12GEOQ*oGsas2Pe#&=3`xOpDkrz&UWc1VqM6S5r=f7IgGQ< z9FmWUGY=q(g*n@yrWWXA*u~B`D!O4%rlV9?Mn!Wzd^6c2YRErnQ4Z3$-ijbkm3pg4 zLK+9rZ@C$Ep!o1*VRoywPS==-y(lraTITvi;6>D z^t}~J%`$LcZadDO+ZAcpZFzNl;4KCSRXgKscDf!*wpPKg9l_RH&DwCNi{Rd>ah@6N6hDx1OfGAbl(8FaT?Jh&aE4s>UW_fC2* z;vbl&`6K)>rd2S9yu9RlyqC8=+8pAZ+w(TN0BHS=KWlS)pSkCZoLY3yRm5Ux(W_w$ zTr_m7fd=IY#AF#@1w<@C1;Hg$APdNWMO_d5tI{gZaDAm_K(Iw^NAcVEi#S{K+OfctaOs^S|=IRn{&t^N3A>X@1?bebjWebe=tf?ggM*s7bg}q&EId5 zZ8qq7kQG$`5QDj1w_vUZ^2v5gB#G@nOd5ruA|u4`ROlyKQCm|*i0s{T*afwjZ650& N&$3);Wqoj+8$Y*jXKVlf diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT index 67445428d935bd6ea5957526089ba7e719a1783a..58fbb3d2e2dc8e8256984744bfb9411feb2e35fe 100644 GIT binary patch delta 907 zcmb7D%}(1u5Z)zDW!8o&l`p_0f&-Tdw!Ltwgc+N}5T{nwAXS^nJ|>*@+LH;Dc!9h? zsCa>jH{iyNW8Ma43_@j-OMP0;j=rCnZyyftBN3@6KiX!DfBP#Y8?EG^P)CKn0x{fg z&~f^6bFb1Ivfe3k3mB-@nh+g6_vb$Me=&WT4OGs(xi5S@*`c41wIPf99Fo4`Fi6ii z#3zz72Oy1HJ^4gU4N!5fjh#`Lw*$XO`iW~874=o`^Kg%-Av;rU(M_UiErcwQ@{1%1 zN#uu*q8YRxKmM^Y6Vnm91}Wt@=zajCU)4LqeTM)tNMtsM6^Va(G9$Z;Y=)Jy&8dAs z@{Rad>fQRLdJlD525FN#{T~yzSrFls;5~lOT?Yky_svIT8R6xh(3_SV#T#a$;*e(@ zXTegv3>=uJiGUvXje!=iN4n(ubEwwup)3G=l8 delta 398 zcmYLFK}y3w6#bL58m1{)8N>>O&;yhpx^JDP#Yoek$wne%l2Ax@vvFa&sI{P7xU#r& zkxDKg-oTaKpvQ3%1!wW!yqS6bz4@Qr?>6qbUGKsGKu_;@QJ-5!fqNd1Gs*@XMJ!j8 z4V?tIs8z5A8WbxOlU0Zn5Qzj81eZ{PEF{O2xgPpgrDdMsy^Wdy!IW9H;y3XZv3JG!W7Rq-i2v|qKz(($5Srdo68^k&r`* z*D-Ru-W5k1tw5~aaR2_Y)LOsRTXQXNy$-Hu0Uf!uD#m@Z)wen_Zo$8kK|EC1>e%oX zdO`AN?R|fFN||Z?ewtK+LC1qE9}ED|^L{yVJ&;$HZ6Zl58xm5>?G+g)frnf_(I{&S dodXpia(LYe`pgO}^91Bpj#^sL52Mj6{{Z}6X!QU9 diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge index e85f5b1af9fcd36c9522b05e0085af84d6c010cb..c392802a95cb5690c6719b7909c9f8fa2213e503 100644 GIT binary patch delta 907 zcmb7D%}(1u5Z)yYGHU}$q5OL^gW2t zX@idQ+k=xzbIAHv%q?K3TDwAY`O;td+`nV`V>VPd_m;l!_3V&-Le_yS9&ktoio-Dd zz#)DoIdcHg$knqK)YJeK_dD1bg?T6Ni)4_vhEY-9_m4*>L=D+{8hhI68F*AcI6^!&s5{)yjA_JZFu?l%31Ufs1 delta 398 zcmYLF&r8EF82!?2TCxtE1aZQk>@1`BMs1w;b)Td(}WpQM-P{HZyFDV}n?4U-5(Yh50XCtSM8_tOYLoHn0VVJzYzA%_&N zV&wR}OODoBfmpre!NWzVwR)|$W*YPS4z6g7j=WkG<38Hx8yy*U5ZuckKB{bVYy@+? zAbGU*Zm>9|u4(>$npD}K<3pAY27u^!znpnK$Scb>ktCK43904wiVT#%W3Io^sM{Di cdn!U?|GLHcuEi|#801!tT3XQ$qj6IH0lN=q*#H0l diff --git a/tests/acpi-test-data/q35/DSDT.ipmibt b/tests/acpi-test-data/q35/DSDT.ipmibt index 3db2b0b5f96e567d42d3abc672c95e9724243aaf..0ea38e1e72977e82053b087a6bd2e4ea21373420 100644 GIT binary patch delta 907 zcmb7DKTq306u%=*WzL2w$V*{~VCaJ2+J)JDb`wJ!telHfT_jtb5~dRylM6^}OyL6r z#L%_s*XY2QZv*ccgy50|Pv>_}zd!H&?mE1RMWmwqYLhYk>#z9#sg>*%>RX}DK@4{r zbe#NI->Ec*tarfN0tTwJB1DJJ{F%@FQ>K4s1C?`c<_lk+ZP8E2+K|P44oP2e7^FuW z;(f`P1CU0pK3k)v2B^5##?C0r+ksys{lqnliu$~_J=`H`$bKld=q6FM5<-?p`9+e1 zB=W<1(F|IUfB(5O6_XLW1S#b>=zajCU)Ed0U55ZNNMtsM6^Va%FeAH+Y=)K7^@)8! z@{Rad>d*B}^%m-`4ALffdLI+FSrFlw;4Oa8T?GYw_r*tL8R6w^=uJwF;tjJ=amdq- zGiRw@1`f<_$N6(JCk?v|FK-T%sfL qD=6m_3>K^wXv-sSY&1R&HZakhEG}aC6&F@NEJ`=6`QORLX7dq~z4V>{ delta 384 zcmYLFK}*9h82!?2TC$361aX2eck?oe=-rxiLS?hqT%H^D9m(uJsflCCg&$1f+%gjCP5^kG9iZ) zClMO3-W5lgRw7n!x%YTp=_KFHPI9o{n6C7dj#_u%-AikRYAYSLya&A`MVONsnCzKqPB*L5ZS)&_-Bsg TTgG7x Date: Thu, 16 Jun 2016 14:23:48 +0200 Subject: [PATCH 28/34] pc: acpi: drop intermediate PCMachineState.node_cpu PCMachineState.node_cpu was used for mapping APIC ID to numa node id as CPU entries in SRAT used to be built on sparse APIC ID bitmap (up to apic_id_limit). However since commit 5803fce pc: acpi: SRAT: create only valid processor lapic entries CPU entries in SRAT aren't build using apic bitmap but using 0..maxcpus index instead which is also used for creating numa_info[x].node_cpu map. So instead of doing useless intermediate conversion from 1. node by cpu index -> node by apic id i.e. numa_info[x].node_cpu -> PCMachineState.node_cpu 2. apic id -> srat entry PMX PCMachineState.node_cpu[apic id] -> PMX value use numa_info[x].node_cpu map directly like ARM does and do 1. numa_info[x].node_cpu -> PMX value using index in range 0..maxcpus and drop not necessary PCMachineState.node_cpu and related code. That also removes the last (not counting legacy hotplug) dependency of ACPI code on apic_id_limit and need to allocate huge sparse PCMachineState.node_cpu array in case of 32-bit APIC IDs. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 11 ++++++++--- hw/i386/pc.c | 16 +--------------- include/hw/i386/pc.h | 1 - 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 20e5b4948e..5a594be8ee 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -44,6 +44,7 @@ #include "hw/acpi/tpm.h" #include "sysemu/tpm_backend.h" #include "hw/timer/mc146818rtc_regs.h" +#include "sysemu/numa.h" /* Supported chipsets: */ #include "hw/acpi/piix4.h" @@ -2328,7 +2329,6 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) AcpiSratMemoryAffinity *numamem; int i; - uint64_t curnode; int srat_start, numa_start, slots; uint64_t mem_len, mem_base, next_base; MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -2344,14 +2344,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { + int j; int apic_id = apic_ids->cpus[i].arch_id; core = acpi_data_push(table_data, sizeof *core); core->type = ACPI_SRAT_PROCESSOR_APIC; core->length = sizeof(*core); core->local_apic_id = apic_id; - curnode = pcms->node_cpu[apic_id]; - core->proximity_lo = curnode; + for (j = 0; j < nb_numa_nodes; j++) { + if (test_bit(i, numa_info[j].node_cpu)) { + core->proximity_lo = j; + break; + } + } memset(core->proximity_hi, 0, 3); core->local_sapic_eid = 0; core->flags = cpu_to_le32(1); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index dbfba5cad1..b8fead37e2 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1179,7 +1179,7 @@ void pc_machine_done(Notifier *notifier, void *data) void pc_guest_info_init(PCMachineState *pcms) { - int i, j; + int i; pcms->apic_xrupt_override = kvm_allows_irq0_override(); pcms->numa_nodes = nb_numa_nodes; @@ -1189,20 +1189,6 @@ void pc_guest_info_init(PCMachineState *pcms) pcms->node_mem[i] = numa_info[i].node_mem; } - pcms->node_cpu = g_malloc0(pcms->apic_id_limit * - sizeof *pcms->node_cpu); - - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); - assert(apic_id < pcms->apic_id_limit); - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { - pcms->node_cpu[apic_id] = j; - break; - } - } - } - pcms->machine_done.notify = pc_machine_done; qemu_add_machine_init_done_notifier(&pcms->machine_done); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 884224ed19..948ed0c277 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -72,7 +72,6 @@ struct PCMachineState { /* NUMA information: */ uint64_t numa_nodes; uint64_t *node_mem; - uint64_t *node_cpu; }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" From 6798e245a3c7e6f34f66a548a108cfa8eba79b7d Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:09 +0200 Subject: [PATCH 29/34] virtio-bus: common ioeventfd infrastructure Introduce a set of ioeventfd callbacks on the virtio-bus level that can be implemented by the individual transports. At the virtio-bus level, do common handling for host notifiers (which is actually most of it). Two things of note: - When setting the host notifier, we only switch from/to the generic ioeventfd handler. This fixes a latent bug where we had no ioeventfd assigned for a certain window. - We always iterate over all possible virtio queues, even though ccw (currently) has a lower limit. It does not really matter here. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-bus.c | 132 +++++++++++++++++++++++++++++++++ include/hw/virtio/virtio-bus.h | 30 ++++++++ 2 files changed, 162 insertions(+) diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 574f0e23f8..131376027b 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -146,6 +146,138 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config) } } +/* + * This function handles both assigning the ioeventfd handler and + * registering it with the kernel. + * assign: register/deregister ioeventfd with the kernel + * set_handler: use the generic ioeventfd handler + */ +static int set_host_notifier_internal(DeviceState *proxy, VirtioBusState *bus, + int n, bool assign, bool set_handler) +{ + VirtIODevice *vdev = virtio_bus_get_device(bus); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r = 0; + + if (assign) { + r = event_notifier_init(notifier, 1); + if (r < 0) { + error_report("%s: unable to init event notifier: %d", __func__, r); + return r; + } + virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); + r = k->ioeventfd_assign(proxy, notifier, n, assign); + if (r < 0) { + error_report("%s: unable to assign ioeventfd: %d", __func__, r); + virtio_queue_set_host_notifier_fd_handler(vq, false, false); + event_notifier_cleanup(notifier); + return r; + } + } else { + virtio_queue_set_host_notifier_fd_handler(vq, false, false); + k->ioeventfd_assign(proxy, notifier, n, assign); + event_notifier_cleanup(notifier); + } + return r; +} + +void virtio_bus_start_ioeventfd(VirtioBusState *bus) +{ + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtIODevice *vdev; + int n, r; + + if (!k->ioeventfd_started || k->ioeventfd_started(proxy)) { + return; + } + if (k->ioeventfd_disabled(proxy)) { + return; + } + vdev = virtio_bus_get_device(bus); + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + r = set_host_notifier_internal(proxy, bus, n, true, true); + if (r < 0) { + goto assign_error; + } + } + k->ioeventfd_set_started(proxy, true, false); + return; + +assign_error: + while (--n >= 0) { + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + + r = set_host_notifier_internal(proxy, bus, n, false, false); + assert(r >= 0); + } + k->ioeventfd_set_started(proxy, false, true); + error_report("%s: failed. Fallback to userspace (slower).", __func__); +} + +void virtio_bus_stop_ioeventfd(VirtioBusState *bus) +{ + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtIODevice *vdev; + int n, r; + + if (!k->ioeventfd_started || !k->ioeventfd_started(proxy)) { + return; + } + vdev = virtio_bus_get_device(bus); + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + r = set_host_notifier_internal(proxy, bus, n, false, false); + assert(r >= 0); + } + k->ioeventfd_set_started(proxy, false, false); +} + +/* + * This function switches from/to the generic ioeventfd handler. + * assign==false means 'use generic ioeventfd handler'. + */ +int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) +{ + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtIODevice *vdev = virtio_bus_get_device(bus); + VirtQueue *vq = virtio_get_queue(vdev, n); + + if (!k->ioeventfd_started) { + return -ENOSYS; + } + if (assign) { + /* + * Stop using the generic ioeventfd, we are doing eventfd handling + * ourselves below + */ + k->ioeventfd_set_disabled(proxy, true); + } + /* + * Just switch the handler, don't deassign the ioeventfd. + * Otherwise, there's a window where we don't have an + * ioeventfd and we may end up with a notification where + * we don't expect one. + */ + virtio_queue_set_host_notifier_fd_handler(vq, assign, !assign); + if (!assign) { + /* Use generic ioeventfd handler again. */ + k->ioeventfd_set_disabled(proxy, false); + } + return 0; +} + static char *virtio_bus_get_dev_path(DeviceState *dev) { BusState *bus = qdev_get_parent_bus(dev); diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 3f2c1363d0..9637f80f4d 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -70,6 +70,29 @@ typedef struct VirtioBusClass { */ void (*device_unplugged)(DeviceState *d); int (*query_nvectors)(DeviceState *d); + /* + * ioeventfd handling: if the transport implements ioeventfd_started, + * it must implement the other ioeventfd callbacks as well + */ + /* Returns true if the ioeventfd has been started for the device. */ + bool (*ioeventfd_started)(DeviceState *d); + /* + * Sets the 'ioeventfd started' state after the ioeventfd has been + * started/stopped for the device. err signifies whether an error + * had occurred. + */ + void (*ioeventfd_set_started)(DeviceState *d, bool started, bool err); + /* Returns true if the ioeventfd has been disabled for the device. */ + bool (*ioeventfd_disabled)(DeviceState *d); + /* Sets the 'ioeventfd disabled' state for the device. */ + void (*ioeventfd_set_disabled)(DeviceState *d, bool disabled); + /* + * Assigns/deassigns the ioeventfd backing for the transport on + * the device for queue number n. Returns an error value on + * failure. + */ + int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier, + int n, bool assign); /* * Does the transport have variable vring alignment? * (ie can it ever call virtio_queue_set_align()?) @@ -111,4 +134,11 @@ static inline VirtIODevice *virtio_bus_get_device(VirtioBusState *bus) return (VirtIODevice *)qdev; } +/* Start the ioeventfd. */ +void virtio_bus_start_ioeventfd(VirtioBusState *bus); +/* Stop the ioeventfd. */ +void virtio_bus_stop_ioeventfd(VirtioBusState *bus); +/* Switch from/to the generic ioeventfd handler */ +int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); + #endif /* VIRTIO_BUS_H */ From b1f0a33d808fc3eb27faa93b7e032eb70c58802f Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:10 +0200 Subject: [PATCH 30/34] virtio-bus: have callers tolerate new host notifier api Have vhost and dataplane use the new api for transports that have been converted. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/dataplane/virtio-blk.c | 14 +++++++++++--- hw/scsi/virtio-scsi-dataplane.c | 20 +++++++++++++++----- hw/virtio/vhost.c | 20 ++++++++++++++++---- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 2073f9a270..fdf5fd1190 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -79,7 +79,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, } /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || !k->set_host_notifier) { + if (!k->set_guest_notifiers || + (!k->set_host_notifier && !k->ioeventfd_started)) { error_setg(errp, "device is incompatible with dataplane " "(transport does not support notifiers)"); @@ -157,7 +158,10 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) s->guest_notifier = virtio_queue_get_guest_notifier(s->vq); /* Set up virtqueue notify */ - r = k->set_host_notifier(qbus->parent, 0, true); + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), 0, true); + if (r == -ENOSYS) { + r = k->set_host_notifier(qbus->parent, 0, true); + } if (r != 0) { fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); goto fail_host_notifier; @@ -193,6 +197,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + int r; if (!vblk->dataplane_started || s->stopping) { return; @@ -217,7 +222,10 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) aio_context_release(s->ctx); - k->set_host_notifier(qbus->parent, 0, false); + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), 0, false); + if (r == -ENOSYS) { + k->set_host_notifier(qbus->parent, 0, false); + } /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, 1, false); diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 1a49f1e4b7..b9a5716501 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -31,7 +31,8 @@ void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread) s->ctx = iothread_get_aio_context(vs->conf.iothread); /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || !k->set_host_notifier) { + if (!k->set_guest_notifiers || + (!k->set_host_notifier && !k->ioeventfd_started)) { fprintf(stderr, "virtio-scsi: Failed to set iothread " "(transport does not support notifiers)"); exit(1); @@ -73,7 +74,10 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, int rc; /* Set up virtqueue notify */ - rc = k->set_host_notifier(qbus->parent, n, true); + rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), n, true); + if (rc == -ENOSYS) { + rc = k->set_host_notifier(qbus->parent, n, true); + } if (rc != 0) { fprintf(stderr, "virtio-scsi: Failed to set host notifier (%d)\n", rc); @@ -159,7 +163,10 @@ fail_vrings: virtio_scsi_clear_aio(s); aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { - k->set_host_notifier(qbus->parent, i, false); + rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + if (rc == -ENOSYS) { + k->set_host_notifier(qbus->parent, i, false); + } } k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); fail_guest_notifiers: @@ -174,7 +181,7 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - int i; + int i, rc; if (!s->dataplane_started || s->dataplane_stopping) { return; @@ -198,7 +205,10 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { - k->set_host_notifier(qbus->parent, i, false); + rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + if (rc == -ENOSYS) { + k->set_host_notifier(qbus->parent, i, false); + } } /* Clean up guest notifier (irq) */ diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 81cc5b0ae3..bce1b6e313 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1110,14 +1110,18 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; - if (!k->set_host_notifier) { + if (!k->set_host_notifier || !k->ioeventfd_started) { fprintf(stderr, "binding does not support host notifiers\n"); r = -ENOSYS; goto fail; } for (i = 0; i < hdev->nvqs; ++i) { - r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, true); + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + true); + if (r == -ENOSYS) { + r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, true); + } if (r < 0) { fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r); goto fail_vq; @@ -1127,7 +1131,11 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) return 0; fail_vq: while (--i >= 0) { - e = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); + e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + false); + if (e == -ENOSYS) { + e = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); + } if (e < 0) { fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r); fflush(stderr); @@ -1151,7 +1159,11 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) int i, r; for (i = 0; i < hdev->nvqs; ++i) { - r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + false); + if (r == -ENOSYS) { + r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); + } if (r < 0) { fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r); fflush(stderr); From 7c55f68a630d91c24a5e2942d1cfb811910a6a98 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:11 +0200 Subject: [PATCH 31/34] virtio-ccw: convert to ioeventfd callbacks Use the new interface. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/s390x/virtio-ccw.c | 141 +++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 92 deletions(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 1625e6b38b..8b709e362e 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -69,92 +69,58 @@ VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch) return vdev; } -static int virtio_ccw_set_guest2host_notifier(VirtioCcwDevice *dev, int n, - bool assign, bool set_handler) -{ - VirtIODevice *vdev = virtio_bus_get_device(&dev->bus); - VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - int r = 0; - SubchDev *sch = dev->sch; - uint32_t sch_id = (css_build_subchannel_id(sch) << 16) | sch->schid; - - if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - error_report("%s: unable to init event notifier: %d", __func__, r); - return r; - } - virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); - r = s390_assign_subch_ioeventfd(notifier, sch_id, n, assign); - if (r < 0) { - error_report("%s: unable to assign ioeventfd: %d", __func__, r); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); - return r; - } - } else { - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - s390_assign_subch_ioeventfd(notifier, sch_id, n, assign); - event_notifier_cleanup(notifier); - } - return r; -} - static void virtio_ccw_start_ioeventfd(VirtioCcwDevice *dev) { - VirtIODevice *vdev; - int n, r; - - if (!(dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD) || - dev->ioeventfd_disabled || - dev->ioeventfd_started) { - return; - } - vdev = virtio_bus_get_device(&dev->bus); - for (n = 0; n < VIRTIO_CCW_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = virtio_ccw_set_guest2host_notifier(dev, n, true, true); - if (r < 0) { - goto assign_error; - } - } - dev->ioeventfd_started = true; - return; - - assign_error: - while (--n >= 0) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = virtio_ccw_set_guest2host_notifier(dev, n, false, false); - assert(r >= 0); - } - dev->ioeventfd_started = false; - /* Disable ioeventfd for this device. */ - dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; - error_report("%s: failed. Fallback to userspace (slower).", __func__); + virtio_bus_start_ioeventfd(&dev->bus); } static void virtio_ccw_stop_ioeventfd(VirtioCcwDevice *dev) { - VirtIODevice *vdev; - int n, r; + virtio_bus_stop_ioeventfd(&dev->bus); +} - if (!dev->ioeventfd_started) { - return; +static bool virtio_ccw_ioeventfd_started(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + return dev->ioeventfd_started; +} + +static void virtio_ccw_ioeventfd_set_started(DeviceState *d, bool started, + bool err) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + dev->ioeventfd_started = started; + if (err) { + /* Disable ioeventfd for this device. */ + dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; } - vdev = virtio_bus_get_device(&dev->bus); - for (n = 0; n < VIRTIO_CCW_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = virtio_ccw_set_guest2host_notifier(dev, n, false, false); - assert(r >= 0); - } - dev->ioeventfd_started = false; +} + +static bool virtio_ccw_ioeventfd_disabled(DeviceState *d) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + return dev->ioeventfd_disabled || + !(dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD); +} + +static void virtio_ccw_ioeventfd_set_disabled(DeviceState *d, bool disabled) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + + dev->ioeventfd_disabled = disabled; +} + +static int virtio_ccw_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, + int n, bool assign) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + SubchDev *sch = dev->sch; + uint32_t sch_id = (css_build_subchannel_id(sch) << 16) | sch->schid; + + return s390_assign_subch_ioeventfd(notifier, sch_id, n, assign); } VirtualCssBus *virtual_css_bus_init(void) @@ -1157,19 +1123,6 @@ static bool virtio_ccw_query_guest_notifiers(DeviceState *d) return !!(dev->sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ENA); } -static int virtio_ccw_set_host_notifier(DeviceState *d, int n, bool assign) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - /* Stop using the generic ioeventfd, we are doing eventfd handling - * ourselves below */ - dev->ioeventfd_disabled = assign; - if (assign) { - virtio_ccw_stop_ioeventfd(dev); - } - return virtio_ccw_set_guest2host_notifier(dev, n, assign, false); -} - static int virtio_ccw_get_mappings(VirtioCcwDevice *dev) { int r; @@ -1798,7 +1751,6 @@ static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) k->notify = virtio_ccw_notify; k->vmstate_change = virtio_ccw_vmstate_change; k->query_guest_notifiers = virtio_ccw_query_guest_notifiers; - k->set_host_notifier = virtio_ccw_set_host_notifier; k->set_guest_notifiers = virtio_ccw_set_guest_notifiers; k->save_queue = virtio_ccw_save_queue; k->load_queue = virtio_ccw_load_queue; @@ -1807,6 +1759,11 @@ static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) k->device_plugged = virtio_ccw_device_plugged; k->post_plugged = virtio_ccw_post_plugged; k->device_unplugged = virtio_ccw_device_unplugged; + k->ioeventfd_started = virtio_ccw_ioeventfd_started; + k->ioeventfd_set_started = virtio_ccw_ioeventfd_set_started; + k->ioeventfd_disabled = virtio_ccw_ioeventfd_disabled; + k->ioeventfd_set_disabled = virtio_ccw_ioeventfd_set_disabled; + k->ioeventfd_assign = virtio_ccw_ioeventfd_assign; } static const TypeInfo virtio_ccw_bus_info = { From 9f06e71a567ba5ee8b727e65a2d5347fd331d2aa Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:12 +0200 Subject: [PATCH 32/34] virtio-pci: convert to ioeventfd callbacks Convert to new interface. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 124 ++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 83 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 1a0278304b..2b34b43060 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -262,14 +262,44 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +static bool virtio_pci_ioeventfd_started(DeviceState *d) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + return proxy->ioeventfd_started; +} + +static void virtio_pci_ioeventfd_set_started(DeviceState *d, bool started, + bool err) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + proxy->ioeventfd_started = started; +} + +static bool virtio_pci_ioeventfd_disabled(DeviceState *d) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + return proxy->ioeventfd_disabled || + !(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD); +} + +static void virtio_pci_ioeventfd_set_disabled(DeviceState *d, bool disabled) +{ + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + + proxy->ioeventfd_disabled = disabled; +} + #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 -static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, - int n, bool assign, bool set_handler) +static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, + int n, bool assign) { + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); bool legacy = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_LEGACY); bool modern = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_MODERN); bool fast_mmio = kvm_ioeventfd_any_length_enabled(); @@ -280,16 +310,8 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, hwaddr modern_addr = QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * virtio_get_queue_index(vq); hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY; - int r = 0; if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - error_report("%s: unable to init event notifier: %d", - __func__, r); - return r; - } - virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); if (modern) { if (fast_mmio) { memory_region_add_eventfd(modern_mr, modern_addr, 0, @@ -325,68 +347,18 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, memory_region_del_eventfd(legacy_mr, legacy_addr, 2, true, n, notifier); } - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); } - return r; + return 0; } static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - int n, r; - - if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) || - proxy->ioeventfd_disabled || - proxy->ioeventfd_started) { - return; - } - - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_pci_set_host_notifier_internal(proxy, n, true, true); - if (r < 0) { - goto assign_error; - } - } - proxy->ioeventfd_started = true; - return; - -assign_error: - while (--n >= 0) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_pci_set_host_notifier_internal(proxy, n, false, false); - assert(r >= 0); - } - proxy->ioeventfd_started = false; - error_report("%s: failed. Fallback to a userspace (slower).", __func__); + virtio_bus_start_ioeventfd(&proxy->bus); } static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - int r; - int n; - - if (!proxy->ioeventfd_started) { - return; - } - - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_pci_set_host_notifier_internal(proxy, n, false, false); - assert(r >= 0); - } - proxy->ioeventfd_started = false; + virtio_bus_stop_ioeventfd(&proxy->bus); } static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) @@ -1110,24 +1082,6 @@ assign_error: return r; } -static int virtio_pci_set_host_notifier(DeviceState *d, int n, bool assign) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - /* Stop using ioeventfd for virtqueue kick if the device starts using host - * notifiers. This makes it easy to avoid stepping on each others' toes. - */ - proxy->ioeventfd_disabled = assign; - if (assign) { - virtio_pci_stop_ioeventfd(proxy); - } - /* We don't need to start here: it's not needed because backend - * currently only stops on status change away from ok, - * reset, vmstop and such. If we do add code to start here, - * need to check vmstate, device state etc. */ - return virtio_pci_set_host_notifier_internal(proxy, n, assign, false); -} - static void virtio_pci_vmstate_change(DeviceState *d, bool running) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -2488,12 +2442,16 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->load_extra_state = virtio_pci_load_extra_state; k->has_extra_state = virtio_pci_has_extra_state; k->query_guest_notifiers = virtio_pci_query_guest_notifiers; - k->set_host_notifier = virtio_pci_set_host_notifier; k->set_guest_notifiers = virtio_pci_set_guest_notifiers; k->vmstate_change = virtio_pci_vmstate_change; k->device_plugged = virtio_pci_device_plugged; k->device_unplugged = virtio_pci_device_unplugged; k->query_nvectors = virtio_pci_query_nvectors; + k->ioeventfd_started = virtio_pci_ioeventfd_started; + k->ioeventfd_set_started = virtio_pci_ioeventfd_set_started; + k->ioeventfd_disabled = virtio_pci_ioeventfd_disabled; + k->ioeventfd_set_disabled = virtio_pci_ioeventfd_set_disabled; + k->ioeventfd_assign = virtio_pci_ioeventfd_assign; } static const TypeInfo virtio_pci_bus_info = { From c0971bcb7c3320e991b8913253b6344096fa15fd Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:13 +0200 Subject: [PATCH 33/34] virtio-mmio: convert to ioeventfd callbacks Convert to the new interface. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mmio.c | 128 +++++++++++++--------------------------- 1 file changed, 41 insertions(+), 87 deletions(-) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index d4cd91f8c4..eb84b74532 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -93,90 +93,59 @@ typedef struct { bool ioeventfd_started; } VirtIOMMIOProxy; -static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy, - int n, bool assign, - bool set_handler) +static bool virtio_mmio_ioeventfd_started(DeviceState *d) { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - int r = 0; + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + return proxy->ioeventfd_started; +} + +static void virtio_mmio_ioeventfd_set_started(DeviceState *d, bool started, + bool err) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + proxy->ioeventfd_started = started; +} + +static bool virtio_mmio_ioeventfd_disabled(DeviceState *d) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + return !kvm_eventfds_enabled() || proxy->ioeventfd_disabled; +} + +static void virtio_mmio_ioeventfd_set_disabled(DeviceState *d, bool disabled) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + proxy->ioeventfd_disabled = disabled; +} + +static int virtio_mmio_ioeventfd_assign(DeviceState *d, + EventNotifier *notifier, + int n, bool assign) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - error_report("%s: unable to init event notifier: %d", - __func__, r); - return r; - } - virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, true, n, notifier); } else { memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, true, n, notifier); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); } - return r; + return 0; } static void virtio_mmio_start_ioeventfd(VirtIOMMIOProxy *proxy) { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - int n, r; - - if (!kvm_eventfds_enabled() || - proxy->ioeventfd_disabled || - proxy->ioeventfd_started) { - return; - } - - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_mmio_set_host_notifier_internal(proxy, n, true, true); - if (r < 0) { - goto assign_error; - } - } - proxy->ioeventfd_started = true; - return; - -assign_error: - while (--n >= 0) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_mmio_set_host_notifier_internal(proxy, n, false, false); - assert(r >= 0); - } - proxy->ioeventfd_started = false; - error_report("%s: failed. Fallback to a userspace (slower).", __func__); + virtio_bus_start_ioeventfd(&proxy->bus); } static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy) { - int r; - int n; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - - if (!proxy->ioeventfd_started) { - return; - } - - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - - r = virtio_mmio_set_host_notifier_internal(proxy, n, false, false); - assert(r >= 0); - } - proxy->ioeventfd_started = false; + virtio_bus_stop_ioeventfd(&proxy->bus); } static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) @@ -498,25 +467,6 @@ assign_error: return r; } -static int virtio_mmio_set_host_notifier(DeviceState *opaque, int n, - bool assign) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque); - - /* Stop using ioeventfd for virtqueue kick if the device starts using host - * notifiers. This makes it easy to avoid stepping on each others' toes. - */ - proxy->ioeventfd_disabled = assign; - if (assign) { - virtio_mmio_stop_ioeventfd(proxy); - } - /* We don't need to start here: it's not needed because backend - * currently only stops on status change away from ok, - * reset, vmstop and such. If we do add code to start here, - * need to check vmstate, device state etc. */ - return virtio_mmio_set_host_notifier_internal(proxy, n, assign, false); -} - /* virtio-mmio device */ static void virtio_mmio_realizefn(DeviceState *d, Error **errp) @@ -558,8 +508,12 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->notify = virtio_mmio_update_irq; k->save_config = virtio_mmio_save_config; k->load_config = virtio_mmio_load_config; - k->set_host_notifier = virtio_mmio_set_host_notifier; k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; + k->ioeventfd_started = virtio_mmio_ioeventfd_started; + k->ioeventfd_set_started = virtio_mmio_ioeventfd_set_started; + k->ioeventfd_disabled = virtio_mmio_ioeventfd_disabled; + k->ioeventfd_set_disabled = virtio_mmio_ioeventfd_set_disabled; + k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; } From 21a4d96243e60a4c8eeb124a023b8a3bd9120e18 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 10 Jun 2016 11:04:14 +0200 Subject: [PATCH 34/34] virtio-bus: remove old set_host_notifier callback All users have been converted to the new ioevent callbacks. Signed-off-by: Cornelia Huck Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/dataplane/virtio-blk.c | 12 ++---------- hw/scsi/virtio-scsi-dataplane.c | 19 ++++--------------- hw/virtio/vhost.c | 13 +------------ include/hw/virtio/virtio-bus.h | 1 - 4 files changed, 7 insertions(+), 38 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index fdf5fd1190..2041b0400b 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -79,8 +79,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, } /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || - (!k->set_host_notifier && !k->ioeventfd_started)) { + if (!k->set_guest_notifiers || !k->ioeventfd_started) { error_setg(errp, "device is incompatible with dataplane " "(transport does not support notifiers)"); @@ -159,9 +158,6 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) /* Set up virtqueue notify */ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), 0, true); - if (r == -ENOSYS) { - r = k->set_host_notifier(qbus->parent, 0, true); - } if (r != 0) { fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); goto fail_host_notifier; @@ -197,7 +193,6 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); - int r; if (!vblk->dataplane_started || s->stopping) { return; @@ -222,10 +217,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) aio_context_release(s->ctx); - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), 0, false); - if (r == -ENOSYS) { - k->set_host_notifier(qbus->parent, 0, false); - } + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), 0, false); /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, 1, false); diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index b9a5716501..18ced31493 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -31,8 +31,7 @@ void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread) s->ctx = iothread_get_aio_context(vs->conf.iothread); /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || - (!k->set_host_notifier && !k->ioeventfd_started)) { + if (!k->set_guest_notifiers || !k->ioeventfd_started) { fprintf(stderr, "virtio-scsi: Failed to set iothread " "(transport does not support notifiers)"); exit(1); @@ -70,14 +69,10 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, void (*fn)(VirtIODevice *vdev, VirtQueue *vq)) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); int rc; /* Set up virtqueue notify */ rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), n, true); - if (rc == -ENOSYS) { - rc = k->set_host_notifier(qbus->parent, n, true); - } if (rc != 0) { fprintf(stderr, "virtio-scsi: Failed to set host notifier (%d)\n", rc); @@ -163,10 +158,7 @@ fail_vrings: virtio_scsi_clear_aio(s); aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { - rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - if (rc == -ENOSYS) { - k->set_host_notifier(qbus->parent, i, false); - } + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); } k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); fail_guest_notifiers: @@ -181,7 +173,7 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - int i, rc; + int i; if (!s->dataplane_started || s->dataplane_stopping) { return; @@ -205,10 +197,7 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { - rc = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - if (rc == -ENOSYS) { - k->set_host_notifier(qbus->parent, i, false); - } + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); } /* Clean up guest notifier (irq) */ diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bce1b6e313..a01394d5ac 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1110,7 +1110,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; - if (!k->set_host_notifier || !k->ioeventfd_started) { + if (!k->ioeventfd_started) { fprintf(stderr, "binding does not support host notifiers\n"); r = -ENOSYS; goto fail; @@ -1119,9 +1119,6 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); - if (r == -ENOSYS) { - r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, true); - } if (r < 0) { fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r); goto fail_vq; @@ -1133,9 +1130,6 @@ fail_vq: while (--i >= 0) { e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); - if (e == -ENOSYS) { - e = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); - } if (e < 0) { fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r); fflush(stderr); @@ -1154,16 +1148,11 @@ fail: void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusState *vbus = VIRTIO_BUS(qbus); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r; for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); - if (r == -ENOSYS) { - r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false); - } if (r < 0) { fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r); fflush(stderr); diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index 9637f80f4d..f3e5ef3f5b 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -52,7 +52,6 @@ typedef struct VirtioBusClass { bool (*has_extra_state)(DeviceState *d); bool (*query_guest_notifiers)(DeviceState *d); int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign); - int (*set_host_notifier)(DeviceState *d, int n, bool assigned); void (*vmstate_change)(DeviceState *d, bool running); /* * transport independent init function.