From a8b73734219802e226a5444ffd84d07a085edd28 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Mon, 15 May 2017 14:05:09 +0530 Subject: [PATCH 01/18] target/ppc: reset reservation in do_rfi() For transitioning back to userspace after the interrupt. Suggested-by: Richard Henderson Signed-off-by: Nikunj A Dadhania Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a6bcb47aa2..9cb2123187 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -995,6 +995,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) */ cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + /* Reset the reservation */ + env->reserve_addr = -1; + /* Context synchronizing: check if TCG TLB needs flush */ check_tlb_flush(env, false); } From f63ebfe0ac9efc83ee6d3753e9b3ed7229d8b28a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 15 May 2017 13:39:16 +0200 Subject: [PATCH 02/18] ppc/xics: simplify prototype of xics_spapr_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function only does hypercall and RTAS-call registration, and thus never returns an error. This patch adapt the prototype to reflect that. Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Gibson --- hw/intc/xics_spapr.c | 3 +-- hw/ppc/spapr.c | 2 +- include/hw/ppc/xics.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index f05308b897..d98ea8b130 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -229,7 +229,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) +void xics_spapr_init(sPAPRMachineState *spapr) { /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); @@ -243,7 +243,6 @@ int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - return 0; } #define ICS_IRQ_FREE(ics, srcno) \ diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0980d733cd..18709ad616 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -139,7 +139,7 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) } if (!spapr->ics) { - xics_spapr_init(spapr, errp); + xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); } diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 05e6acbb35..d6cb51f3ad 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -206,6 +206,6 @@ void icp_resend(ICPState *ss); typedef struct sPAPRMachineState sPAPRMachineState; int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp); +void xics_spapr_init(sPAPRMachineState *spapr); #endif /* XICS_H */ From 175d2aa038c530d07a8eb5f483c6b1c4b3df43e0 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 15 May 2017 13:39:45 +0200 Subject: [PATCH 03/18] spapr: sanitize error handling in spapr_ics_create() The spapr_ics_create() function handles errors in a rather convoluted way, with two local Error * variables. Moreover, failing to parent the ICS object to the machine should be considered as a bug but it is currently ignored. This patch addresses both issues. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 18709ad616..a9471b99f5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -101,21 +101,26 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) { - Error *err = NULL, *local_err = NULL; + Error *local_err = NULL; Object *obj; obj = object_new(type_ics); - object_property_add_child(OBJECT(spapr), "ics", obj, NULL); + object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); - object_property_set_int(obj, nr_irqs, "nr-irqs", &err); + object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err); + if (local_err) { + goto error; + } object_property_set_bool(obj, true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return NULL; + if (local_err) { + goto error; } return ICS_SIMPLE(obj); + +error: + error_propagate(errp, local_err); + return NULL; } static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) From c8a98293f7fe672fc3b7a3caafede701fbb3180e Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 15 May 2017 13:39:55 +0200 Subject: [PATCH 04/18] spapr-cpu-core: release ICP object when realization fails While here we introduce a single error path to avoid code duplication. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index a17ea07ef1..1df1404ea5 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -147,25 +147,25 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - error_propagate(errp, local_err); - return; + goto error; } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } spapr_cpu_init(spapr, cpu, &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj)); + return; + +error: + object_unparent(obj); + error_propagate(errp, local_err); } static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) From 06ec79e865a4a496e762a83126d00d0ed39205f5 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 17 May 2017 09:19:20 +0530 Subject: [PATCH 05/18] spapr: Consolidate HPT freeing code into a routine Consolidate the code that frees HPT into a separate routine spapr_free_hpt() as the same chunk of code is called from two places. Signed-off-by: Bharata B Rao Signed-off-by: David Gibson --- hw/ppc/spapr.c | 13 +++++++++---- hw/ppc/spapr_hcall.c | 5 +---- include/hw/ppc/spapr.h | 1 + 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a9471b99f5..35dceb024e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1227,16 +1227,21 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) return shift; } +void spapr_free_hpt(sPAPRMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, Error **errp) { long rc; /* Clean up any HPT info from a previous boot */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); rc = kvmppc_reset_htab(shift); if (rc < 0) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0d608d6e28..2daace42df 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -913,10 +913,7 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, /* We assume RADIX, so this catches all the "Do Nothing" cases */ } else if (!(patbe_old & PATBE1_GR)) { /* HASH->RADIX : Free HPT */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); } else if (!(patbe_new & PATBE1_GR)) { /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ spapr_setup_hpt_and_vrma(spapr); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5802f888c3..93c4cfcfab 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -610,6 +610,7 @@ int spapr_h_cas_compose_response(sPAPRMachineState *sm, sPAPROptionVector *ov5_updates); void close_htab_fd(sPAPRMachineState *spapr); void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr); +void spapr_free_hpt(sPAPRMachineState *spapr); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(sPAPRTCETable *tcet, uint32_t page_shift, uint64_t bus_offset, From de86eccc0c836adfa8dbb94848096720177f5ccb Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 17 May 2017 16:38:20 +0200 Subject: [PATCH 06/18] xics_kvm: cache already enabled vCPU ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a45863bda90d ("xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled"), we were able to re-hotplug a vCPU that had been hot- unplugged ealier, thanks to a boolean flag in ICPState that we set when enabling KVM_CAP_IRQ_XICS. This could work because the lifecycle of all ICPState objects was the same as the machine. Commit 5bc8d26de20c ("spapr: allocate the ICPState object from under sPAPRCPUCore") broke this assumption and now we always pass a freshly allocated ICPState object (ie, with the flag unset) to icp_kvm_cpu_setup(). This cause re-hotplug to fail with: Unable to connect CPU8 to kernel XICS: Device or resource busy Let's fix this by caching all the vCPU ids for which KVM_CAP_IRQ_XICS was enabled. This also drops the now useless boolean flag from ICPState. Reported-by: Laurent Vivier Signed-off-by: Greg Kurz Tested-by: Laurent Vivier Reviewed-by: Laurent Vivier Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics_kvm.c | 27 ++++++++++++++++++++------- include/hw/ppc/xics.h | 1 - 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index dd93531ae3..dd7f298462 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -42,6 +42,14 @@ static int kernel_xics_fd = -1; +typedef struct KVMEnabledICP { + unsigned long vcpu_id; + QLIST_ENTRY(KVMEnabledICP) node; +} KVMEnabledICP; + +static QLIST_HEAD(, KVMEnabledICP) + kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); + /* * ICP-KVM */ @@ -121,6 +129,8 @@ static void icp_kvm_reset(void *dev) static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); + KVMEnabledICP *enabled_icp; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); int ret; if (kernel_xics_fd == -1) { @@ -132,18 +142,21 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) * which was hot-removed earlier we don't have to renable * KVM_CAP_IRQ_XICS capability again. */ - if (icp->cap_irq_xics_enabled) { - return; + QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) { + if (enabled_icp->vcpu_id == vcpu_id) { + return; + } } - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, - kvm_arch_vcpu_id(cs)); + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id); if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); + error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id, + strerror(errno)); exit(1); } - icp->cap_irq_xics_enabled = true; + enabled_icp = g_malloc(sizeof(*enabled_icp)); + enabled_icp->vcpu_id = vcpu_id; + QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node); } static void icp_kvm_realize(DeviceState *dev, Error **errp) diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index d6cb51f3ad..a3073f9053 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -81,7 +81,6 @@ struct ICPState { uint8_t pending_priority; uint8_t mfrr; qemu_irq output; - bool cap_irq_xics_enabled; XICSFabric *xics; }; From 07572c0653a60769df406c16136e2cc9234692f5 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 18 May 2017 15:58:31 +0200 Subject: [PATCH 07/18] spapr: ensure core_slot isn't NULL in spapr_core_unplug() If we go that far on the path of hot-removing a core and we find out that the core-id is invalid, then we have a serious bug. Let's make it explicit with an assert() instead of dereferencing a NULL pointer. This fixes Coverity issue CID 1375404. Signed-off-by: Greg Kurz Reviewed-by: Igor Mammedov Signed-off-by: David Gibson --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 35dceb024e..c912eaa2be 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2725,6 +2725,7 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); } From bff3063837a76b37a4bbbfe614324ca38e859f2b Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 19 May 2017 11:27:49 -0300 Subject: [PATCH 08/18] hw/ppc/spapr_events.c: removing 'exception' from sPAPREventLogEntry Currenty we do not have any RTAS event that is reported by the event-scan interface. The existing events, RTAS_LOG_TYPE_EPOW and RTAS_LOG_TYPE_HOTPLUG, are being reported by the check-exception interface and, as such, marked as 'exception=true'. Commit 79853e18d9, 'spapr_events: event-scan RTAS interface', added the event_scan interface because the guest kernel requires it to initialize other required interfaces. It is acting since then as a stub because no events that would be reported by it were added since then. However, the existence of the 'exception' boolean adds an unnecessary load in the future migration of the pending_events, sPAPREventLogEntry QTAILQ that hosts the pending RTAS events. To make the code cleaner and ease the future migration changes, this patch makes the following changes: - remove the 'exception' boolean that filter these events. There is nothing to filter since all events are reported by check-exception; - functions rtas_event_log_queue, rtas_event_log_dequeue and rtas_event_log_contains don't receive the 'exception' boolean as parameter; - event_scan function was simplified. It was calling 'rtas_event_log_dequeue(mask, false)' that was always returning 'NULL' because we have no events that are created with exception=false, thus in the end it would execute a jump to 'out_no_events' all the time. The function now assumes that this will always be the case and all the remaining logic were deleted. In the future, when or if we add new RTAS events that should be reported with the event_scan interface, we can refer to the changes made in this patch to add the event_scan logic back. Signed-off-by: Daniel Henrique Barboza Signed-off-by: David Gibson --- hw/ppc/spapr_events.c | 52 ++++++------------------------------------ include/hw/ppc/spapr.h | 1 - 2 files changed, 7 insertions(+), 46 deletions(-) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f0b28d8112..73e2a1884f 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data, bool exception) +static void rtas_event_log_queue(int log_type, void *data) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); entry->log_type = log_type; - entry->exception = exception; entry->data = data; QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, - bool exception) +static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { break; } @@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, return entry; } -static bool rtas_event_log_contains(uint32_t event_mask, bool exception) +static bool rtas_event_log_contains(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { return true; } @@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask, true); + event = rtas_event_log_dequeue(mask); if (!event) { goto out_no_events; } @@ -690,7 +680,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * interrupts. */ for (i = 0; i < EVENT_CLASS_MAX; i++) { - if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { const sPAPREventSource *source = spapr_event_sources_get_source(spapr->event_sources, i); @@ -710,38 +700,10 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - uint32_t mask, buf, len, event_len; - sPAPREventLogEntry *event; - struct rtas_error_log *hdr; - if (nargs != 4 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - - mask = rtas_ld(args, 0); - buf = rtas_ld(args, 2); - len = rtas_ld(args, 3); - - event = rtas_event_log_dequeue(mask, false); - if (!event) { - goto out_no_events; - } - - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); - - if (event_len < len) { - len = event_len; - } - - cpu_physical_memory_write(buf, event->data, len); - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); - g_free(event); - return; - -out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 93c4cfcfab..8f424ca4e3 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -598,7 +598,6 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); struct sPAPREventLogEntry { int log_type; - bool exception; void *data; QTAILQ_ENTRY(sPAPREventLogEntry) next; }; From 249127d0dfeb2cf5e24d9353b6d54c91c1666ddc Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 19 May 2017 12:32:04 +0200 Subject: [PATCH 09/18] spapr_cpu_core: drop reference on ICP object during CPU realization When a piece of code allocates an object, it implicitely gets a reference on it. If it then makes that object a child property of another object, it should drop its own reference at some point otherwise the child object can never be finalized. The current code hence leaks one ICP object per CPU when hot-removing a core. Failing to add a newly allocated ICP object to the CPU is a bug. While here, let's ensure QEMU aborts if this ever happens. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr_cpu_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 1df1404ea5..ff7058ecc0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -143,7 +143,8 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) Object *obj; obj = object_new(spapr->icp_type); - object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_child(OBJECT(cpu), "icp", obj, &error_abort); + object_unref(obj); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { From 3d85885a1b1f389d38ae3c71e439635207cfcf4d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 19 May 2017 12:32:12 +0200 Subject: [PATCH 10/18] spapr: fix error reporting in xics_system_init() If the user explicitely asked for kernel-irqchip support and "xics-kvm" initialization fails, we shouldn't fallback to emulated "xics" as we do now. It is also awkward to print an error message when we have an errp pointer argument. Let's use the errp argument to report the error and let the caller decide. This simplifies the code as we don't need a local Error * here. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index c912eaa2be..c92d269027 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -128,18 +128,14 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); if (kvm_enabled()) { - Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, errp)) { spapr->icp_type = TYPE_KVM_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err); + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp); } if (machine_kernel_irqchip_required(machine) && !spapr->ics) { - error_reportf_err(err, - "kernel_irqchip requested but unavailable: "); - } else { - error_free(err); + error_prepend(errp, "kernel_irqchip requested but unavailable: "); + return; } } @@ -147,6 +143,9 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); + if (!spapr->ics) { + return; + } } } From 80c33d343f068012348869e16e579375c8911a04 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 18 May 2017 14:47:44 +1000 Subject: [PATCH 11/18] pseries: Split CAS PVR negotiation out into a separate function Guests of the qemu machine type go through a feature negotiation process known as "client architecture support" (CAS) during early boot. This does a number of things, one of which is finding a CPU compatibility mode which can be supported by both guest and host. In fact the CPU negotiation is probably the single most complex part of the CAS process, so this splits it out into a helper function. We've recently made some mistakes in maintaining backward compatibility for old machine types here. Splitting this out will also make it easier to fix this. This also adds a possibly useful error message if the negotiation fails (i.e. if there isn't a CPU mode that's suitable for both guest and host). Signed-off-by: David Gibson Reviewed-by: Laurent Vivier Reviewed-by: Greg Kurz --- hw/ppc/spapr_hcall.c | 49 +++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 2daace42df..77d2d66600 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1044,19 +1044,13 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - sPAPRMachineState *spapr, - target_ulong opcode, - target_ulong *args) +static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, + Error **errp) { - target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table; bool explicit_match = false; /* Matched the CPU's real PVR */ uint32_t max_compat = cpu->max_compat; uint32_t best_compat = 0; int i; - sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; - bool guest_radix; /* * We scan the supplied table of PVRs looking for two things @@ -1066,9 +1060,9 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, for (i = 0; i < 512; ++i) { uint32_t pvr, pvr_mask; - pvr_mask = ldl_be_phys(&address_space_memory, list); - pvr = ldl_be_phys(&address_space_memory, list + 4); - list += 8; + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; if (~pvr_mask & pvr) { break; /* Terminator record */ @@ -1087,17 +1081,38 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* We couldn't find a suitable compatibility mode, and either * the guest doesn't support "raw" mode for this CPU, or raw * mode is disabled because a maximum compat mode is set */ - return H_HARDWARE; + error_setg(errp, "Couldn't negotiate a suitable PVR during CAS"); + return 0; } /* Parsing finished */ trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); - /* Update CPUs */ - if (cpu->compat_pvr != best_compat) { - Error *local_err = NULL; + return best_compat; +} - ppc_set_compat_all(best_compat, &local_err); +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* Working address in data buffer */ + target_ulong addr = ppc64_phys_to_real(args[0]); + target_ulong ov_table; + uint32_t cas_pvr; + sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; + bool guest_radix; + Error *local_err = NULL; + + cas_pvr = cas_check_pvr(cpu, &addr, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + ppc_set_compat_all(cas_pvr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; @@ -1105,7 +1120,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, } /* For the future use: here @ov_table points to the first option vector */ - ov_table = list; + ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); ov5_guest = spapr_ovec_parse_vector(ov_table, 5); From 459264ef24cf2d409b8baf2047fa86e697d6e9ab Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 23 May 2017 16:33:06 +1000 Subject: [PATCH 12/18] pseries: Restore support for total vcpus not a multiple of threads-per-core for old machine types As of pseries-2.7 and later, we require the total number of guest vcpus to be a multiple of the threads-per-core. pseries-2.6 and earlier machine types, however, are supposed to allow this for the sake of migration from old qemu versions which allowed this. Unfortunately, 8149e29 "pseries: Enforce homogeneous threads-per-core" broke this by not considering the old machine type case. This fixes it by only applying the check when the machine type supports hotpluggable cpus. By not-entirely-coincidence, that corresponds to the same time when we started enforcing total threads being a multiple of threads-per-core. Fixes: 8149e2992f7811355cc34721b79d69d1a3a667dd Signed-off-by: David Gibson Reviewed-by: Laurent Vivier Reviewed-by: Greg Kurz Tested-by: Greg Kurz --- hw/ppc/spapr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index c92d269027..bcb0e184a0 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2863,7 +2863,13 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - if (cc->nr_threads != smp_threads) { + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. + */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); return; From c871bc70bb22d1d70451bc813ecb008fe98cc92b Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 23 May 2017 13:18:09 +0200 Subject: [PATCH 13/18] spapr: add pre_plug function for memory This allows to manage errors before the memory has started to be hotplugged. We already have the function for the CPU cores. Signed-off-by: Laurent Vivier Reviewed-by: Greg Kurz [dwg: Fixed a couple of style nits] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index bcb0e184a0..3760d37c02 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2578,20 +2578,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, uint64_t align = memory_region_get_alignment(mr); uint64_t size = memory_region_size(mr); uint64_t addr; - char *mem_dev; - - if (size % SPAPR_MEMORY_BLOCK_SIZE) { - error_setg(&local_err, "Hotplugged memory size must be a multiple of " - "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); - goto out; - } - - mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); - if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { - error_setg(&local_err, "Memory backend has bad page size. " - "Use 'memory-backend-file' with correct mem-path."); - goto out; - } pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { @@ -2612,6 +2598,29 @@ out: error_propagate(errp, local_err); } +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + char *mem_dev; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); + if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { + error_setg(errp, "Memory backend has bad page size. " + "Use 'memory-backend-file' with correct mem-path."); + return; + } +} + typedef struct sPAPRDIMMState { uint32_t nr_lmbs; } sPAPRDIMMState; @@ -3006,7 +3015,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_pre_plug(hotplug_dev, dev, errp); } } From 0cffce56ae3501c5783d779f97993ce478acf856 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 24 May 2017 17:01:48 +1000 Subject: [PATCH 14/18] hw/ppc/spapr.c: adding pending_dimm_unplugs to sPAPRMachineState The LMB DRC release callback, spapr_lmb_release(), uses an opaque parameter, a sPAPRDIMMState struct that stores the current LMBs that are allocated to a DIMM (nr_lmbs). After each call to this callback, the nr_lmbs is decremented by one and, when it reaches zero, the callback proceeds with the qdev calls to hot unplug the LMB. Using drc->detach_cb_opaque is problematic because it can't be migrated in the future DRC migration work. This patch makes the following changes to eliminate the usage of this opaque callback inside spapr_lmb_release: - sPAPRDIMMState was moved from spapr.c and added to spapr.h. A new attribute called 'addr' was added to it. This is used as an unique identifier to associate a sPAPRDIMMState to a PCDIMM element. - sPAPRMachineState now hosts a new QTAILQ called 'pending_dimm_unplugs'. This queue of sPAPRDIMMState elements will store the DIMM state of DIMMs that are currently going under an unplug process. - spapr_lmb_release() will now retrieve the nr_lmbs value by getting the correspondent sPAPRDIMMState. A helper function called spapr_dimm_get_address was created to fetch the address of a PCDIMM device inside spapr_lmb_release. When nr_lmbs reaches zero and the callback proceeds with the qdev hot unplug calls, the sPAPRDIMMState struct is removed from spapr->pending_dimm_unplugs. After these changes, the opaque argument for spapr_lmb_release is now unused and is passed as NULL inside spapr_del_lmbs. This and the other opaque arguments can now be safely removed from the code. As an additional cleanup made by this patch, the spapr_del_lmbs function was merged with spapr_memory_unplug_request. The former was being called only by the latter and both were small enough to fit one single function. Signed-off-by: Daniel Henrique Barboza [dwg: Minor stylistic cleanups] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 105 ++++++++++++++++++++++++++--------------- include/hw/ppc/spapr.h | 6 +++ 2 files changed, 73 insertions(+), 38 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3760d37c02..3a79dabb2b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2059,6 +2059,7 @@ static void ppc_spapr_init(MachineState *machine) msi_nonbroken = true; QLIST_INIT(&spapr->phbs); + QTAILQ_INIT(&spapr->pending_dimm_unplugs); /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2621,58 +2622,58 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, } } -typedef struct sPAPRDIMMState { +struct sPAPRDIMMState { + PCDIMMDevice *dimm; uint32_t nr_lmbs; -} sPAPRDIMMState; + QTAILQ_ENTRY(sPAPRDIMMState) next; +}; + +static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s, + PCDIMMDevice *dimm) +{ + sPAPRDIMMState *dimm_state = NULL; + + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } + } + return dimm_state; +} + +static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm)); + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next); +} + +static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); +} static void spapr_lmb_release(DeviceState *dev, void *opaque) { - sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; - HotplugHandler *hotplug_ctrl; + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); if (--ds->nr_lmbs) { return; } - g_free(ds); + spapr_pending_dimm_unplugs_remove(spapr, ds); /* * Now that all the LMBs have been removed by the guest, call the * pc-dimm unplug handler to cleanup up the pc-dimm device. */ - hotplug_ctrl = qdev_get_hotplug_handler(dev); hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } -static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, - Error **errp) -{ - sPAPRDRConnector *drc; - sPAPRDRConnectorClass *drck; - uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; - int i; - sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); - uint64_t addr = addr_start; - - ds->nr_lmbs = nr_lmbs; - for (i = 0; i < nr_lmbs; i++) { - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr / SPAPR_MEMORY_BLOCK_SIZE); - g_assert(drc); - - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_lmb_release, ds, errp); - addr += SPAPR_MEMORY_BLOCK_SIZE; - } - - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr_start / SPAPR_MEMORY_BLOCK_SIZE); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, - nr_lmbs, - drck->get_index(drc)); -} - static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2688,19 +2689,47 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev); Error *local_err = NULL; PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); uint64_t size = memory_region_size(mr); - uint64_t addr; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr_start, addr; + int i; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + sPAPRDIMMState *ds; - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); if (local_err) { goto out; } - spapr_del_lmbs(dev, addr, size, &error_abort); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(spapr, ds); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_lmb_release, NULL, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); out: error_propagate(errp, local_err); } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 8f424ca4e3..777b5de27b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -32,6 +32,7 @@ struct sPAPRRTCState { int64_t ns_offset; }; +typedef struct sPAPRDIMMState sPAPRDIMMState; typedef struct sPAPRMachineClass sPAPRMachineClass; #define TYPE_SPAPR_MACHINE "spapr-machine" @@ -104,6 +105,11 @@ struct sPAPRMachineState { /* RTAS state */ QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; + /* Pending DIMM unplug cache. It is populated when a LMB + * unplug starts. It can be regenerated if a migration + * occurs during the unplug process. */ + QTAILQ_HEAD(, sPAPRDIMMState) pending_dimm_unplugs; + /*< public >*/ char *kvm_type; MemoryHotplugState hotplug_memory; From 318347234d7069b62d38391dd27e269a3107d668 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 22 May 2017 16:35:48 -0300 Subject: [PATCH 15/18] hw/ppc: removing drc->detach_cb and drc->detach_cb_opaque The pointer drc->detach_cb is being used as a way of informing the detach() function inside spapr_drc.c which cb to execute. This information can also be retrieved simply by checking drc->type and choosing the right callback based on it. In this context, detach_cb is redundant information that must be managed. After the previous spapr_lmb_release change, no detach_cb_opaques are being used by any of the three callbacks functions. This is yet another information that is now unused and, on top of that, can't be migrated either. This patch makes the following changes: - removal of detach_cb_opaque. the 'opaque' argument was removed from the callbacks and from the detach() function of sPAPRConnectorClass. The attribute detach_cb_opaque of sPAPRConnector was removed. - removal of detach_cb from the detach() call. The function pointer detach_cb of sPAPRConnector was removed. detach() now uses a switch(drc->type) to execute the apropriate callback. To achieve this, spapr_core_release, spapr_lmb_release and spapr_phb_remove_pci_device_cb callbacks were made public to be visible inside detach(). Signed-off-by: Daniel Henrique Barboza Signed-off-by: David Gibson --- hw/ppc/spapr.c | 10 ++++++---- hw/ppc/spapr_drc.c | 36 ++++++++++++++++++++---------------- hw/ppc/spapr_pci.c | 5 +++-- include/hw/pci-host/spapr.h | 3 +++ include/hw/ppc/spapr.h | 4 ++++ include/hw/ppc/spapr_drc.h | 8 +------- 6 files changed, 37 insertions(+), 29 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3a79dabb2b..14399f4172 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2655,7 +2655,8 @@ static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, g_free(dimm_state); } -static void spapr_lmb_release(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_lmb_release(DeviceState *dev) { HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); @@ -2720,7 +2721,7 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_lmb_release, NULL, errp); + drck->detach(drc, dev, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; } @@ -2767,7 +2768,8 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, object_unparent(OBJECT(dev)); } -static void spapr_core_release(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { HotplugHandler *hotplug_ctrl; @@ -2800,7 +2802,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + drck->detach(drc, dev, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9fa5545991..2851e16a26 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -20,6 +20,7 @@ #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ #include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" @@ -99,8 +100,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, if (drc->awaiting_release) { if (drc->configured) { trace_spapr_drc_set_isolation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else { trace_spapr_drc_set_isolation_state_deferring(get_index(drc)); } @@ -153,8 +153,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, if (drc->awaiting_release && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { trace_spapr_drc_set_allocation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { drc->awaiting_allocation = false; } @@ -404,15 +403,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -static void detach(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp) +static void detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) { trace_spapr_drc_detach(get_index(drc)); - drc->detach_cb = detach_cb; - drc->detach_cb_opaque = detach_cb_opaque; - /* if we've signalled device presence to the guest, or if the guest * has gone ahead and configured the device (via manually-executed * device add via drmgr in guest, namely), we need to wait @@ -456,8 +450,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; - if (drc->detach_cb) { - drc->detach_cb(drc->dev, drc->detach_cb_opaque); + /* Calling release callbacks based on drc->type. */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + spapr_core_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PCI: + spapr_phb_remove_pci_device_cb(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + spapr_lmb_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); } drc->awaiting_release = false; @@ -467,8 +474,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->fdt_start_offset = 0; object_property_del(OBJECT(drc), "device", NULL); drc->dev = NULL; - drc->detach_cb = NULL; - drc->detach_cb_opaque = NULL; } static bool release_pending(sPAPRDRConnector *drc) @@ -498,8 +503,7 @@ static void reset(DeviceState *d) * force removal if we are */ if (drc->awaiting_release) { - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } /* non-PCI devices may be awaiting a transition to UNUSABLE */ diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a7cff32bbf..e4daf8d5f1 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1369,7 +1369,8 @@ out: } } -static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) { /* some version guests do not wait for completion of a device * cleanup (generally done asynchronously by the kernel) before @@ -1392,7 +1393,7 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); + drck->detach(drc, DEVICE(pdev), errp); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 1c2e970da2..38470b2f0e 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -123,6 +123,9 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); +/* PCI release callback. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev); + /* VFIO EEH hooks */ #ifdef CONFIG_LINUX bool spapr_phb_eeh_available(sPAPRPHBState *sphb); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 777b5de27b..98fb78b012 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -642,6 +642,10 @@ void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr); +/* CPU and LMB DRC release callbacks. */ +void spapr_core_release(DeviceState *dev); +void spapr_lmb_release(DeviceState *dev); + /* rtas-configure-connector state */ struct sPAPRConfigureConnectorState { uint32_t drc_index; diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 5524247cdc..813b9ffd60 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -130,8 +130,6 @@ typedef enum { SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003, } sPAPRDRCCResponse; -typedef void (spapr_drc_detach_cb)(DeviceState *d, void *opaque); - typedef struct sPAPRDRConnector { /*< private >*/ DeviceState parent; @@ -158,8 +156,6 @@ typedef struct sPAPRDRConnector { /* device pointer, via link property */ DeviceState *dev; - spapr_drc_detach_cb *detach_cb; - void *detach_cb_opaque; } sPAPRDRConnector; typedef struct sPAPRDRConnectorClass { @@ -188,9 +184,7 @@ typedef struct sPAPRDRConnectorClass { /* QEMU interfaces for managing hotplug operations */ void (*attach)(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int fdt_start_offset, bool coldplug, Error **errp); - void (*detach)(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp); + void (*detach)(sPAPRDRConnector *drc, DeviceState *d, Error **errp); bool (*release_pending)(sPAPRDRConnector *drc); void (*set_signalled)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; From a50919dddf148b0a2008db4a0593dbe69e1059c0 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 22 May 2017 16:35:49 -0300 Subject: [PATCH 16/18] hw/ppc: migrating the DRC state of hotplugged devices In pseries, a firmware abstraction called Dynamic Reconfiguration Connector (DRC) is used to assign a particular dynamic resource to the guest and provide an interface to manage configuration/removal of the resource associated with it. In other words, DRC is the 'plugged state' of a device. Before this patch, DRC wasn't being migrated. This causes post-migration problems due to DRC state mismatch between source and target. The DRC state of a device X in the source might change, while in the target the DRC state of X is still fresh. When migrating the guest, X will not have the same hotplugged state as it did in the source. This means that we can't hot unplug X in the target after migration is completed because its DRC state is not consistent. https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1677552 is one bug that is caused by this DRC state mismatch between source and target. To migrate the DRC state, we defined the VMStateDescription struct for spapr_drc to enable the transmission of spapr_drc state in migration. Not all the elements in the DRC state are migrated - only those that can be modified by guest actions or device add/remove operations: - 'isolation_state', 'allocation_state' and 'indicator_state' are involved in the DR state transition diagram from PAPR+ 2.7, 13.4; - 'configured', 'signalled', 'awaiting_release' and 'awaiting_allocation' are needed in attaching and detaching devices; - 'indicator_state' provides users with hardware state information. These are the DRC elements that are migrated. In this patch the DRC state is migrated for PCI, LMB and CPU connector types. At this moment there is no support to migrate DRC for the PHB (PCI Host Bridge) type. In the 'realize' function the DRC is registered using vmstate_register, similar to what hw/ppc/spapr_iommu.c does in 'spapr_tce_table_realize'. This approach works because DRCs are bus-less and do not sit on a BusClass that implements bc->get_dev_path, so as a fallback the VMSD gets identified via "spapr_drc"/get_index(drc). Signed-off-by: Daniel Henrique Barboza Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 2851e16a26..cc2400bcd5 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -519,6 +519,60 @@ static void reset(DeviceState *d) } } +static bool spapr_drc_needed(void *opaque) +{ + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool rc = false; + sPAPRDREntitySense value; + drck->entity_sense(drc, &value); + + /* If no dev is plugged in there is no need to migrate the DRC state */ + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { + return false; + } + + /* + * If there is dev plugged in, we need to migrate the DRC state when + * it is different from cold-plugged state + */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + case SPAPR_DR_CONNECTOR_TYPE_LMB: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); + } + return rc; +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), + VMSTATE_BOOL(configured, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_BOOL(signalled, sPAPRDRConnector), + VMSTATE_END_OF_LIST() + } +}; + static void realize(DeviceState *d, Error **errp) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); @@ -547,6 +601,8 @@ static void realize(DeviceState *d, Error **errp) object_unref(OBJECT(drc)); } g_free(child_name); + vmstate_register(DEVICE(drc), drck->get_index(drc), &vmstate_spapr_drc, + drc); trace_spapr_drc_realize_complete(drck->get_index(drc)); } From 16ee99805e069601ba3ce9da524bab377ab03866 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 22 May 2017 16:35:50 -0300 Subject: [PATCH 17/18] hw/ppc/spapr.c: recover pending LMB unplug info in spapr_lmb_release When a LMB hot unplug starts, the current DRC LMB status is stored at spapr->pending_dimm_unplugs QTAILQ. This queue isn't migrated, thus if a migration occurs in the middle of a LMB unplug the spapr_lmb_release callback will lost track of the LMB unplug progress. This patch implements a new recover function spapr_recover_pending_dimm_state that is used inside spapr_lmb_release to recover this DRC LMB release status that is lost during the migration. Signed-off-by: Daniel Henrique Barboza [dwg: Minor stylistic changes, simplify error handling] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 14399f4172..ab3aab1279 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2655,6 +2655,40 @@ static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, g_free(dimm_state); } +static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, + PCDIMMDevice *dimm) +{ + sPAPRDRConnector *drc; + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; + int i; + sPAPRDIMMState *ds; + + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) { + avail_lmbs++; + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = avail_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(ms, ds); + return ds; +} + /* Callback to be called during DRC release. */ void spapr_lmb_release(DeviceState *dev) { @@ -2662,7 +2696,14 @@ void spapr_lmb_release(DeviceState *dev) sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); - if (--ds->nr_lmbs) { + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + if (ds->nr_lmbs) { + return; + } + } else if (--ds->nr_lmbs) { return; } From 62f94fc94f98095173146e753a1f03d7c2cc7ba3 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 24 May 2017 19:40:43 +0200 Subject: [PATCH 18/18] xics: add unrealize handler Now that ICPState objects get finalized on CPU unplug, we should unregister reset handlers as well to avoid a QEMU crash at machine reset time. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- hw/intc/xics.c | 5 +++++ hw/intc/xics_kvm.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 292fffecd3..ea3516794a 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -357,6 +357,10 @@ static void icp_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_reset, dev); } +static void icp_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_reset, dev); +} static void icp_class_init(ObjectClass *klass, void *data) { @@ -364,6 +368,7 @@ static void icp_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; + dc->unrealize = icp_unrealize; } static const TypeInfo icp_info = { diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index dd7f298462..14b8f6f6e4 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -164,12 +164,18 @@ static void icp_kvm_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_kvm_reset, dev); } +static void icp_kvm_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_kvm_reset, dev); +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICPStateClass *icpc = ICP_CLASS(klass); dc->realize = icp_kvm_realize; + dc->unrealize = icp_kvm_unrealize; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; icpc->cpu_setup = icp_kvm_cpu_setup;