From 5c94b2a5e5ef7f91270ce034d2095c6ed924c61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sun, 3 Apr 2016 19:57:50 +0200 Subject: [PATCH 1/3] ppc: Rework POWER7 & POWER8 exception model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Benjamin Herrenschmidt This patch fixes the current AIL implementation for POWER8. The interrupt vector address can be calculated directly from LPCR when the exception is handled. The excp_prefix update becomes useless and we can cleanup the H_SET_MODE hcall. Signed-off-by: Benjamin Herrenschmidt [clg: Removed LPES0/1 handling for HV vs. !HV Fixed LPCR_ILE case for POWERPC_EXCP_POWER8 ] Signed-off-by: Cédric Le Goater [dwg: This was written as a cleanup, but it also fixes a real bug where setting an alternative interrupt location would not be correctly migrated] Signed-off-by: David Gibson --- hw/ppc/spapr_hcall.c | 16 +----------- include/hw/ppc/spapr.h | 5 ---- target-ppc/cpu.h | 10 ++++++++ target-ppc/excp_helper.c | 49 +++++++++++++++++++++++++++++++++++-- target-ppc/translate_init.c | 2 +- 5 files changed, 59 insertions(+), 23 deletions(-) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 2dcb676c6f..8f40602a5e 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -824,7 +824,6 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, { CPUState *cs; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - target_ulong prefix; if (!(pcc->insns_flags2 & PPC2_ISA207S)) { return H_P2; @@ -836,25 +835,12 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_P4; } - switch (mflags) { - case H_SET_MODE_ADDR_TRANS_NONE: - prefix = 0; - break; - case H_SET_MODE_ADDR_TRANS_0001_8000: - prefix = 0x18000; - break; - case H_SET_MODE_ADDR_TRANS_C000_0000_0000_4000: - prefix = 0xC000000000004000ULL; - break; - default: + if (mflags == AIL_RESERVED) { return H_UNSUPPORTED_FLAG; } CPU_FOREACH(cs) { - CPUPPCState *env = &POWERPC_CPU(cpu)->env; - set_spr(cs, SPR_LPCR, mflags << LPCR_AIL_SHIFT, LPCR_AIL); - env->excp_prefix = prefix; } return H_SUCCESS; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 098d85d1a1..815d5eec45 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -204,11 +204,6 @@ struct sPAPRMachineState { #define H_SET_MODE_ENDIAN_BIG 0 #define H_SET_MODE_ENDIAN_LITTLE 1 -/* Flags for H_SET_MODE_RESOURCE_ADDR_TRANS_MODE */ -#define H_SET_MODE_ADDR_TRANS_NONE 0 -#define H_SET_MODE_ADDR_TRANS_0001_8000 2 -#define H_SET_MODE_ADDR_TRANS_C000_0000_0000_4000 3 - /* VASI States */ #define H_VASI_INVALID 0 #define H_VASI_ENABLED 1 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 676081e69d..9d4e43cf1f 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -167,6 +167,8 @@ enum powerpc_excp_t { POWERPC_EXCP_970, /* POWER7 exception model */ POWERPC_EXCP_POWER7, + /* POWER8 exception model */ + POWERPC_EXCP_POWER8, #endif /* defined(TARGET_PPC64) */ }; @@ -2277,6 +2279,14 @@ enum { HMER_XSCOM_STATUS_LSH = (63 - 23), }; +/* Alternate Interrupt Location (AIL) */ +enum { + AIL_NONE = 0, + AIL_RESERVED = 1, + AIL_0001_8000 = 2, + AIL_C000_0000_0000_4000 = 3, +}; + /*****************************************************************************/ static inline target_ulong cpu_read_xer(CPUPPCState *env) diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index c890853d86..ca4ffe8ad6 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -77,7 +77,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) CPUPPCState *env = &cpu->env; target_ulong msr, new_msr, vector; int srr0, srr1, asrr0, asrr1; - int lpes0, lpes1, lev; + int lpes0, lpes1, lev, ail; if (0) { /* XXX: find a suitable condition to enable the hypervisor mode */ @@ -108,6 +108,25 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) asrr0 = -1; asrr1 = -1; + /* Exception targetting modifiers + * + * AIL is initialized here but can be cleared by + * selected exceptions + */ +#if defined(TARGET_PPC64) + if (excp_model == POWERPC_EXCP_POWER7 || + excp_model == POWERPC_EXCP_POWER8) { + if (excp_model == POWERPC_EXCP_POWER8) { + ail = (env->spr[SPR_LPCR] & LPCR_AIL) >> LPCR_AIL_SHIFT; + } else { + ail = 0; + } + } else +#endif /* defined(TARGET_PPC64) */ + { + ail = 0; + } + switch (excp) { case POWERPC_EXCP_NONE: /* Should never happen */ @@ -146,6 +165,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* XXX: find a suitable condition to enable the hypervisor mode */ new_msr |= (target_ulong)MSR_HVB; } + ail = 0; /* machine check exceptions don't have ME set */ new_msr &= ~((target_ulong)1 << MSR_ME); @@ -344,6 +364,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* XXX: find a suitable condition to enable the hypervisor mode */ new_msr |= (target_ulong)MSR_HVB; } + ail = 0; goto store_next; case POWERPC_EXCP_DSEG: /* Data segment exception */ if (lpes1 == 0) { @@ -630,7 +651,8 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #ifdef TARGET_PPC64 - if (excp_model == POWERPC_EXCP_POWER7) { + if (excp_model == POWERPC_EXCP_POWER7 || + excp_model == POWERPC_EXCP_POWER8) { if (env->spr[SPR_LPCR] & LPCR_ILE) { new_msr |= (target_ulong)1 << MSR_LE; } @@ -650,6 +672,29 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) excp); } vector |= env->excp_prefix; + + /* AIL only works if there is no HV transition and we are running with + * translations enabled + */ + if (!((msr >> MSR_IR) & 1) || !((msr >> MSR_DR) & 1)) { + ail = 0; + } + /* Handle AIL */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + switch(ail) { + case AIL_0001_8000: + vector |= 0x18000; + break; + case AIL_C000_0000_0000_4000: + vector |= 0xc000000000004000ull; + break; + default: + cpu_abort(cs, "Invalid AIL combination %d\n", ail); + break; + } + } + #if defined(TARGET_PPC64) if (excp_model == POWERPC_EXCP_BOOKE) { if (env->spr[SPR_BOOKE_EPCR] & EPCR_ICM) { diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 0a33597f6b..f51572552b 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8487,7 +8487,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; pcc->sps = &POWER7_POWER8_sps; #endif - pcc->excp_model = POWERPC_EXCP_POWER7; + pcc->excp_model = POWERPC_EXCP_POWER8; pcc->bus_model = PPC_FLAGS_INPUT_POWER7; pcc->bfd_mach = bfd_mach_ppc64; pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | From f40eb921da78d7524f4d1d96a5c04c77c4a26a28 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 31 Mar 2016 17:27:37 -0500 Subject: [PATCH 2/3] spapr_drc: enable immediate detach for unsignalled devices Currently spapr doesn't support "aborting" hotplug of PCI devices by allowing device_del to immediately remove the device if we haven't signalled the presence of the device to the guest. In the past this wasn't an issue, since we always immediately signalled device attach and simply relied on full guest-aware add->remove path for device removal. However, as of 788d259, we now defer signalling for PCI functions until function 0 is attached, so now we need to deal with these "abort" operations for cases where a user hotplugs a non-0 function, then opts to remove it prior hotplugging function 0. Currently they'd have to reboot before the unplug completed. PCIe multifunction hotplug does not have this requirement however, so from a management implementation perspective it would be good to address this within the same release as 788d259. We accomplish this by simply adding a 'signalled' flag to track whether a device hotplug event has been sent to the guest. If it hasn't, we allow immediate removal under the assumption that the guest will not be using the device. Devices present at boot/reset time are also assumed to be 'signalled'. For CPU/memory/etc, signalling will still happen immediately as part of device_add, so only PCI functions should be affected. Cc: bharata@linux.vnet.ibm.com Cc: david@gibson.dropbear.id.au Cc: sbhat@linux.vnet.ibm.com Cc: qemu-ppc@nongnu.org Signed-off-by: Michael Roth [dwg: This fixes a regression where an incorrect hot-add of a non-zero function can no longer be backed out until function 0 is added] Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 34 ++++++++++++++++++++++++++++++++++ hw/ppc/spapr_events.c | 11 +++++++++++ include/hw/ppc/spapr_drc.h | 2 ++ 3 files changed, 47 insertions(+) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index e6eedf8946..317394027a 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -176,6 +176,12 @@ static void set_configured(sPAPRDRConnector *drc) drc->configured = true; } +/* has the guest been notified of device attachment? */ +static void set_signalled(sPAPRDRConnector *drc) +{ + drc->signalled = true; +} + /* * dr-entity-sense sensor value * returned via get-sensor-state RTAS calls @@ -358,6 +364,7 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; drc->configured = coldplug; + drc->signalled = coldplug; object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), @@ -374,6 +381,26 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->detach_cb = detach_cb; drc->detach_cb_opaque = detach_cb_opaque; + /* if we've signalled device presence to the guest, or if the guest + * has gone ahead and configured the device (via manually-executed + * device add via drmgr in guest, namely), we need to wait + * for the guest to quiesce the device before completing detach. + * Otherwise, we can assume the guest hasn't seen it and complete the + * detach immediately. Note that there is a small race window + * just before, or during, configuration, which is this context + * refers mainly to fetching the device tree via RTAS. + * During this window the device access will be arbitrated by + * associated DRC, which will simply fail the RTAS calls as invalid. + * This is recoverable within guest and current implementations of + * drmgr should be able to cope. + */ + if (!drc->signalled && !drc->configured) { + /* if the guest hasn't seen the device we can't rely on it to + * set it back to an isolated state via RTAS, so do it here manually + */ + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + } + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { DPRINTFN("awaiting transition to isolated state before removal"); drc->awaiting_release = true; @@ -412,6 +439,7 @@ static void reset(DeviceState *d) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sPAPRDREntitySense state; DPRINTFN("drc reset: %x", drck->get_index(drc)); /* immediately upon reset we can safely assume DRCs whose devices @@ -439,6 +467,11 @@ static void reset(DeviceState *d) drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_UNUSABLE); } } + + drck->entity_sense(drc, &state); + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { + drck->set_signalled(drc); + } } static void realize(DeviceState *d, Error **errp) @@ -597,6 +630,7 @@ static void spapr_dr_connector_class_init(ObjectClass *k, void *data) drck->attach = attach; drck->detach = detach; drck->release_pending = release_pending; + drck->set_signalled = set_signalled; /* * Reason: it crashes FIXME find and document the real reason */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 1abec27ec6..269ab7e61c 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -389,6 +389,13 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } +static void spapr_hotplug_set_signalled(uint32_t drc_index) +{ + sPAPRDRConnector *drc = spapr_dr_connector_by_index(drc_index); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->set_signalled(drc); +} + static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, uint32_t drc) @@ -455,6 +462,10 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); + if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) { + spapr_hotplug_set_signalled(drc); + } + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 7e5634772c..fa21ba0444 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -151,6 +151,7 @@ typedef struct sPAPRDRConnector { bool configured; bool awaiting_release; + bool signalled; /* device pointer, via link property */ DeviceState *dev; @@ -188,6 +189,7 @@ typedef struct sPAPRDRConnectorClass { spapr_drc_detach_cb *detach_cb, void *detach_cb_opaque, Error **errp); bool (*release_pending)(sPAPRDRConnector *drc); + void (*set_signalled)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; sPAPRDRConnector *spapr_dr_connector_new(Object *owner, From efdaf797de009e194db89dcd8b171fa35181a6f2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 4 Apr 2016 18:42:36 +1000 Subject: [PATCH 3/3] vl: Move cpu_synchronize_all_states() into qemu_system_reset() There are currently 3 calls to qemu_system_reset() in vl.c. Two of them are immediately preceded by a cpu_synchronize_all_states9) and the remaining one should be. The one which doesn't is the very first reset called directly from main(). Without a cpu_synchronize_all_states(), kvm_vcpu_dirty is false at this point from the earlier cpu_synchronize_all_post_init(). That's incorrect because the reset path is quite likely to update the CPU state, and that updated state should be pushed back to KVM, not overwritten with stale data pushed to KVM immediately after init. This patch moves the call to cpu_synchronize_all_states() into qemu_system_reset() for safety, so it is always called. AFAICT this should be safe for the handful of callers outside vl.c - these all appear to be in places where the cpu state is already synchronized so the extra call will be a no-op. Signed-off-by: David Gibson Acked-by: Paolo Bonzini Tested-by: Laurent Vivier --- vl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vl.c b/vl.c index bd81ea954c..36293360e0 100644 --- a/vl.c +++ b/vl.c @@ -1745,6 +1745,8 @@ void qemu_system_reset(bool report) mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; + cpu_synchronize_all_states(); + if (mc && mc->reset) { mc->reset(); } else { @@ -1893,7 +1895,6 @@ static bool main_loop_should_exit(void) } if (qemu_reset_requested()) { pause_all_vcpus(); - cpu_synchronize_all_states(); qemu_system_reset(VMRESET_REPORT); resume_all_vcpus(); if (!runstate_check(RUN_STATE_RUNNING) && @@ -1903,7 +1904,6 @@ static bool main_loop_should_exit(void) } if (qemu_wakeup_requested()) { pause_all_vcpus(); - cpu_synchronize_all_states(); qemu_system_reset(VMRESET_SILENT); notifier_list_notify(&wakeup_notifiers, &wakeup_reason); wakeup_reason = QEMU_WAKEUP_REASON_NONE;