From 0990ce6a2e900d0bdda7f3ecdc991746f63551fb Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 24 Oct 2019 16:27:22 +0200 Subject: [PATCH 1/6] ppc: Add intc_destroy() handlers to SpaprInterruptController/PnvChip SpaprInterruptControllerClass and PnvChipClass have an intc_create() method that calls the appropriate routine, ie. icp_create() or xive_tctx_create(), to establish the link between the VCPU and the presenter component of the interrupt controller during realize. There aren't any symmetrical call to be called when the VCPU gets unrealized though. It is assumed that object_unparent() is the only thing to do. This is questionable because the parenting logic around the CPU and presenter objects is really an implementation detail of the interrupt controller. It shouldn't be open-coded in the machine code. Fix this by adding an intc_destroy() method that undoes what was done in intc_create(). Also NULLify the presenter pointers to avoid having stale pointers around. This will allow to reliably check if a vCPU has a valid presenter. Signed-off-by: Greg Kurz Message-Id: <157192724208.3146912.7254684777515287626.stgit@bahia.lan> Signed-off-by: David Gibson Signed-off-by: Laurent Vivier --- hw/intc/spapr_xive.c | 10 ++++++++++ hw/intc/xics.c | 5 +++++ hw/intc/xics_spapr.c | 10 ++++++++++ hw/intc/xive.c | 5 +++++ hw/ppc/pnv.c | 21 +++++++++++++++++++++ hw/ppc/pnv_core.c | 7 ++++--- hw/ppc/spapr_cpu_core.c | 7 +------ hw/ppc/spapr_irq.c | 14 ++++++++++++++ include/hw/ppc/pnv.h | 1 + include/hw/ppc/spapr_irq.h | 2 ++ include/hw/ppc/xics.h | 1 + include/hw/ppc/xive.h | 1 + 12 files changed, 75 insertions(+), 9 deletions(-) diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index d8e1291905..9cb8d38a3b 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -555,6 +555,15 @@ static void spapr_xive_cpu_intc_reset(SpaprInterruptController *intc, xive_tctx_set_os_cam(tctx, xive_nvt_cam_line(nvt_blk, nvt_idx)); } +static void spapr_xive_cpu_intc_destroy(SpaprInterruptController *intc, + PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + xive_tctx_destroy(spapr_cpu->tctx); + spapr_cpu->tctx = NULL; +} + static void spapr_xive_set_irq(SpaprInterruptController *intc, int irq, int val) { SpaprXive *xive = SPAPR_XIVE(intc); @@ -692,6 +701,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data) sicc->deactivate = spapr_xive_deactivate; sicc->cpu_intc_create = spapr_xive_cpu_intc_create; sicc->cpu_intc_reset = spapr_xive_cpu_intc_reset; + sicc->cpu_intc_destroy = spapr_xive_cpu_intc_destroy; sicc->claim_irq = spapr_xive_claim_irq; sicc->free_irq = spapr_xive_free_irq; sicc->set_irq = spapr_xive_set_irq; diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 6da05763f9..935f325749 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -401,6 +401,11 @@ Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp) return obj; } +void icp_destroy(ICPState *icp) +{ + object_unparent(OBJECT(icp)); +} + /* * ICS: Source layer */ diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 7418fb9f37..b3705dab0e 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -352,6 +352,15 @@ static void xics_spapr_cpu_intc_reset(SpaprInterruptController *intc, icp_reset(spapr_cpu_state(cpu)->icp); } +static void xics_spapr_cpu_intc_destroy(SpaprInterruptController *intc, + PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + icp_destroy(spapr_cpu->icp); + spapr_cpu->icp = NULL; +} + static int xics_spapr_claim_irq(SpaprInterruptController *intc, int irq, bool lsi, Error **errp) { @@ -440,6 +449,7 @@ static void ics_spapr_class_init(ObjectClass *klass, void *data) sicc->deactivate = xics_spapr_deactivate; sicc->cpu_intc_create = xics_spapr_cpu_intc_create; sicc->cpu_intc_reset = xics_spapr_cpu_intc_reset; + sicc->cpu_intc_destroy = xics_spapr_cpu_intc_destroy; sicc->claim_irq = xics_spapr_claim_irq; sicc->free_irq = xics_spapr_free_irq; sicc->set_irq = xics_spapr_set_irq; diff --git a/hw/intc/xive.c b/hw/intc/xive.c index f066be5eb5..38257aa020 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -696,6 +696,11 @@ error: return NULL; } +void xive_tctx_destroy(XiveTCTX *tctx) +{ + object_unparent(OBJECT(tctx)); +} + /* * XIVE ESB helpers */ diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 60632720ef..627c08e5b9 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -778,6 +778,7 @@ static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu, pnv_cpu->intc = obj; } + static void pnv_chip_power8_intc_reset(PnvChip *chip, PowerPCCPU *cpu) { PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); @@ -785,6 +786,14 @@ static void pnv_chip_power8_intc_reset(PnvChip *chip, PowerPCCPU *cpu) icp_reset(ICP(pnv_cpu->intc)); } +static void pnv_chip_power8_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + icp_destroy(ICP(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + /* * 0:48 Reserved - Read as zeroes * 49:52 Node ID @@ -829,6 +838,14 @@ static void pnv_chip_power9_intc_reset(PnvChip *chip, PowerPCCPU *cpu) xive_tctx_reset(XIVE_TCTX(pnv_cpu->intc)); } +static void pnv_chip_power9_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + xive_tctx_destroy(XIVE_TCTX(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + /* * Allowed core identifiers on a POWER8 Processor Chip : * @@ -999,6 +1016,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; k->isa_create = pnv_chip_power8_isa_create; k->dt_populate = pnv_chip_power8_dt_populate; k->pic_print_info = pnv_chip_power8_pic_print_info; @@ -1019,6 +1037,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; k->isa_create = pnv_chip_power8_isa_create; k->dt_populate = pnv_chip_power8_dt_populate; k->pic_print_info = pnv_chip_power8_pic_print_info; @@ -1039,6 +1058,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; k->isa_create = pnv_chip_power8nvl_isa_create; k->dt_populate = pnv_chip_power8_dt_populate; k->pic_print_info = pnv_chip_power8_pic_print_info; @@ -1209,6 +1229,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->core_pir = pnv_chip_core_pir_p9; k->intc_create = pnv_chip_power9_intc_create; k->intc_reset = pnv_chip_power9_intc_reset; + k->intc_destroy = pnv_chip_power9_intc_destroy; k->isa_create = pnv_chip_power9_isa_create; k->dt_populate = pnv_chip_power9_dt_populate; k->pic_print_info = pnv_chip_power9_pic_print_info; diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index e81cd3a3e0..61b3d3ce22 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -269,11 +269,12 @@ err: error_propagate(errp, local_err); } -static void pnv_core_cpu_unrealize(PowerPCCPU *cpu) +static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip) { PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); - object_unparent(OBJECT(pnv_cpu_state(cpu)->intc)); + pcc->intc_destroy(chip, cpu); cpu_remove_sync(CPU(cpu)); cpu->machine_data = NULL; g_free(pnv_cpu); @@ -289,7 +290,7 @@ static void pnv_core_unrealize(DeviceState *dev, Error **errp) qemu_unregister_reset(pnv_core_reset, pc); for (i = 0; i < cc->nr_threads; i++) { - pnv_core_cpu_unrealize(pc->threads[i]); + pnv_core_cpu_unrealize(pc->threads[i], pc->chip); } g_free(pc->threads); } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index ef7b27a66d..8339c4c0f8 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -195,12 +195,7 @@ static void spapr_unrealize_vcpu(PowerPCCPU *cpu, SpaprCpuCore *sc) if (!sc->pre_3_0_migration) { vmstate_unregister(NULL, &vmstate_spapr_cpu_state, cpu->machine_data); } - if (spapr_cpu_state(cpu)->icp) { - object_unparent(OBJECT(spapr_cpu_state(cpu)->icp)); - } - if (spapr_cpu_state(cpu)->tctx) { - object_unparent(OBJECT(spapr_cpu_state(cpu)->tctx)); - } + spapr_irq_cpu_intc_destroy(SPAPR_MACHINE(qdev_get_machine()), cpu); cpu_remove_sync(CPU(cpu)); object_unparent(OBJECT(cpu)); } diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index b941608b69..168044be85 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -234,6 +234,20 @@ void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) } } +void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + sicc->cpu_intc_destroy(intc, cpu); + } + } +} + static void spapr_set_irq(void *opaque, int irq, int level) { SpaprMachineState *spapr = SPAPR_MACHINE(opaque); diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index 2a780e633f..0b4c722e6b 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -112,6 +112,7 @@ typedef struct PnvChipClass { uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id); void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp); void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu); + void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu); ISABus *(*isa_create)(PnvChip *chip, Error **errp); void (*dt_populate)(PnvChip *chip, void *fdt); void (*pic_print_info)(PnvChip *chip, Monitor *mon); diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index 09232999b0..ff814d13de 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -53,6 +53,7 @@ typedef struct SpaprInterruptControllerClass { int (*cpu_intc_create)(SpaprInterruptController *intc, PowerPCCPU *cpu, Error **errp); void (*cpu_intc_reset)(SpaprInterruptController *intc, PowerPCCPU *cpu); + void (*cpu_intc_destroy)(SpaprInterruptController *intc, PowerPCCPU *cpu); int (*claim_irq)(SpaprInterruptController *intc, int irq, bool lsi, Error **errp); void (*free_irq)(SpaprInterruptController *intc, int irq); @@ -70,6 +71,7 @@ void spapr_irq_update_active_intc(SpaprMachineState *spapr); int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, PowerPCCPU *cpu, Error **errp); void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu); +void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu); void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon); void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt, uint32_t phandle); diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 602173c122..48a75aa4ab 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -181,6 +181,7 @@ void icp_resend(ICPState *ss); Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp); +void icp_destroy(ICPState *icp); /* KVM */ void icp_get_kvm_state(ICPState *icp); diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 99381639f5..8fd439ec9b 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -416,6 +416,7 @@ uint64_t xive_tctx_tm_read(XiveTCTX *tctx, hwaddr offset, unsigned size); void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon); Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp); void xive_tctx_reset(XiveTCTX *tctx); +void xive_tctx_destroy(XiveTCTX *tctx); static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx) { From 35886de140b7ff781b775d2da5e7475e8a8cb4c6 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 24 Oct 2019 16:27:27 +0200 Subject: [PATCH 2/6] xive, xics: Fix reference counting on CPU objects When a VCPU gets connected to the XIVE interrupt controller, we add a const link targetting the CPU object to the TCTX object. Similar links are added to the ICP object when using the XICS interrupt controller. As explained in : * The caller must ensure that @target stays alive as long as * this property exists. In the case @target is a child of @obj, * this will be the case. Otherwise, the caller is responsible for * taking a reference. We're in the latter case for both XICS and XIVE. Add the missing calls to object_ref() and object_unref(). This doesn't fix any known issue because the life cycle of the TCTX or ICP happens to be shorter than the one of the CPU or XICS fabric, but better safe than sorry. Signed-off-by: Greg Kurz Reviewed-by: David Gibson Message-Id: <157192724770.3146912.15400869269097231255.stgit@bahia.lan> Signed-off-by: David Gibson Signed-off-by: Laurent Vivier --- hw/intc/xics.c | 8 +++++++- hw/intc/xive.c | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 935f325749..5f746079be 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -388,8 +388,10 @@ Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp) obj = object_new(type); object_property_add_child(cpu, type, obj, &error_abort); object_unref(obj); + object_ref(OBJECT(xi)); object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi), &error_abort); + object_ref(cpu); object_property_add_const_link(obj, ICP_PROP_CPU, cpu, &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { @@ -403,7 +405,11 @@ Object *icp_create(Object *cpu, const char *type, XICSFabric *xi, Error **errp) void icp_destroy(ICPState *icp) { - object_unparent(OBJECT(icp)); + Object *obj = OBJECT(icp); + + object_unref(object_property_get_link(obj, ICP_PROP_CPU, &error_abort)); + object_unref(object_property_get_link(obj, ICP_PROP_XICS, &error_abort)); + object_unparent(obj); } /* diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 38257aa020..952a461d53 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -682,6 +682,7 @@ Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp) obj = object_new(TYPE_XIVE_TCTX); object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort); object_unref(obj); + object_ref(cpu); object_property_add_const_link(obj, "cpu", cpu, &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { @@ -698,7 +699,10 @@ error: void xive_tctx_destroy(XiveTCTX *tctx) { - object_unparent(OBJECT(tctx)); + Object *obj = OBJECT(tctx); + + object_unref(object_property_get_link(obj, "cpu", &error_abort)); + object_unparent(obj); } /* From 0a83b47055246d3942084f03fc54731c4fb9b731 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 24 Oct 2019 16:27:33 +0200 Subject: [PATCH 3/6] ppc: Skip partially initialized vCPUs in 'info pic' CPU_FOREACH() can race with vCPU hotplug/unplug on sPAPR machines, ie. we may try to print out info about a vCPU with a NULL presenter pointer. Check that in order to prevent QEMU from crashing. Signed-off-by: Greg Kurz Message-Id: <157192725327.3146912.12047076483178652551.stgit@bahia.lan> Signed-off-by: David Gibson Signed-off-by: Laurent Vivier --- hw/intc/xics.c | 11 ++++++++++- hw/intc/xive.c | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 5f746079be..e7ac9ba618 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -44,7 +44,16 @@ void icp_pic_print_info(ICPState *icp, Monitor *mon) { - int cpu_index = icp->cs ? icp->cs->cpu_index : -1; + int cpu_index; + + /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs + * are hot plugged or unplugged. + */ + if (!icp) { + return; + } + + cpu_index = icp->cs ? icp->cs->cpu_index : -1; if (!icp->output) { return; diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 952a461d53..75dce82fb2 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -523,9 +523,18 @@ static const char * const xive_tctx_ring_names[] = { void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) { - int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1; + int cpu_index; int i; + /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs + * are hot plugged or unplugged. + */ + if (!tctx) { + return; + } + + cpu_index = tctx->cs ? tctx->cs->cpu_index : -1; + if (kvm_irqchip_in_kernel()) { Error *local_err = NULL; From a49f62b9fd7cf825d7c1f6fad10d865220ff01b2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 24 Oct 2019 15:13:08 +1100 Subject: [PATCH 4/6] spapr: Add /chosen to FDT only at reset time to preserve kernel and initramdisk Since "spapr: Render full FDT on ibm,client-architecture-support" we build the entire flatten device tree (FDT) twice - at the reset time and when "ibm,client-architecture-support" (CAS) is called. The full FDT from CAS is then applied on top of the SLOF internal device tree. This is mostly ok, however there is a case when the QEMU is started with -initrd and for some reason the guest decided to move/unpack the init RAM disk image - the guest correctly notifies SLOF about the change but at CAS it is overridden with the QEMU initial location addresses and the guest may fail to boot if the original initrd memory was changed. This fixes the problem by only adding the /chosen node at the reset time to prevent the original QEMU's linux,initrd-start/linux,initrd-end to override the updated addresses. This only treats /chosen differently as we know there is a special case already and it is unlikely anything else will need to change /chosen at CAS we are better off not touching /chosen after we handed it over to SLOF. Signed-off-by: Alexey Kardashevskiy Message-Id: <20191024041308.5673-1-aik@ozlabs.ru> Signed-off-by: David Gibson Signed-off-by: Laurent Vivier --- hw/ppc/spapr.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 94f9d27096..e076f6023c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -917,7 +917,7 @@ static bool spapr_hotplugged_dev_before_cas(void) return false; } -static void *spapr_build_fdt(SpaprMachineState *spapr); +static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset); int spapr_h_cas_compose_response(SpaprMachineState *spapr, target_ulong addr, target_ulong size, @@ -939,7 +939,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, size -= sizeof(hdr); - fdt = spapr_build_fdt(spapr); + fdt = spapr_build_fdt(spapr, false); _FDT((fdt_pack(fdt))); if (fdt_totalsize(fdt) + sizeof(hdr) > size) { @@ -1197,7 +1197,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) } } -static void *spapr_build_fdt(SpaprMachineState *spapr) +static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset) { MachineState *machine = MACHINE(spapr); MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -1297,7 +1297,9 @@ static void *spapr_build_fdt(SpaprMachineState *spapr) spapr_dt_rtas(spapr, fdt); /* /chosen */ - spapr_dt_chosen(spapr, fdt); + if (reset) { + spapr_dt_chosen(spapr, fdt); + } /* /hypervisor */ if (kvm_enabled()) { @@ -1305,11 +1307,14 @@ static void *spapr_build_fdt(SpaprMachineState *spapr) } /* Build memory reserve map */ - if (spapr->kernel_size) { - _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); - } - if (spapr->initrd_size) { - _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size))); + if (reset) { + if (spapr->kernel_size) { + _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); + } + if (spapr->initrd_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, + spapr->initrd_size))); + } } /* ibm,client-architecture-support updates */ @@ -1718,7 +1723,7 @@ static void spapr_machine_reset(MachineState *machine) */ fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE; - fdt = spapr_build_fdt(spapr); + fdt = spapr_build_fdt(spapr, true); rc = fdt_pack(fdt); From 165dc3edd7f705c6c10908860b8c72e55eadd04b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 30 Oct 2019 17:20:35 +0100 Subject: [PATCH 5/6] spapr/kvm: Set default cpu model for all machine classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have to set the default model of all machine classes, not just for the active one. Otherwise, "query-machines" will indicate the wrong CPU model (e.g. "power9_v2.0-powerpc64-cpu" instead of "host-powerpc64-cpu") as "default-cpu-type". s390x already fixed this in de60a92e "s390x/kvm: Set default cpu model for all machine classes". This patch applies a similar fix for the pseries-* machine types on ppc64. Doing a {"execute":"query-machines"} under KVM now results in { "hotpluggable-cpus": true, "name": "pseries-4.2", "numa-mem-supported": true, "default-cpu-type": "host-powerpc64-cpu", "is-default": true, "cpu-max": 1024, "deprecated": false, "alias": "pseries" }, { "hotpluggable-cpus": true, "name": "pseries-4.1", "numa-mem-supported": true, "default-cpu-type": "host-powerpc64-cpu", "cpu-max": 1024, "deprecated": false }, ... Libvirt probes all machines via "-machine none,accel=kvm:tcg" and will currently see the wrong CPU model under KVM. Reported-by: Jiři Denemark Cc: David Hildenbrand Cc: Igor Mammedov Signed-off-by: David Gibson Reviewed-by: David Hildenbrand Reviewed-by: Greg Kurz Tested-by: Jiri Denemark Signed-off-by: Laurent Vivier --- target/ppc/kvm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 7d2e8969ac..c77f9848ec 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -100,7 +100,7 @@ static bool kvmppc_is_pr(KVMState *ks) return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; } -static int kvm_ppc_register_host_cpu_type(MachineState *ms); +static int kvm_ppc_register_host_cpu_type(void); static void kvmppc_get_cpu_characteristics(KVMState *s); static int kvmppc_get_dec_bits(void); @@ -147,7 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) exit(1); } - kvm_ppc_register_host_cpu_type(ms); + kvm_ppc_register_host_cpu_type(); return 0; } @@ -2534,13 +2534,19 @@ PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) return pvr_pcc; } -static int kvm_ppc_register_host_cpu_type(MachineState *ms) +static void pseries_machine_class_fixup(ObjectClass *oc, void *opaque) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; +} + +static int kvm_ppc_register_host_cpu_type(void) { TypeInfo type_info = { .name = TYPE_HOST_POWERPC_CPU, .class_init = kvmppc_host_cpu_class_init, }; - MachineClass *mc = MACHINE_GET_CLASS(ms); PowerPCCPUClass *pvr_pcc; ObjectClass *oc; DeviceClass *dc; @@ -2552,10 +2558,9 @@ static int kvm_ppc_register_host_cpu_type(MachineState *ms) } type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); type_register(&type_info); - if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { - /* override TCG default cpu type with 'host' cpu model */ - mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; - } + /* override TCG default cpu type with 'host' cpu model */ + object_class_foreach(pseries_machine_class_fixup, TYPE_SPAPR_MACHINE, + false, NULL); oc = object_class_by_name(type_info.name); g_assert(oc); From cd8843ff25d62a0af747517289a4f330b1ae2a6e Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sat, 2 Nov 2019 16:49:19 +0100 Subject: [PATCH 6/6] mos6522: fix T1 and T2 timers With the Quadra 800 emulation, mos6522 timers processing can consume until 70% of the host CPU time with an idle guest (I guess the problem should also happen with PowerMac emulation). On a recent system, it can be painless (except if you look at top), but on an old host like a PowerMac G5 the guest kernel can be terribly slow during the boot sequence (for instance, unpacking initramfs can take 15 seconds rather than only 3 seconds). We can avoid this CPU overload by enabling QEMU internal timers only if the mos6522 counter interrupts are enabled. Sometime the guest kernel wants to read the counters values, but we don't need the timers to update the counters. With this patch applied, an idle Q800 consumes only 3% of host CPU time (and the guest can boot in a decent time). Signed-off-by: Laurent Vivier Message-Id: <20191102154919.17775-1-laurent@vivier.eu> Signed-off-by: David Gibson Signed-off-by: Laurent Vivier --- hw/misc/mos6522.c | 67 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c index 57f13db266..aa3bfe1afd 100644 --- a/hw/misc/mos6522.c +++ b/hw/misc/mos6522.c @@ -38,8 +38,10 @@ /* XXX: implement all timer modes */ -static void mos6522_timer_update(MOS6522State *s, MOS6522Timer *ti, - int64_t current_time); +static void mos6522_timer1_update(MOS6522State *s, MOS6522Timer *ti, + int64_t current_time); +static void mos6522_timer2_update(MOS6522State *s, MOS6522Timer *ti, + int64_t current_time); static void mos6522_update_irq(MOS6522State *s) { @@ -98,7 +100,11 @@ static void set_counter(MOS6522State *s, MOS6522Timer *ti, unsigned int val) trace_mos6522_set_counter(1 + ti->index, val); ti->load_time = get_load_time(s, ti); ti->counter_value = val; - mos6522_timer_update(s, ti, ti->load_time); + if (ti->index == 0) { + mos6522_timer1_update(s, ti, ti->load_time); + } else { + mos6522_timer2_update(s, ti, ti->load_time); + } } static int64_t get_next_irq_time(MOS6522State *s, MOS6522Timer *ti, @@ -130,19 +136,34 @@ static int64_t get_next_irq_time(MOS6522State *s, MOS6522Timer *ti, trace_mos6522_get_next_irq_time(ti->latch, d, next_time - d); next_time = muldiv64(next_time, NANOSECONDS_PER_SECOND, ti->frequency) + ti->load_time; + if (next_time <= current_time) { next_time = current_time + 1; } return next_time; } -static void mos6522_timer_update(MOS6522State *s, MOS6522Timer *ti, +static void mos6522_timer1_update(MOS6522State *s, MOS6522Timer *ti, int64_t current_time) { if (!ti->timer) { return; } - if (ti->index == 0 && (s->acr & T1MODE) != T1MODE_CONT) { + if ((s->ier & T1_INT) == 0 || (s->acr & T1MODE) != T1MODE_CONT) { + timer_del(ti->timer); + } else { + ti->next_irq_time = get_next_irq_time(s, ti, current_time); + timer_mod(ti->timer, ti->next_irq_time); + } +} + +static void mos6522_timer2_update(MOS6522State *s, MOS6522Timer *ti, + int64_t current_time) +{ + if (!ti->timer) { + return; + } + if ((s->ier & T2_INT) == 0) { timer_del(ti->timer); } else { ti->next_irq_time = get_next_irq_time(s, ti, current_time); @@ -155,7 +176,7 @@ static void mos6522_timer1(void *opaque) MOS6522State *s = opaque; MOS6522Timer *ti = &s->timers[0]; - mos6522_timer_update(s, ti, ti->next_irq_time); + mos6522_timer1_update(s, ti, ti->next_irq_time); s->ifr |= T1_INT; mos6522_update_irq(s); } @@ -165,7 +186,7 @@ static void mos6522_timer2(void *opaque) MOS6522State *s = opaque; MOS6522Timer *ti = &s->timers[1]; - mos6522_timer_update(s, ti, ti->next_irq_time); + mos6522_timer2_update(s, ti, ti->next_irq_time); s->ifr |= T2_INT; mos6522_update_irq(s); } @@ -204,7 +225,16 @@ uint64_t mos6522_read(void *opaque, hwaddr addr, unsigned size) { MOS6522State *s = opaque; uint32_t val; + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (now >= s->timers[0].next_irq_time) { + mos6522_timer1_update(s, &s->timers[0], now); + s->ifr |= T1_INT; + } + if (now >= s->timers[1].next_irq_time) { + mos6522_timer2_update(s, &s->timers[1], now); + s->ifr |= T2_INT; + } switch (addr) { case VIA_REG_B: val = s->b; @@ -299,8 +329,8 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) break; case VIA_REG_T1CL: s->timers[0].latch = (s->timers[0].latch & 0xff00) | val; - mos6522_timer_update(s, &s->timers[0], - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + mos6522_timer1_update(s, &s->timers[0], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; case VIA_REG_T1CH: s->timers[0].latch = (s->timers[0].latch & 0xff) | (val << 8); @@ -309,14 +339,14 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) break; case VIA_REG_T1LL: s->timers[0].latch = (s->timers[0].latch & 0xff00) | val; - mos6522_timer_update(s, &s->timers[0], - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + mos6522_timer1_update(s, &s->timers[0], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; case VIA_REG_T1LH: s->timers[0].latch = (s->timers[0].latch & 0xff) | (val << 8); s->ifr &= ~T1_INT; - mos6522_timer_update(s, &s->timers[0], - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + mos6522_timer1_update(s, &s->timers[0], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; case VIA_REG_T2CL: s->timers[1].latch = (s->timers[1].latch & 0xff00) | val; @@ -334,8 +364,8 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) break; case VIA_REG_ACR: s->acr = val; - mos6522_timer_update(s, &s->timers[0], - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + mos6522_timer1_update(s, &s->timers[0], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; case VIA_REG_PCR: s->pcr = val; @@ -354,6 +384,11 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) s->ier &= ~val; } mos6522_update_irq(s); + /* if IER is modified starts needed timers */ + mos6522_timer1_update(s, &s->timers[0], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + mos6522_timer2_update(s, &s->timers[1], + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); break; default: case VIA_REG_ANH: @@ -426,9 +461,11 @@ static void mos6522_reset(DeviceState *dev) s->timers[0].frequency = s->frequency; s->timers[0].latch = 0xffff; set_counter(s, &s->timers[0], 0xffff); + timer_del(s->timers[0].timer); s->timers[1].frequency = s->frequency; s->timers[1].latch = 0xffff; + timer_del(s->timers[1].timer); } static void mos6522_init(Object *obj)