From fc87e185302a96c2675a1c3a86ca47c6a2d657ff Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 13:49:15 +0200 Subject: [PATCH 1/4] KVM: PPC: Add level based interrupt logic KVM on PowerPC used to have completely broken interrupt logic. Usually, interrupts work by having a PIC that pulls a line up/down, so the CPU knows that an interrupt is active. This line stays active until some action is done to the PIC to release the line. On KVM for PPC, we just checked if there was an interrupt pending and pulled a line in the kernel module. We never released it though, hoping that kernel space would just declare an interrupt as released when injected - which is wrong. To fix this, we need to completely redesign the interrupt injection logic. Whenever an interrupt line gets triggered, we need to notify kernel space that the line is up. Whenever it gets released, we do the same. This way we can assure that the interrupt state is always known to kernel space. This fixes random stalls in KVM guests on PowerPC that were waiting for an interrupt while everyone else thought they received it already. Signed-off-by: Alexander Graf --- hw/ppc.c | 11 +++++++++++ target-ppc/kvm.c | 37 +++++++++++++++++++++++++++++++++++-- target-ppc/kvm_ppc.h | 13 +++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/hw/ppc.c b/hw/ppc.c index 2a77eb9bff..55e380844c 100644 --- a/hw/ppc.c +++ b/hw/ppc.c @@ -28,6 +28,8 @@ #include "nvram.h" #include "qemu-log.h" #include "loader.h" +#include "kvm.h" +#include "kvm_ppc.h" //#define PPC_DEBUG_IRQ //#define PPC_DEBUG_TB @@ -50,6 +52,8 @@ static void cpu_ppc_tb_start (CPUState *env); static void ppc_set_irq (CPUState *env, int n_IRQ, int level) { + unsigned int old_pending = env->pending_interrupts; + if (level) { env->pending_interrupts |= 1 << n_IRQ; cpu_interrupt(env, CPU_INTERRUPT_HARD); @@ -58,6 +62,13 @@ static void ppc_set_irq (CPUState *env, int n_IRQ, int level) if (env->pending_interrupts == 0) cpu_reset_interrupt(env, CPU_INTERRUPT_HARD); } + + if (old_pending != env->pending_interrupts) { +#ifdef CONFIG_KVM + kvmppc_set_interrupt(env, n_IRQ, level); +#endif + } + LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32 "req %08x\n", __func__, env, n_IRQ, level, env->pending_interrupts, env->interrupt_request); diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 14d6365ee2..5cacef7b58 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -37,6 +37,9 @@ do { } while (0) #endif +static int cap_interrupt_unset = false; +static int cap_interrupt_level = false; + /* XXX We have a race condition where we actually have a level triggered * interrupt, but the infrastructure can't expose that yet, so the guest * takes but ignores it, goes to sleep and never gets notified that there's @@ -55,6 +58,18 @@ static void kvm_kick_env(void *env) int kvm_arch_init(KVMState *s, int smp_cpus) { +#ifdef KVM_CAP_PPC_UNSET_IRQ + cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); +#endif +#ifdef KVM_CAP_PPC_IRQ_LEVEL + cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); +#endif + + if (!cap_interrupt_level) { + fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " + "VM to stall at times!\n"); + } + return 0; } @@ -178,6 +193,23 @@ int kvm_arch_get_registers(CPUState *env) return 0; } +int kvmppc_set_interrupt(CPUState *env, int irq, int level) +{ + unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; + + if (irq != PPC_INTERRUPT_EXT) { + return 0; + } + + if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { + return 0; + } + + kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq); + + return 0; +} + #if defined(TARGET_PPCEMB) #define PPC_INPUT_INT PPC40x_INPUT_INT #elif defined(TARGET_PPC64) @@ -193,7 +225,8 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) /* PowerPC Qemu tracks the various core input pins (interrupt, critical * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ - if (run->ready_for_interrupt_injection && + if (!cap_interrupt_level && + run->ready_for_interrupt_injection && (env->interrupt_request & CPU_INTERRUPT_HARD) && (env->irq_input_state & (1< Date: Tue, 31 Aug 2010 00:22:28 +0200 Subject: [PATCH 2/4] PPC: Qdev'ify e500 pci The e500 PCI controller isn't qdev'ified yet. This leads to severe issues when running with -drive. To be able to use a virtio disk with an e500 VM, let's convert the PCI controller over to qdev. Signed-off-by: Alexander Graf --- hw/ppce500_pci.c | 132 ++++++++++++++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 46 deletions(-) diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 8ac99f2817..3fa42d2cdc 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -73,11 +73,11 @@ struct pci_inbound { }; struct PPCE500PCIState { + PCIHostState pci_state; struct pci_outbound pob[PPCE500_PCI_NR_POBS]; struct pci_inbound pib[PPCE500_PCI_NR_PIBS]; uint32_t gasket_time; - PCIHostState pci_state; - PCIDevice *pci_dev; + uint64_t base_addr; }; typedef struct PPCE500PCIState PPCE500PCIState; @@ -221,7 +221,7 @@ static void ppce500_pci_save(QEMUFile *f, void *opaque) PPCE500PCIState *controller = opaque; int i; - pci_device_save(controller->pci_dev, f); + /* pci_device_save(controller->pci_dev, f); */ for (i = 0; i < PPCE500_PCI_NR_POBS; i++) { qemu_put_be32s(f, &controller->pob[i].potar); @@ -247,7 +247,7 @@ static int ppce500_pci_load(QEMUFile *f, void *opaque, int version_id) if (version_id != 1) return -EINVAL; - pci_device_load(controller->pci_dev, f); + /* pci_device_load(controller->pci_dev, f); */ for (i = 0; i < PPCE500_PCI_NR_POBS; i++) { qemu_get_be32s(f, &controller->pob[i].potar); @@ -269,55 +269,95 @@ static int ppce500_pci_load(QEMUFile *f, void *opaque, int version_id) PCIBus *ppce500_pci_init(qemu_irq pci_irqs[4], target_phys_addr_t registers) { - PPCE500PCIState *controller; + DeviceState *dev; + PCIBus *b; + PCIHostState *h; + PPCE500PCIState *s; PCIDevice *d; - int index; static int ppce500_pci_id; - controller = qemu_mallocz(sizeof(PPCE500PCIState)); + dev = qdev_create(NULL, "e500-pcihost"); + h = FROM_SYSBUS(PCIHostState, sysbus_from_qdev(dev)); + s = DO_UPCAST(PPCE500PCIState, pci_state, h); - controller->pci_state.bus = pci_register_bus(NULL, "pci", - mpc85xx_pci_set_irq, - mpc85xx_pci_map_irq, - pci_irqs, PCI_DEVFN(0x11, 0), - 4); - d = pci_register_device(controller->pci_state.bus, - "host bridge", sizeof(PCIDevice), - 0, NULL, NULL); + qdev_prop_set_uint64(dev, "base_addr", registers); + b = pci_register_bus(&s->pci_state.busdev.qdev, NULL, mpc85xx_pci_set_irq, + mpc85xx_pci_map_irq, pci_irqs, PCI_DEVFN(0x11, 0), 4); - pci_config_set_vendor_id(d->config, PCI_VENDOR_ID_FREESCALE); - pci_config_set_device_id(d->config, PCI_DEVICE_ID_MPC8533E); - pci_config_set_class(d->config, PCI_CLASS_PROCESSOR_POWERPC); - - controller->pci_dev = d; - - /* CFGADDR */ - index = pci_host_conf_register_mmio(&controller->pci_state, 0); - if (index < 0) - goto free; - cpu_register_physical_memory(registers + PCIE500_CFGADDR, 4, index); - - /* CFGDATA */ - index = pci_host_data_register_mmio(&controller->pci_state, 0); - if (index < 0) - goto free; - cpu_register_physical_memory(registers + PCIE500_CFGDATA, 4, index); - - index = cpu_register_io_memory(e500_pci_reg_read, - e500_pci_reg_write, controller); - if (index < 0) - goto free; - cpu_register_physical_memory(registers + PCIE500_REG_BASE, - PCIE500_REG_SIZE, index); + s->pci_state.bus = b; + qdev_init_nofail(dev); + d = pci_create_simple(b, 0, "e500-host-bridge"); /* XXX load/save code not tested. */ register_savevm(&d->qdev, "ppce500_pci", ppce500_pci_id++, - 1, ppce500_pci_save, ppce500_pci_load, controller); + 1, ppce500_pci_save, ppce500_pci_load, s); - return controller->pci_state.bus; - -free: - printf("%s error\n", __func__); - qemu_free(controller); - return NULL; + return b; } + +static int e500_pcihost_initfn(SysBusDevice *dev) +{ + PCIHostState *h; + PPCE500PCIState *s; + target_phys_addr_t registers; + int index; + + h = FROM_SYSBUS(PCIHostState, sysbus_from_qdev(dev)); + s = DO_UPCAST(PPCE500PCIState, pci_state, h); + registers = (target_phys_addr_t)s->base_addr; + + /* CFGADDR */ + index = pci_host_conf_register_mmio(&s->pci_state, 0); + if (index < 0) + return -1; + cpu_register_physical_memory(registers + PCIE500_CFGADDR, 4, index); + + /* CFGDATA */ + index = pci_host_data_register_mmio(&s->pci_state, 0); + if (index < 0) + return -1; + cpu_register_physical_memory(registers + PCIE500_CFGDATA, 4, index); + + index = cpu_register_io_memory(e500_pci_reg_read, + e500_pci_reg_write, s); + if (index < 0) + return -1; + cpu_register_physical_memory(registers + PCIE500_REG_BASE, + PCIE500_REG_SIZE, index); + return 0; +} + +static int e500_host_bridge_initfn(PCIDevice *dev) +{ + pci_config_set_vendor_id(dev->config, PCI_VENDOR_ID_FREESCALE); + pci_config_set_device_id(dev->config, PCI_DEVICE_ID_MPC8533E); + pci_config_set_class(dev->config, PCI_CLASS_PROCESSOR_POWERPC); + + return 0; +} + +static PCIDeviceInfo e500_host_bridge_info = { + .qdev.name = "e500-host-bridge", + .qdev.desc = "Host bridge", + .qdev.size = sizeof(PCIDevice), + .qdev.no_user = 1, + .init = e500_host_bridge_initfn, +}; + +static SysBusDeviceInfo e500_pcihost_info = { + .init = e500_pcihost_initfn, + .qdev.name = "e500-pcihost", + .qdev.size = sizeof(PPCE500PCIState), + .qdev.no_user = 1, + .qdev.props = (Property[]) { + DEFINE_PROP_UINT64("base_addr", PPCE500PCIState, base_addr, 0), + DEFINE_PROP_END_OF_LIST(), + } +}; + +static void e500_pci_register(void) +{ + sysbus_register_withprop(&e500_pcihost_info); + pci_qdev_register(&e500_host_bridge_info); +} +device_init(e500_pci_register); From cfb207e643d94e3e96d456b1df14c5e36f6aa9e5 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 31 Aug 2010 00:22:50 +0200 Subject: [PATCH 3/4] PPC: Make e500 pci byte swap config data The config data field on the e500 pci controller is in little endian, so we need to enable byte swap there. Signed-off-by: Alexander Graf --- hw/ppce500_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 3fa42d2cdc..629b24235f 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -313,7 +313,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev) cpu_register_physical_memory(registers + PCIE500_CFGADDR, 4, index); /* CFGDATA */ - index = pci_host_data_register_mmio(&s->pci_state, 0); + index = pci_host_data_register_mmio(&s->pci_state, 1); if (index < 0) return -1; cpu_register_physical_memory(registers + PCIE500_CFGDATA, 4, index); From 42a876582906b1765db1c97152c967bc15401f84 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 7 Sep 2010 13:46:15 +0200 Subject: [PATCH 4/4] PPC: Change PPC maintainer Since nobody else seems interested in maintaining PPC, let's change the maintainer to myself. I keep a staging tree anyways and am probably the person touching most of that code these days. This changes the maintainer entry for working ppc targets to myself. Signed-off-by: Alexander Graf --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1dca65e7f7..e5165fbae8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14,7 +14,7 @@ x86 Fabrice Bellard ARM Paul Brook SPARC Blue Swirl MIPS ? -PowerPC ? +PowerPC Alexander Graf M68K Paul Brook SH4 ? CRIS Edgar E. Iglesias @@ -48,9 +48,9 @@ MIPS mips_mipssim.c ? PowerPC ppc_prep.c ? - ppc_oldworld.c Fabrice Bellard - ppc_chrp.c Fabrice Bellard - ppc405_boards.c ? + ppc_oldworld.c Alexander Graf + ppc_newworld.c Alexander Graf + ppc405_boards.c Alexander Graf M86K mcf5208.c Paul Brook an5206.c Paul Brook