From 277c7a4d717aedbcb253ca152ae4da67e4162470 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 24 Jul 2014 10:46:47 +0200
Subject: [PATCH 01/52] PPC: KVM: Fix g3beige and mac99 when HV is loaded

On PPC we have 2 different styles of KVM: PR and HV. HV can only virtualize
sPAPR guests while PR can virtualize everything that's reasonably close to
the host hardware platform.

As long as only one kernel module (PR or HV) is loaded, the "default" kvm type
is the module that's loaded. So if your hardware only supports PR mode you can
easily spawn a Mac VM.

However, if both HV and PR are loaded we default to HV mode. And in that case
the Mac machines have to explicitly ask for PR mode to get a working VM.

Fix this up by explicitly having the Mac machines ask for PR style KVM. This
fixes bootup of Mac VMs on systems where bot HV and PR kvm modules are loaded
for me.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/mac_newworld.c | 7 +++++++
 hw/ppc/mac_oldworld.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 1ec4bb490b..0693168286 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -477,12 +477,19 @@ static void ppc_core99_init(MachineState *machine)
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int core99_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine core99_machine = {
     .name = "mac99",
     .desc = "Mac99 based PowerMAC",
     .init = ppc_core99_init,
     .max_cpus = MAX_CPUS,
     .default_boot_order = "cd",
+    .kvm_type = core99_kvm_type,
 };
 
 static void core99_machine_init(void)
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index cd9bdbc53e..ec7ed38343 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -346,6 +346,12 @@ static void ppc_heathrow_init(MachineState *machine)
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int heathrow_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine heathrow_machine = {
     .name = "g3beige",
     .desc = "Heathrow based PowerMAC",
@@ -355,6 +361,7 @@ static QEMUMachine heathrow_machine = {
     .is_default = 1,
 #endif
     .default_boot_order = "cd", /* TOFIX "cad" when Mac floppy is implemented */
+    .kvm_type = heathrow_kvm_type,
 };
 
 static void heathrow_machine_init(void)

From 2e14072f9e859272c7b94b8e189bd30bb4954aa1 Mon Sep 17 00:00:00 2001
From: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Date: Mon, 30 Jun 2014 14:05:29 +0530
Subject: [PATCH 02/52] ppc: spapr-rtas - implement os-term rtas call

PAPR compliant guest calls this in absence of kdump. This finally
reaches the guest and can be handled according to the policies set by
higher level tools(like taking dump) for further analysis by tools like
crash.

Linux kernel calls ibm,os-term when extended property of os-term is set.
This makes sure that a return to the linux kernel is gauranteed.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
[agraf: reduce RTAS_TOKEN_MAX]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c         |  9 +++++++++
 hw/ppc/spapr_rtas.c    | 15 +++++++++++++++
 include/hw/ppc/spapr.h |  3 +--
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5cb452f234..6bb646ce47 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -502,6 +502,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 
     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
 
+    /*
+     * According to PAPR, rtas ibm,os-term, does not gaurantee a return
+     * back to the guest cpu.
+     *
+     * While an additional ibm,extended-os-term property indicates that
+     * rtas call return will always occur. Set this property.
+     */
+    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));
+
     _FDT((fdt_end_node(fdt)));
 
     /* interrupt controller */
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9ba1ba69f9..2ec2a8e4d1 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -277,6 +277,19 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
     rtas_st(rets, 0, ret);
 }
 
+static void rtas_ibm_os_term(PowerPCCPU *cpu,
+                            sPAPREnvironment *spapr,
+                            uint32_t token, uint32_t nargs,
+                            target_ulong args,
+                            uint32_t nret, target_ulong rets)
+{
+    target_ulong ret = 0;
+
+    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
+
+    rtas_st(rets, 0, ret);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -404,6 +417,8 @@ static void core_rtas_register_types(void)
     spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
                         "ibm,set-system-parameter",
                         rtas_ibm_set_system_parameter);
+    spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term",
+                        rtas_ibm_os_term);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bbba51a703..7fff9794f7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -382,9 +382,8 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 #define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
 #define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
 #define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
-#define RTAS_IBM_EXTENDED_OS_TERM               (RTAS_TOKEN_BASE + 0x20)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x21)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x20)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20

From fbdc200ac2d0d29e88ee6af9b77810c0f84265a6 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:36 -0500
Subject: [PATCH 03/52] linux-user: Fix Stack Pointer Bug in PPC setup_rt_frame

The code that sets the stack frame back pointer is incorrect for
the setup_rt_frame() code; qemu will abort (SIGSEGV) in some
environments.  The setup_frame code  was fixed in commit
beb526b12134a6b6744125deec5a7fe24a8f92e3 but the setup_rt_frame
code was not.

Make the setup_rt_frame code consistent with the setup_frame
code.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 26929c59de..29529563ea 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4751,7 +4751,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
 
     /* Create a stack frame for the caller of the handler.  */
     newsp = rt_sf_addr - (SIGNAL_FRAMESIZE + 16);
-    __put_user(env->gpr[1], (target_ulong *)(uintptr_t) newsp);
+    err |= put_user(env->gpr[1], newsp, target_ulong);
 
     if (err)
         goto sigsegv;

From 7678108b134fc14d0a94fd13c0dd1129525c12d7 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:37 -0500
Subject: [PATCH 04/52] linux-user: Split PPC Trampoline Encoding from Register
 Save

Split the encoding of the PowerPC sigreturn trampoline from the saving of
register state onto the signal handler stack.  This will make it easier
in subsequent patches to deal with variations in the stack frame layouts between
32 and 64 bit PowerPC.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 29529563ea..7365d5ddbf 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4491,8 +4491,7 @@ static target_ulong get_sigframe(struct target_sigaction *ka,
     return newsp;
 }
 
-static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
-                          int sigret)
+static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
 {
     target_ulong msr = env->msr;
     int i;
@@ -4559,11 +4558,14 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
 
     /* Store MSR.  */
     __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]);
+}
 
+static void encode_trampoline(int sigret, uint32_t *tramp)
+{
     /* Set up the sigreturn trampoline: li r0,sigret; sc.  */
     if (sigret) {
-        __put_user(0x38000000UL | sigret, &frame->tramp[0]);
-        __put_user(0x44000002UL, &frame->tramp[1]);
+        __put_user(0x38000000 | sigret, &tramp[0]);
+        __put_user(0x44000002, &tramp[1]);
     }
 }
 
@@ -4674,7 +4676,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     __put_user(sig, &sc->signal);
 
     /* Save user regs.  */
-    save_user_regs(env, &frame->mctx, TARGET_NR_sigreturn);
+    save_user_regs(env, &frame->mctx);
+
+    /* Construct the trampoline code on the stack. */
+    encode_trampoline(TARGET_NR_sigreturn, (uint32_t *)&frame->mctx.tramp);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
@@ -4740,7 +4745,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     }
 
     frame = &rt_sf->uc.tuc_mcontext;
-    save_user_regs(env, frame, TARGET_NR_rt_sigreturn);
+    save_user_regs(env, frame);
+    encode_trampoline(TARGET_NR_rt_sigreturn, (uint32_t *)&frame->tramp);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */

From 61e75fecef17aff4b0ecef26b300b90de410aaa2 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:38 -0500
Subject: [PATCH 05/52] linux-user: Enable Signal Handlers on PPC64

Enable the 64-bit PowerPC signal handling code that was previously
disabled via #ifdefs.  Specifically:

  - Move the target_mcontext (register save area) structure and
    append it to the 64-bit target_sigcontext structure.  This
    provides the space on the stack for saving and restoring
    context.
  - Define the target_rt_sigframe for 64-bit.
  - Adjust the setup_frame and setup_rt_frame routines to properly
    select the target_mcontext area and trampoline within the stack
    frame; tthis is different for 32-bit and 64-bit implementations.
  - Adjust the do_setcontext stub for 64-bit so that it compiles
    without warnings.

The 64-bit signal handling code is still not functional after this
change; but the 32-bit code is.  Subsequent changes will address
specific issues with the 64-bit code.

Signed-off-by: Tom Musta <tommusta@gmail.com>
[agraf: fix build on 32bit hosts, ppc64abi32]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 121 ++++++++++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 48 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7365d5ddbf..56c38db4b3 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4325,15 +4325,7 @@ badframe:
     return 0;
 }
 
-#elif defined(TARGET_PPC) && !defined(TARGET_PPC64)
-
-/* FIXME: Many of the structures are defined for both PPC and PPC64, but
-   the signal handling is different enough that we haven't implemented
-   support for PPC64 yet.  Hence the restriction above.
-
-   There are various #if'd blocks for code for TARGET_PPC64.  These
-   blocks should go away so that we can successfully run 32-bit and
-   64-bit binaries on a QEMU configured for PPC64.  */
+#elif defined(TARGET_PPC)
 
 /* Size of dummy stack frame allocated when calling signal handler.
    See arch/powerpc/include/asm/ptrace.h.  */
@@ -4343,6 +4335,33 @@ badframe:
 #define SIGNAL_FRAMESIZE 64
 #endif
 
+/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
+   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
+struct target_mcontext {
+    target_ulong mc_gregs[48];
+    /* Includes fpscr.  */
+    uint64_t mc_fregs[33];
+    target_ulong mc_pad[2];
+    /* We need to handle Altivec and SPE at the same time, which no
+       kernel needs to do.  Fortunately, the kernel defines this bit to
+       be Altivec-register-large all the time, rather than trying to
+       twiddle it based on the specific platform.  */
+    union {
+        /* SPE vector registers.  One extra for SPEFSCR.  */
+        uint32_t spe[33];
+        /* Altivec vector registers.  The packing of VSCR and VRSAVE
+           varies depending on whether we're PPC64 or not: PPC64 splits
+           them apart; PPC32 stuffs them together.  */
+#if defined(TARGET_PPC64)
+#define QEMU_NVRREG 34
+#else
+#define QEMU_NVRREG 33
+#endif
+        ppc_avr_t altivec[QEMU_NVRREG];
+#undef QEMU_NVRREG
+    } mc_vregs __attribute__((__aligned__(16)));
+};
+
 /* See arch/powerpc/include/asm/sigcontext.h.  */
 struct target_sigcontext {
     target_ulong _unused[4];
@@ -4353,7 +4372,9 @@ struct target_sigcontext {
     target_ulong handler;
     target_ulong oldmask;
     target_ulong regs;      /* struct pt_regs __user * */
-    /* TODO: PPC64 includes extra bits here.  */
+#if defined(TARGET_PPC64)
+    struct target_mcontext mcontext;
+#endif
 };
 
 /* Indices for target_mcontext.mc_gregs, below.
@@ -4408,32 +4429,6 @@ enum {
     TARGET_PT_REGS_COUNT = 44
 };
 
-/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
-   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
-struct target_mcontext {
-    target_ulong mc_gregs[48];
-    /* Includes fpscr.  */
-    uint64_t mc_fregs[33];
-    target_ulong mc_pad[2];
-    /* We need to handle Altivec and SPE at the same time, which no
-       kernel needs to do.  Fortunately, the kernel defines this bit to
-       be Altivec-register-large all the time, rather than trying to
-       twiddle it based on the specific platform.  */
-    union {
-        /* SPE vector registers.  One extra for SPEFSCR.  */
-        uint32_t spe[33];
-        /* Altivec vector registers.  The packing of VSCR and VRSAVE
-           varies depending on whether we're PPC64 or not: PPC64 splits
-           them apart; PPC32 stuffs them together.  */
-#if defined(TARGET_PPC64)
-#define QEMU_NVRREG 34
-#else
-#define QEMU_NVRREG 33
-#endif
-        ppc_avr_t altivec[QEMU_NVRREG];
-#undef QEMU_NVRREG
-    } mc_vregs __attribute__((__aligned__(16)));
-};
 
 struct target_ucontext {
     target_ulong tuc_flags;
@@ -4447,7 +4442,7 @@ struct target_ucontext {
     target_sigset_t tuc_sigmask;
 #if defined(TARGET_PPC64)
     target_sigset_t unused[15]; /* Allow for uc_sigmask growth */
-    struct target_sigcontext tuc_mcontext;
+    struct target_sigcontext tuc_sigcontext;
 #else
     int32_t tuc_maskext[30];
     int32_t tuc_pad2[3];
@@ -4462,12 +4457,32 @@ struct target_sigframe {
     int32_t abigap[56];
 };
 
+#if defined(TARGET_PPC64)
+
+#define TARGET_TRAMP_SIZE 6
+
+struct target_rt_sigframe {
+        /* sys_rt_sigreturn requires the ucontext be the first field */
+        struct target_ucontext uc;
+        target_ulong  _unused[2];
+        uint32_t trampoline[TARGET_TRAMP_SIZE];
+        target_ulong pinfo; /* struct siginfo __user * */
+        target_ulong puc; /* void __user * */
+        struct target_siginfo info;
+        /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
+        char abigap[288];
+} __attribute__((aligned(16)));
+
+#else
+
 struct target_rt_sigframe {
     struct target_siginfo info;
     struct target_ucontext uc;
     int32_t abigap[56];
 };
 
+#endif
+
 /* We use the mc_pad field for the signal return trampoline.  */
 #define tramp mc_pad
 
@@ -4667,7 +4682,7 @@ static void setup_frame(int sig, struct target_sigaction *ka,
 
     __put_user(ka->_sa_handler, &sc->handler);
     __put_user(set->sig[0], &sc->oldmask);
-#if defined(TARGET_PPC64)
+#if TARGET_ABI_BITS == 64
     __put_user(set->sig[0] >> 32, &sc->_unused[3]);
 #else
     __put_user(set->sig[1], &sc->_unused[3]);
@@ -4717,7 +4732,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                            target_sigset_t *set, CPUPPCState *env)
 {
     struct target_rt_sigframe *rt_sf;
-    struct target_mcontext *frame;
+    uint32_t *trampptr = 0;
+    struct target_mcontext *mctx = 0;
     target_ulong rt_sf_addr, newsp = 0;
     int i, err = 0;
     int signal;
@@ -4738,19 +4754,28 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                &rt_sf->uc.tuc_stack.ss_flags);
     __put_user(target_sigaltstack_used.ss_size,
                &rt_sf->uc.tuc_stack.ss_size);
+#if !defined(TARGET_PPC64)
     __put_user(h2g (&rt_sf->uc.tuc_mcontext),
                &rt_sf->uc.tuc_regs);
+#endif
     for(i = 0; i < TARGET_NSIG_WORDS; i++) {
         __put_user(set->sig[i], &rt_sf->uc.tuc_sigmask.sig[i]);
     }
 
-    frame = &rt_sf->uc.tuc_mcontext;
-    save_user_regs(env, frame);
-    encode_trampoline(TARGET_NR_rt_sigreturn, (uint32_t *)&frame->tramp);
+#if defined(TARGET_PPC64)
+    mctx = &rt_sf->uc.tuc_sigcontext.mcontext;
+    trampptr = &rt_sf->trampoline[0];
+#else
+    mctx = &rt_sf->uc.tuc_mcontext;
+    trampptr = (uint32_t *)&rt_sf->uc.tuc_mcontext.tramp;
+#endif
+
+    save_user_regs(env, mctx);
+    encode_trampoline(TARGET_NR_rt_sigreturn, trampptr);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
-    env->lr = (target_ulong) h2g(frame->tramp);
+    env->lr = (target_ulong) h2g(trampptr);
 
     /* Turn off all fp exceptions.  */
     env->fpscr = 0;
@@ -4795,7 +4820,7 @@ long do_sigreturn(CPUPPCState *env)
         goto sigsegv;
 
 #if defined(TARGET_PPC64)
-    set.sig[0] = sc->oldmask + ((long)(sc->_unused[3]) << 32);
+    set.sig[0] = sc->oldmask + ((uint64_t)(sc->_unused[3]) << 32);
 #else
     __get_user(set.sig[0], &sc->oldmask);
     __get_user(set.sig[1], &sc->_unused[3]);
@@ -4823,6 +4848,10 @@ sigsegv:
 /* See arch/powerpc/kernel/signal_32.c.  */
 static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 {
+#if defined(TARGET_PPC64)
+    fprintf(stderr, "do_setcontext: not implemented\n");
+    return 0;
+#else
     struct target_mcontext *mcp;
     target_ulong mcp_addr;
     sigset_t blocked;
@@ -4832,10 +4861,6 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
                        sizeof (set)))
         return 1;
 
-#if defined(TARGET_PPC64)
-    fprintf (stderr, "do_setcontext: not implemented\n");
-    return 0;
-#else
     __get_user(mcp_addr, &ucp->tuc_regs);
 
     if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))

From 8d6ab333ebf2751d2467bd7dae94a04742052e26 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:39 -0500
Subject: [PATCH 06/52] linux-user: Properly Dereference PPC64 ELFv1 Signal
 Handler Pointer

Properly dereference 64-bit PPC ELF V1 ABIT function pointers to signal handlers.
On this platform, function pointers are pointers to structures and the first 64
bits of such a structure contains the function's entry point.  The second 64 bits
contains the TOC pointer, which must be placed into GPR 2.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 56c38db4b3..e8e49db9dd 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4483,6 +4483,15 @@ struct target_rt_sigframe {
 
 #endif
 
+#if defined(TARGET_PPC64)
+
+struct target_func_ptr {
+    target_ulong entry;
+    target_ulong toc;
+};
+
+#endif
+
 /* We use the mc_pad field for the signal return trampoline.  */
 #define tramp mc_pad
 
@@ -4714,7 +4723,17 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     env->gpr[1] = newsp;
     env->gpr[3] = signal;
     env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx);
+
+#if defined(TARGET_PPC64)
+    /* PPC64 function pointers are pointers to OPD entries. */
+    struct target_func_ptr *handler =
+        (struct target_func_ptr *)g2h(ka->_sa_handler);
+    env->nip = tswapl(handler->entry);
+    env->gpr[2] = tswapl(handler->toc);
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 
@@ -4793,7 +4812,17 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->gpr[4] = (target_ulong) h2g(&rt_sf->info);
     env->gpr[5] = (target_ulong) h2g(&rt_sf->uc);
     env->gpr[6] = (target_ulong) h2g(rt_sf);
+
+#if defined(TARGET_PPC64)
+    /* PPC64 function pointers are pointers to OPD entries.  */
+    struct target_func_ptr *handler =
+        (struct target_func_ptr *)g2h(ka->_sa_handler);
+    env->nip = tswapl(handler->entry);
+    env->gpr[2] = tswapl(handler->toc);
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 

From 19774ec5c4019d1802b497d702e1eefd3c193e84 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:40 -0500
Subject: [PATCH 07/52] linux-user: Implement do_setcontext for PPC64

Eliminate the stub for the do_setcontext() function for TARGET_PPC64.  The
implementation re-uses the existing TARGET_PPC32 code with the only change
being the computation of the address of the register save area.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index e8e49db9dd..281d1c4a03 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4877,10 +4877,6 @@ sigsegv:
 /* See arch/powerpc/kernel/signal_32.c.  */
 static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 {
-#if defined(TARGET_PPC64)
-    fprintf(stderr, "do_setcontext: not implemented\n");
-    return 0;
-#else
     struct target_mcontext *mcp;
     target_ulong mcp_addr;
     sigset_t blocked;
@@ -4890,7 +4886,12 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
                        sizeof (set)))
         return 1;
 
+#if defined(TARGET_PPC64)
+    mcp_addr = h2g(ucp) +
+        offsetof(struct target_ucontext, tuc_sigcontext.mcontext);
+#else
     __get_user(mcp_addr, &ucp->tuc_regs);
+#endif
 
     if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))
         return 1;
@@ -4901,7 +4902,6 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 
     unlock_user_struct(mcp, mcp_addr, 1);
     return 0;
-#endif
 }
 
 long do_rt_sigreturn(CPUPPCState *env)

From 145855801a002aaf1310630df41425a6bc39cf47 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 30 Jun 2014 08:13:42 -0500
Subject: [PATCH 08/52] linux-user: Handle PPC64 ELFv2 Function Pointers

Function pointers in the 64-bit ELFv2 PowerPC ABI are actual (internal)
entry point addresses.  However, when invoking a function via a function
pointer, GPR 12 must also be set to this address so that the TOC may be
handled properly.

Add this support to the invocation of a signal handler.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-user/signal.c | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 281d1c4a03..e11b20887e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4681,6 +4681,9 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     target_ulong frame_addr, newsp;
     int err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     frame_addr = get_sigframe(ka, env, sizeof(*frame));
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 1))
@@ -4725,11 +4728,18 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx);
 
 #if defined(TARGET_PPC64)
-    /* PPC64 function pointers are pointers to OPD entries. */
-    struct target_func_ptr *handler =
-        (struct target_func_ptr *)g2h(ka->_sa_handler);
-    env->nip = tswapl(handler->entry);
-    env->gpr[2] = tswapl(handler->toc);
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
 #else
     env->nip = (target_ulong) ka->_sa_handler;
 #endif
@@ -4756,6 +4766,9 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     target_ulong rt_sf_addr, newsp = 0;
     int i, err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     rt_sf_addr = get_sigframe(ka, env, sizeof(*rt_sf));
     if (!lock_user_struct(VERIFY_WRITE, rt_sf, rt_sf_addr, 1))
@@ -4814,11 +4827,18 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->gpr[6] = (target_ulong) h2g(rt_sf);
 
 #if defined(TARGET_PPC64)
-    /* PPC64 function pointers are pointers to OPD entries.  */
-    struct target_func_ptr *handler =
-        (struct target_func_ptr *)g2h(ka->_sa_handler);
-    env->nip = tswapl(handler->entry);
-    env->gpr[2] = tswapl(handler->toc);
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
 #else
     env->nip = (target_ulong) ka->_sa_handler;
 #endif

From 7d0cd464a756f3d47f308d7c47eb888b573a9fe4 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 8 Jul 2014 16:02:26 +0100
Subject: [PATCH 09/52] hw/ppc/spapr_hcall.c: Fix typo in function names

Fix a typo in the names of a couple of functions
(s/resouce/resource/).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr_hcall.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 467858ce05..86514472fe 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -712,10 +712,10 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
-static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
-                                          target_ulong mflags,
-                                          target_ulong value1,
-                                          target_ulong value2)
+static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
+                                           target_ulong mflags,
+                                           target_ulong value1,
+                                           target_ulong value2)
 {
     CPUState *cs;
 
@@ -743,10 +743,10 @@ static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
     return H_UNSUPPORTED_FLAG;
 }
 
-static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu,
-                                                       target_ulong mflags,
-                                                       target_ulong value1,
-                                                       target_ulong value2)
+static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
+                                                        target_ulong mflags,
+                                                        target_ulong value1,
+                                                        target_ulong value2)
 {
     CPUState *cs;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
@@ -794,11 +794,11 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     switch (resource) {
     case H_SET_MODE_RESOURCE_LE:
-        ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]);
+        ret = h_set_mode_resource_le(cpu, args[0], args[2], args[3]);
         break;
     case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-        ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0],
-                                                 args[2], args[3]);
+        ret = h_set_mode_resource_addr_trans_mode(cpu, args[0],
+                                                  args[2], args[3]);
         break;
     }
 

From ef9514431d33e52eb611f799670ca86618c1b7d9 Mon Sep 17 00:00:00 2001
From: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Date: Wed, 9 Jul 2014 16:08:37 +0530
Subject: [PATCH 10/52] spapr: add uuid/host details to device tree

Useful for identifying the guest/host uniquely within the
guest. Adding following properties to the guest root node.

vm,uuid - uuid of the guest
host-model - Host model number
host-serial - Host machine serial number
hypervisor type - Tells its "kvm"

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c       | 28 ++++++++++++++++++++++++++++
 target-ppc/kvm.c     | 13 ++++++++++++-
 target-ppc/kvm_ppc.h | 12 ++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6bb646ce47..0adea311cc 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -319,6 +319,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
     unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
     uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+    char *buf;
 
     add_str(hypertas, "hcall-pft");
     add_str(hypertas, "hcall-term");
@@ -348,6 +349,33 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
+    if (kvm_enabled()) {
+        _FDT((fdt_property_string(fdt, "hypervisor", "kvm")));
+    }
+
+    /*
+     * Add info to guest to indentify which host is it being run on
+     * and what is the uuid of the guest
+     */
+    if (kvmppc_get_host_model(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-model", buf)));
+        g_free(buf);
+    }
+    if (kvmppc_get_host_serial(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-serial", buf)));
+        g_free(buf);
+    }
+
+    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
+                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
+                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
+                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
+                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
+                          qemu_uuid[14], qemu_uuid[15]);
+
+    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
+    g_free(buf);
+
     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 42718f77ae..8c9e79c3a9 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1369,7 +1369,7 @@ static int read_cpuinfo(const char *field, char *value, int len)
     }
 
     do {
-        if(!fgets(line, sizeof(line), f)) {
+        if (!fgets(line, sizeof(line), f)) {
             break;
         }
         if (!strncmp(line, field, field_len)) {
@@ -1404,6 +1404,17 @@ uint32_t kvmppc_get_tbfreq(void)
     return retval;
 }
 
+bool kvmppc_get_host_serial(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
+                               NULL);
+}
+
+bool kvmppc_get_host_model(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
+}
+
 /* Try to find a device tree node for a CPU with clock-frequency property */
 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 {
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index d9516e73ef..2e0224c6af 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -19,6 +19,8 @@ uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
 uint32_t kvmppc_get_vmx(void);
 uint32_t kvmppc_get_dfp(void);
+bool kvmppc_get_host_model(char **buf);
+bool kvmppc_get_host_serial(char **buf);
 int kvmppc_get_hasidle(CPUPPCState *env);
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
@@ -60,6 +62,16 @@ static inline uint32_t kvmppc_get_tbfreq(void)
     return 0;
 }
 
+static inline bool kvmppc_get_host_model(char **buf)
+{
+    return false;
+}
+
+static inline bool kvmppc_get_host_serial(char **buf)
+{
+    return false;
+}
+
 static inline uint64_t kvmppc_get_clockfreq(void)
 {
     return 0;

From 261265cc912b375649fcdf7aded0f87359dba544 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 11 Jul 2014 03:24:39 +0200
Subject: [PATCH 11/52] PPC: mac99: Move NVRAM to page boundary when necessary

When running KVM we have to adhere to host page boundaries for memory slots.
Unfortunately the NVRAM on mac99 is a 4k RAM hole inside of an MMIO flash
area.

So if our host is configured with 64k page size, we can't use the mac99 target
with KVM. This is a real shame, as this limitation is not really an issue - we
can easily map NVRAM somewhere else and at least Linux and Mac OS X use it
at their new location.

So in that emergency case when it's about failing to run at all and moving NVRAM
to a place it shouldn't be at, choose the latter.

This patch enables -M mac99 with KVM on 64k page size hosts.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/mac_newworld.c | 11 ++++++++++-
 include/hw/ppc/ppc.h  |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 0693168286..26067b458e 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -176,6 +176,7 @@ static void ppc_core99_init(MachineState *machine)
     SysBusDevice *s;
     DeviceState *dev;
     int *token = g_new(int, 1);
+    hwaddr nvram_addr = 0xFFF04000;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -426,11 +427,18 @@ static void ppc_core99_init(MachineState *machine)
     }
 
     /* The NewWorld NVRAM is not located in the MacIO device */
+#ifdef CONFIG_KVM
+    if (kvm_enabled() && getpagesize() > 4096) {
+        /* We can't combine read-write and read-only in a single page, so
+           move the NVRAM out of ROM again for KVM */
+        nvram_addr = 0xFFE00000;
+    }
+#endif
     dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
     qdev_prop_set_uint32(dev, "size", 0x2000);
     qdev_prop_set_uint32(dev, "it_shift", 1);
     qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xFFF04000);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr);
     nvr = MACIO_NVRAM(dev);
     pmac_format_nvram_partition(nvr, 0x2000);
     /* No PCI init: the BIOS will do it */
@@ -473,6 +481,7 @@ static void ppc_core99_init(MachineState *machine)
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr);
 
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 7e16e2e06c..14efd0ca31 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -92,7 +92,7 @@ enum {
 #define FW_CFG_PPC_IS_KVM       (FW_CFG_ARCH_LOCAL + 0x05)
 #define FW_CFG_PPC_KVM_HC       (FW_CFG_ARCH_LOCAL + 0x06)
 #define FW_CFG_PPC_KVM_PID      (FW_CFG_ARCH_LOCAL + 0x07)
-/* OpenBIOS has FW_CFG_PPC_NVRAM_ADDR as +0x08 */
+#define FW_CFG_PPC_NVRAM_ADDR   (FW_CFG_ARCH_LOCAL + 0x08)
 #define FW_CFG_PPC_BUSFREQ      (FW_CFG_ARCH_LOCAL + 0x09)
 
 #define PPC_SERIAL_MM_BAUDBASE 399193

From a21a7a701252717f05defee8a1a33d72c28fabb7 Mon Sep 17 00:00:00 2001
From: Gonglei <arei.gonglei@huawei.com>
Date: Sat, 26 Jul 2014 12:45:33 +0800
Subject: [PATCH 12/52] spapr: fix possible memory leak

get_boot_devices_list() will malloc memory, spapr_finalize_fdt
doesn't free it.

Signed-off-by: Chenliang <chenliang88@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0adea311cc..522ee27045 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -783,6 +783,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
 
     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
 
+    g_free(bootlist);
     g_free(fdt);
 }
 

From 26a8c353bf0ffb485f4a68bea97efcef7d2bbaa3 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:02 +1000
Subject: [PATCH 13/52] spapr: Move DT memory node rendering to a helper

This moves recurring bits of code related to memory@xxx nodes
creation to a helper.

This makes use of the new helper for node@0.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 522ee27045..28c857834f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -634,6 +634,31 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
     return 0;
 }
 
+static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
+                                       hwaddr size)
+{
+    uint32_t associativity[] = {
+        cpu_to_be32(0x4), /* length */
+        cpu_to_be32(0x0), cpu_to_be32(0x0),
+        cpu_to_be32(nodeid), cpu_to_be32(nodeid)
+    };
+    char mem_name[32];
+    uint64_t mem_reg_property[2];
+    int off;
+
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
+
+    sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                      sizeof(associativity))));
+}
+
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
@@ -652,29 +677,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     }
 
     /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
-    off = fdt_add_subnode(fdt, 0, "memory@0");
-    _FDT(off);
-    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                      sizeof(mem_reg_property))));
-    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                      sizeof(associativity))));
+    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
 
     /* RAM: Node 0 */
     if (node0_size > spapr->rma_size) {
-        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
-
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
+        spapr_populate_memory_node(fdt, 0, spapr->rma_size,
+                                   node0_size - spapr->rma_size);
     }
 
     /* RAM: Node 1 and beyond */

From 81014ac2b88b5fd275c33b463efe306668e920ed Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:03 +1000
Subject: [PATCH 14/52] spapr: Use DT memory node rendering helper for other
 nodes

This finishes refactoring by using the spapr_populate_memory_node helper
for all nodes and removing leftovers from spapr_populate_memory().

This is not a part of the previous patch because the patches look
nicer apart.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 28c857834f..9b9b6c4775 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -661,13 +661,8 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
 
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
-    char mem_name[32];
     hwaddr node0_size, mem_start, node_size;
-    uint64_t mem_reg_property[2];
-    int i, off;
+    int i;
 
     /* memory node(s) */
     if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
@@ -688,7 +683,6 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     /* RAM: Node 1 and beyond */
     mem_start = node0_size;
     for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
@@ -697,16 +691,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
                 node_size = ram_size - mem_start;
             }
         }
-        mem_reg_property[1] = cpu_to_be64(node_size);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
+        spapr_populate_memory_node(fdt, i, mem_start, node_size);
         mem_start += node_size;
     }
 

From 7db8a127e373e468d1f61e46e01e50d1aa33e827 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:04 +1000
Subject: [PATCH 15/52] spapr: Refactor spapr_populate_memory() to allow
 memoryless nodes

Current QEMU does not support memoryless NUMA nodes, however
actual hardware may have them so it makes sense to have a way
to emulate them in QEMU. This prepares SPAPR for that.

This moves 2 calls of spapr_populate_memory_node() into
the existing loop over numa nodes so first several nodes may
have no memory and this still will work.

If there is no numa configuration, the code assumes there is just
a single node at 0 and it has all the guest memory.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9b9b6c4775..718a201d88 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -661,36 +661,36 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
 
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
-    hwaddr node0_size, mem_start, node_size;
-    int i;
+    hwaddr mem_start, node_size;
+    int i, nb_nodes = nb_numa_nodes;
+    NodeInfo *nodes = numa_info;
+    NodeInfo ramnode;
 
-    /* memory node(s) */
-    if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
-        node0_size = numa_info[0].node_mem;
-    } else {
-        node0_size = ram_size;
+    /* No NUMA nodes, assume there is just one node with whole RAM */
+    if (!nb_numa_nodes) {
+        nb_nodes = 1;
+        ramnode.node_mem = ram_size;
+        nodes = &ramnode;
     }
 
-    /* RMA */
-    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
-
-    /* RAM: Node 0 */
-    if (node0_size > spapr->rma_size) {
-        spapr_populate_memory_node(fdt, 0, spapr->rma_size,
-                                   node0_size - spapr->rma_size);
-    }
-
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
+    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
+        if (!nodes[i].node_mem) {
+            continue;
+        }
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
-            node_size = numa_info[i].node_mem;
+            node_size = nodes[i].node_mem;
             if (node_size > ram_size - mem_start) {
                 node_size = ram_size - mem_start;
             }
         }
+        if (!mem_start) {
+            /* ppc_spapr_init() checks for rma_size <= node0_size already */
+            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
+            mem_start += spapr->rma_size;
+            node_size -= spapr->rma_size;
+        }
         spapr_populate_memory_node(fdt, i, mem_start, node_size);
         mem_start += node_size;
     }

From 6010818c30ce9c796b4e22fd261fc6fea1cecbfc Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:05 +1000
Subject: [PATCH 16/52] spapr: Split memory nodes to power-of-two blocks

Linux kernel expects nodes to have power-of-two size and
does WARN_ON if this is not the case:
[    0.041456] WARNING: at drivers/base/memory.c:115
which is:

===
	/* Validate blk_sz is a power of 2 and not less than section size */
	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
        	WARN_ON(1);
	        block_sz = MIN_MEMORY_BLOCK_SIZE;
	}
===

This splits memory nodes into set of smaller blocks with
a size which is a power of two. This makes sure the start
address of every node is aligned to the node size.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[agraf: squash windows compile fix in]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 718a201d88..f2fa11e951 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -691,8 +691,18 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
             mem_start += spapr->rma_size;
             node_size -= spapr->rma_size;
         }
-        spapr_populate_memory_node(fdt, i, mem_start, node_size);
-        mem_start += node_size;
+        for ( ; node_size; ) {
+            hwaddr sizetmp = pow2floor(node_size);
+
+            /* mem_start != 0 here */
+            if (ctzl(mem_start) < ctzl(sizetmp)) {
+                sizetmp = 1ULL << ctzl(mem_start);
+            }
+
+            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
+            node_size -= sizetmp;
+            mem_start += sizetmp;
+        }
     }
 
     return 0;

From b082d65a30078d176f8d1fbb3b99e1449fa2fcff Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:06 +1000
Subject: [PATCH 17/52] spapr: Add a helper for node0_size calculation

In multiple places there is a node0_size variable calculation
which assumes that NUMA node #0 and memory node #0 are the same
things which they are not. Since we are going to change it and
do not want to change it in multiple places, let's make a helper.

This adds a spapr_node0_size() helper and makes use of it.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f2fa11e951..1623805b81 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -283,6 +283,19 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
     return (p - prop) * sizeof(uint32_t);
 }
 
+static hwaddr spapr_node0_size(void)
+{
+    if (nb_numa_nodes) {
+        int i;
+        for (i = 0; i < nb_numa_nodes; ++i) {
+            if (numa_info[i].node_mem) {
+                return MIN(pow2floor(numa_info[i].node_mem), ram_size);
+            }
+        }
+    }
+    return ram_size;
+}
+
 #define _FDT(exp) \
     do { \
         int ret = (exp);                                           \
@@ -833,9 +846,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 
     /* Update the RMA size if necessary */
     if (spapr->vrma_adjust) {
-        hwaddr node0_size = (nb_numa_nodes > 1) ?
-            numa_info[0].node_mem : ram_size;
-        spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
+        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
+                                          spapr->htab_shift);
     }
 }
 
@@ -1268,7 +1280,7 @@ static void ppc_spapr_init(MachineState *machine)
     MemoryRegion *rma_region;
     void *rma = NULL;
     hwaddr rma_alloc_size;
-    hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
+    hwaddr node0_size = spapr_node0_size();
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
     long load_limit, rtas_limit, fw_size;

From c3b4f589d86ae4a6b9f6c1e0587998bc525833da Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 3 Jul 2014 13:10:07 +1000
Subject: [PATCH 18/52] spapr: Fix ibm, associativity for memory nodes

We want the associtivity lists of memory and CPU nodes to match but
memory nodes have incorrect domain#3 which is zero for CPU so they won't
match.

This clears domain#3 in the list to match CPUs associtivity lists.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1623805b81..12dbf1b36b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -653,7 +653,7 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
     uint32_t associativity[] = {
         cpu_to_be32(0x4), /* length */
         cpu_to_be32(0x0), cpu_to_be32(0x0),
-        cpu_to_be32(nodeid), cpu_to_be32(nodeid)
+        cpu_to_be32(0x0), cpu_to_be32(nodeid)
     };
     char mem_name[32];
     uint64_t mem_reg_property[2];

From ea87616d6c44d998affef3d3b9fdfc49d14b8150 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 21 Jul 2014 13:02:03 +1000
Subject: [PATCH 19/52] loader: Add load_image_size() to replace load_image()

A subsequent patch to ppc/spapr needs to load the RTAS blob into
qemu memory rather than target memory (so it can later be copied
into the right spot at machine reset time).

I would use load_image() but it is marked deprecated because it
doesn't take a buffer size as argument, so let's add load_image_size()
that does.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[aik: fixed errors from checkpatch.pl]
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/core/loader.c    | 21 +++++++++++++++++++++
 include/hw/loader.h |  1 +
 2 files changed, 22 insertions(+)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index 193f0f8400..597b117db3 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -89,6 +89,27 @@ int load_image(const char *filename, uint8_t *addr)
     return size;
 }
 
+/* return the size or -1 if error */
+ssize_t load_image_size(const char *filename, void *addr, size_t size)
+{
+    int fd;
+    ssize_t actsize;
+
+    fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd < 0) {
+        return -1;
+    }
+
+    actsize = read(fd, addr, size);
+    if (actsize < 0) {
+        close(fd);
+        return -1;
+    }
+    close(fd);
+
+    return actsize;
+}
+
 /* read()-like version */
 ssize_t read_targphys(const char *name,
                       int fd, hwaddr dst_addr, size_t nbytes)
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 00c9117ee0..9190387b8e 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -13,6 +13,7 @@
  */
 int get_image_size(const char *filename);
 int load_image(const char *filename, uint8_t *addr); /* deprecated */
+ssize_t load_image_size(const char *filename, void *addr, size_t size);
 int load_image_targphys(const char *filename, hwaddr,
                         uint64_t max_sz);
 int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);

From b7d1f77adaab790d20232df261d4e2ff6a77f556 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 21 Jul 2014 13:02:04 +1000
Subject: [PATCH 20/52] spapr: Locate RTAS and device-tree based on real RMA

We currently calculate the final RTAS and FDT location based on
the early estimate of the RMA size, cropped to 256M on KVM since
we only know the real RMA size at reset time which happens much
later in the boot process.

This means the FDT and RTAS end up right below 256M while they
could be much higher, using precious RMA space and limiting
what the OS bootloader can put there which has proved to be
a problem with some OSes (such as when using very large initrd's)

Fortunately, we do the actual copy of the device-tree into guest
memory much later, during reset, late enough to be able to do it
using the final RMA value, we just need to move the calculation
to the right place.

However, RTAS is still loaded too early, so we change the code to
load the tiny blob into qemu memory early on, and then copy it into
guest memory at reset time. It's small enough that the memory usage
doesn't matter.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[aik: fixed errors from checkpatch.pl, defined RTAS_MAX_ADDR]
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[agraf: fix compilation on 32bit hosts]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c         | 34 ++++++++++++++++++++++------------
 include/hw/ppc/spapr.h |  3 ++-
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 12dbf1b36b..2f16d9dcce 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -71,6 +71,7 @@
  */
 #define FDT_MAX_SIZE            0x40000
 #define RTAS_MAX_SIZE           0x10000
+#define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
 #define FW_MAX_SIZE             0x400000
 #define FW_FILE_NAME            "slof.bin"
 #define FW_OVERHEAD             0x2800000
@@ -854,16 +855,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 static void ppc_spapr_reset(void)
 {
     PowerPCCPU *first_ppc_cpu;
+    uint32_t rtas_limit;
 
     /* Reset the hash table & recalc the RMA */
     spapr_reset_htab(spapr);
 
     qemu_devices_reset();
 
+    /*
+     * We place the device tree and RTAS just below either the top of the RMA,
+     * or just below 2GB, whichever is lowere, so that it can be
+     * processed with 32-bit real mode code if necessary
+     */
+    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
+    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
+    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
+
     /* Load the fdt */
     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                        spapr->rtas_size);
 
+    /* Copy RTAS over */
+    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
+                              spapr->rtas_size);
+
     /* Set up the entry state */
     first_ppc_cpu = POWERPC_CPU(first_cpu);
     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
@@ -1283,7 +1298,7 @@ static void ppc_spapr_init(MachineState *machine)
     hwaddr node0_size = spapr_node0_size();
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
-    long load_limit, rtas_limit, fw_size;
+    long load_limit, fw_size;
     bool kernel_le = false;
     char *filename;
 
@@ -1328,13 +1343,8 @@ static void ppc_spapr_init(MachineState *machine)
         exit(1);
     }
 
-    /* We place the device tree and RTAS just below either the top of the RMA,
-     * or just below 2GB, whichever is lowere, so that it can be
-     * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(spapr->rma_size, 0x80000000);
-    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
-    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
-    load_limit = spapr->fdt_addr - FW_OVERHEAD;
+    /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
+    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
     /* We aim for a hash table of size 1/128 the size of RAM.  The
      * normal rule of thumb is 1/64 the size of RAM, but that's much
@@ -1402,14 +1412,14 @@ static void ppc_spapr_init(MachineState *machine)
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
-    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
-                                           rtas_limit - spapr->rtas_addr);
-    if (spapr->rtas_size < 0) {
+    spapr->rtas_size = get_image_size(filename);
+    spapr->rtas_blob = g_malloc(spapr->rtas_size);
+    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
         exit(1);
     }
     if (spapr->rtas_size > RTAS_MAX_SIZE) {
-        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
+        hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n",
                  spapr->rtas_size, RTAS_MAX_SIZE);
         exit(1);
     }
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7fff9794f7..36e8e51435 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -24,7 +24,8 @@ typedef struct sPAPREnvironment {
     hwaddr rma_size;
     int vrma_adjust;
     hwaddr fdt_addr, rtas_addr;
-    long rtas_size;
+    ssize_t rtas_size;
+    void *rtas_blob;
     void *fdt_skel;
     target_ulong entry_point;
     uint64_t rtc_offset;

From 3c902d4469304a9fd78cbef8a927d44b847cde3f Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Date: Mon, 14 Jul 2014 14:45:35 +0530
Subject: [PATCH 21/52] ppc: debug stub: Get trap instruction opcode from KVM

Get trap instruction opcode from KVM and this opcode will
be used for setting software breakpoint in following patch

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 8c9e79c3a9..126f7ee690 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -72,6 +72,8 @@ static int cap_papr;
 static int cap_htab_fd;
 static int cap_fixup_hcalls;
 
+static uint32_t debug_inst_opcode;
+
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
  *     takes but ignores it, goes to sleep and never gets notified that there's
@@ -436,6 +438,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
         break;
     }
 
+    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
+
     return ret;
 }
 

From c371c2e3e0d7ad979dc2bb9763223287fabdcc24 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Date: Mon, 14 Jul 2014 14:45:36 +0530
Subject: [PATCH 22/52] ppc: synchronize excp_vectors for injecting exception

This patch synchronizes env->excp_vectors[] with env->iovr[].
This is required for using the existing interrupt injection mechanism
for kvm.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 126f7ee690..05d2ac8b99 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -903,6 +903,11 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     return ret;
 }
 
+static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
+{
+     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
+}
+
 int kvm_arch_get_registers(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -985,35 +990,57 @@ int kvm_arch_get_registers(CPUState *cs)
 
         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
+            kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
+            kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
+            kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
+            kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
+            kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
+            kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
+            kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
+            kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
+            kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
+            kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
+            kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
+            kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
+            kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
+            kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
+            kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
+            kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
 
             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
+                kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PM) {
                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
+                kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PC) {
                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
             }
         }
 

From 8a0548f94edecb96acb9b7fb9106ccc821c4996f Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Date: Mon, 14 Jul 2014 14:45:38 +0530
Subject: [PATCH 23/52] ppc: Add software breakpoint support

This patch allow insert/remove software breakpoint.

When QEMU is not able to handle debug exception then we inject
program exception to guest because for software breakpoint QEMU
uses a ehpriv-1 instruction;
So there cannot be any reason that we are in qemu with exit reason
KVM_EXIT_DEBUG  for guest set debug exception, only possibility is
guest executed ehpriv-1 privilege instruction and that's why we are
injecting program exception.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
[agraf: make deflect comment booke/book3s agnostic]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm.c | 93 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 79 insertions(+), 14 deletions(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 05d2ac8b99..c29ed65c16 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1275,6 +1275,75 @@ static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t dat
     return 0;
 }
 
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    /* Mixed endian case is not handled */
+    uint32_t sc = debug_inst_opcode;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 0) ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    uint32_t sc;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
+        sc != debug_inst_opcode ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
+{
+    /* Software Breakpoint updates */
+    if (kvm_sw_breakpoints_active(cs)) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+    }
+}
+
+static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
+    int handle = 0;
+
+    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+        handle = 1;
+    } else {
+        /* QEMU is not able to handle debug exception, so inject
+         * program exception to guest;
+         * Yes program exception NOT debug exception !!
+         * For software breakpoint QEMU uses a privileged instruction;
+         * So there cannot be any reason that we are here for guest
+         * set debug exception, only possibility is guest executed a
+         * privileged / illegal instruction and that's why we are
+         * injecting a program interrupt.
+         */
+
+        cpu_synchronize_state(cs);
+        /* env->nip is PC, so increment this by 4 to use
+         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
+         */
+        env->nip += 4;
+        cs->exception_index = POWERPC_EXCP_PROGRAM;
+        env->error_code = POWERPC_EXCP_INVAL;
+        ppc_cpu_do_interrupt(cs);
+    }
+
+    return handle;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -1315,6 +1384,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
 
+    case KVM_EXIT_DEBUG:
+        DPRINTF("handle debug exception\n");
+        if (kvm_handle_debug(cpu, run)) {
+            ret = EXCP_DEBUG;
+            break;
+        }
+        /* re-enter, this exception was guest-internal */
+        ret = 0;
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -2007,16 +2086,6 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
 {
     return -EINVAL;
@@ -2031,10 +2100,6 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 {
 }
 
-void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
-{
-}
-
 struct kvm_get_htab_buf {
     struct kvm_get_htab_header header;
     /*

From 88365d17d586bcf0d9f4432447db345f72278a2a Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Date: Mon, 14 Jul 2014 14:45:37 +0530
Subject: [PATCH 24/52] ppc: Add hw breakpoint watchpoint support

This patch adds hardware breakpoint and hardware watchpoint support
for ppc.

On BOOKE architecture we cannot share debug resources between QEMU
and guest because:
    When QEMU is using debug resources then debug exception must
    be always enabled. To achieve this we set MSR_DE and also set
    MSRP_DEP so guest cannot change MSR_DE.

    When emulating debug resource for guest we want guest
    to control MSR_DE (enable/disable debug interrupt on need).

    So above mentioned two configuration cannot be supported
    at the same time. So the result is that we cannot share
    debug resources between QEMU and Guest on BOOKE architecture.

In the current design QEMU gets priority over guest,
this means that if QEMU is using debug resources then guest
cannot use them and if guest is using debug resource then
qemu can overwrite them.

When QEMU is not able to handle debug exception then we inject program
exception to guest. Yes program exception NOT debug exception and the
reason is:
 1) QEMU and guest not sharing debug resources
 2) For software breakpoint QEMU uses a ehpriv-1 instruction;

 So there cannot be any reason that we are in qemu with exit reason
 KVM_EXIT_DEBUG  for guest set debug exception, only possibility is
 guest executed ehpriv-1 privilege instruction and that's why we are
 injecting program exception.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm.c | 234 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 219 insertions(+), 15 deletions(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index c29ed65c16..2b6fee692e 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -38,6 +38,7 @@
 #include "hw/ppc/ppc.h"
 #include "sysemu/watchdog.h"
 #include "trace.h"
+#include "exec/gdbstub.h"
 
 //#define DEBUG_KVM
 
@@ -412,6 +413,38 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 }
 
+/* e500 supports 2 h/w breakpoint and 2 watchpoint.
+ * book3s supports only 1 watchpoint, so array size
+ * of 4 is sufficient for now.
+ */
+#define MAX_HW_BKPTS 4
+
+static struct HWBreakpoint {
+    target_ulong addr;
+    int type;
+} hw_debug_points[MAX_HW_BKPTS];
+
+static CPUWatchpoint hw_watchpoint;
+
+/* Default there is no breakpoint and watchpoint supported */
+static int max_hw_breakpoint;
+static int max_hw_watchpoint;
+static int nb_hw_breakpoint;
+static int nb_hw_watchpoint;
+
+static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
+{
+    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
+        max_hw_breakpoint = 2;
+        max_hw_watchpoint = 2;
+    }
+
+    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
+        fprintf(stderr, "Error initializing h/w breakpoints\n");
+        return;
+    }
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -439,6 +472,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     }
 
     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
+    kvmppc_hw_debug_points_init(cenv);
 
     return ret;
 }
@@ -1303,12 +1337,163 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
     return 0;
 }
 
+static int find_hw_breakpoint(target_ulong addr, int type)
+{
+    int n;
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+
+    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+        if (hw_debug_points[n].addr == addr &&
+             hw_debug_points[n].type == type) {
+            return n;
+        }
+    }
+
+    return -1;
+}
+
+static int find_hw_watchpoint(target_ulong addr, int *flag)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
+    if (n >= 0) {
+        *flag = BP_MEM_ACCESS;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
+    if (n >= 0) {
+        *flag = BP_MEM_WRITE;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
+    if (n >= 0) {
+        *flag = BP_MEM_READ;
+        return n;
+    }
+
+    return -1;
+}
+
+int kvm_arch_insert_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
+        return -ENOBUFS;
+    }
+
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        if (nb_hw_breakpoint >= max_hw_breakpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_breakpoint++;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        if (nb_hw_watchpoint >= max_hw_watchpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_watchpoint++;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, type);
+    if (n < 0) {
+        return -ENOENT;
+    }
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        nb_hw_breakpoint--;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        nb_hw_watchpoint--;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
+
+    return 0;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+    nb_hw_breakpoint = nb_hw_watchpoint = 0;
+}
+
 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
 {
+    int n;
+
     /* Software Breakpoint updates */
     if (kvm_sw_breakpoints_active(cs)) {
         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
     }
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
+
+    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
+        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+            switch (hw_debug_points[n].type) {
+            case GDB_BREAKPOINT_HW:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
+                break;
+            case GDB_WATCHPOINT_WRITE:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
+                break;
+            case GDB_WATCHPOINT_READ:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
+                break;
+            case GDB_WATCHPOINT_ACCESS:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
+                                        KVMPPC_DEBUG_WATCH_READ;
+                break;
+            default:
+                cpu_abort(cs, "Unsupported breakpoint type\n");
+            }
+            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
+        }
+    }
 }
 
 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
@@ -1317,13 +1502,46 @@ static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
     CPUPPCState *env = &cpu->env;
     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
     int handle = 0;
+    int n;
+    int flag = 0;
 
-    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+    if (cs->singlestep_enabled) {
+        handle = 1;
+    } else if (arch_info->status) {
+        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
+                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
+                if (n >= 0) {
+                    handle = 1;
+                }
+            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
+                                            KVMPPC_DEBUG_WATCH_WRITE)) {
+                n = find_hw_watchpoint(arch_info->address,  &flag);
+                if (n >= 0) {
+                    handle = 1;
+                    cs->watchpoint_hit = &hw_watchpoint;
+                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
+                    hw_watchpoint.flags = flag;
+                }
+            }
+        }
+    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
         handle = 1;
     } else {
         /* QEMU is not able to handle debug exception, so inject
          * program exception to guest;
          * Yes program exception NOT debug exception !!
+         * When QEMU is using debug resources then debug exception must
+         * be always set. To achieve this we set MSR_DE and also set
+         * MSRP_DEP so guest cannot change MSR_DE.
+         * When emulating debug resource for guest we want guest
+         * to control MSR_DE (enable/disable debug interrupt on need).
+         * Supporting both configurations are NOT possible.
+         * So the result is that we cannot share debug resources
+         * between QEMU and Guest on BOOKE architecture.
+         * In the current design QEMU gets the priority over guest,
+         * this means that if QEMU is using debug resources then guest
+         * cannot use them;
          * For software breakpoint QEMU uses a privileged instruction;
          * So there cannot be any reason that we are here for guest
          * set debug exception, only possibility is guest executed a
@@ -2086,20 +2304,6 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-void kvm_arch_remove_all_hw_breakpoints(void)
-{
-}
-
 struct kvm_get_htab_buf {
     struct kvm_get_htab_header header;
     /*

From 9674a356267ee9cf8230775f88c90c299a4affc9 Mon Sep 17 00:00:00 2001
From: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Date: Fri, 27 Jun 2014 12:17:38 +0530
Subject: [PATCH 25/52] ppc/spapr: Fix MAX_CPUS to 255

MAX_CPUS 256 is inconsistent with qemu supporting upto 255 cpus. This
MAX_CPUS number was percolated back to "virsh capabilities" with wrong
max_cpus.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2f16d9dcce..555a007e38 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -81,7 +81,7 @@
 
 #define TIMEBASE_FREQ           512000000ULL
 
-#define MAX_CPUS                256
+#define MAX_CPUS                255
 
 #define PHANDLE_XICP            0x00001111
 

From a7f23d0f8bfbe76864a6427c0e21fe794ab9b7ef Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:03 -0500
Subject: [PATCH 26/52] target-ppc: Bug Fix: rlwinm

The rlwinm specification includes the ROTL32 operation, which is defined
to be a left rotation of two copies of the least significant 32 bits of
the source GPR.

The current implementation is incorrect on 64-bit implementations in that
it rotates a single copy of the least significant 32 bits, padding with
zeroes in the most significant bits.

Fix the code to properly implement this ROTL32 operation.

Example:
R3 = F7487D82EC6F75DF
rlwinm 3,3,5,12,4

R3 expected : 8DEEBBFD880EBBFD
R3 actual   : 00000000880EBBFD (without this fix)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c07bb01a7a..44a8e1efdc 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1675,11 +1675,9 @@ static void gen_rlwinm(DisasContext *ctx)
     } else {
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t1, t1, sh);
-        tcg_gen_extu_i32_i64(t0, t1);
-        tcg_temp_free_i32(t1);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif

From 1c0a150f4bb60ce9af7b4b10f0deabdea1b22f93 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:04 -0500
Subject: [PATCH 27/52] target-ppc: Bug Fix: rlwnm

The rlwnm specification includes the ROTL32 operation, which is defined
to be a left rotation of two copies of the least significant 32 bits of
the source GPR.

The current implementation is incorrect on 64-bit implementations in that
it rotates a single copy of the least significant 32 bits, padding with
zeroes in the most significant bits.

Fix the code to properly implement this ROTL32 operation.

Example:

R3 = 0000000000000002
R4 = 7FFFFFFFFFFFFFFF
rlwnm 3,3,4,31,16
R3 expected : 0000000100000001
R3 actual   : 0000000000000001 (without this patch)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 44a8e1efdc..be7d40bbf8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1698,7 +1698,7 @@ static void gen_rlwnm(DisasContext *ctx)
     uint32_t mb, me;
     TCGv t0;
 #if defined(TARGET_PPC64)
-    TCGv_i32 t1, t2;
+    TCGv t1;
 #endif
 
     mb = MB(ctx->opcode);
@@ -1706,14 +1706,11 @@ static void gen_rlwnm(DisasContext *ctx)
     t0 = tcg_temp_new();
     tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
 #if defined(TARGET_PPC64)
-    t1 = tcg_temp_new_i32();
-    t2 = tcg_temp_new_i32();
-    tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-    tcg_gen_trunc_i64_i32(t2, t0);
-    tcg_gen_rotl_i32(t1, t1, t2);
-    tcg_gen_extu_i32_i64(t0, t1);
-    tcg_temp_free_i32(t1);
-    tcg_temp_free_i32(t2);
+    t1 = tcg_temp_new_i64();
+    tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+        cpu_gpr[rS(ctx->opcode)], 32, 32);
+    tcg_gen_rotl_i64(t0, t1, t0);
+    tcg_temp_free_i64(t1);
 #else
     tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
 #endif
@@ -1724,6 +1721,9 @@ static void gen_rlwnm(DisasContext *ctx)
 #endif
         tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
     } else {
+#if defined(TARGET_PPC64)
+        tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+#endif
         tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
     }
     tcg_temp_free(t0);

From 6ea7b35c0294b1cc462e3225c4672de31300ed79 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:05 -0500
Subject: [PATCH 28/52] target-ppc: Bug Fix: rlwimi

The rlwimi specification includes the ROTL32 operation, which is defined
to be a left rotation of two copies of the least significant 32 bits of
the source GPR.

The current implementation is incorrect on 64-bit implementations in that
it rotates a single copy of the least significant 32 bits, padding with
zeroes in the most significant bits.

Fix the code to properly implement this ROTL32 operation.

Also fix the special case of MB=31 and ME=0 to copy the entire contents
of the source GPR.

Examples:

R3 FFFFFFFFFFFFFFF0
rlwimi 3,3,29,14,1
R3 expected : 1FFFFFFE3FFFFFFE
R3 actual   : 000000003FFFFFFE (without this patch)

R3 ED7EB4DD824F0853
rlwimi 3,3,10,31,0
R3 expected : 3C214E09024F0853
R3 actual   : 00000000024F0853 (without this patch)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index be7d40bbf8..fab4f01d97 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1617,17 +1617,19 @@ static void gen_rlwimi(DisasContext *ctx)
     me = ME(ctx->opcode);
     sh = SH(ctx->opcode);
     if (likely(sh == 0 && mb == 0 && me == 31)) {
+#if defined(TARGET_PPC64)
+        tcg_gen_mov_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+#else
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+#endif
     } else {
         target_ulong mask;
         TCGv t1;
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t2 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t2, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t2, t2, sh);
-        tcg_gen_extu_i32_i64(t0, t2);
-        tcg_temp_free_i32(t2);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif

From f11ebbf8d4308795129bc6651cf701b61b812abf Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:07 -0500
Subject: [PATCH 29/52] target-ppc: Bug Fix: mullwo

On 64-bit implementations, the mullwo result is the 64 bit product of
the signed 32 bit operands.  Fix the implementation to properly deposit
the upper 32 bits into the target register.

Example:

R3 0407DED115077586
R4 53778DF3CA992E09
mullwo 3,3,4
R3 expected : FB9D02730D7735B6
R3 actual   : 000000000D7735B6 (without this patch)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index fab4f01d97..dc80b02318 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1140,11 +1140,20 @@ static void gen_mullwo(DisasContext *ctx)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
     TCGv_i32 t1 = tcg_temp_new_i32();
+#if defined(TARGET_PPC64)
+    TCGv_i64 t2 = tcg_temp_new_i64();
+#endif
 
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_muls2_i32(t0, t1, t0, t1);
     tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+#if defined(TARGET_PPC64)
+    tcg_gen_ext_i32_tl(t2, t1);
+    tcg_gen_deposit_i64(cpu_gpr[rD(ctx->opcode)],
+                        cpu_gpr[rD(ctx->opcode)], t2, 32, 32);
+    tcg_temp_free(t2);
+#endif
 
     tcg_gen_sari_i32(t0, t0, 31);
     tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);

From 1fa74845f2bab36bfa37108b9054b53c1b8264b9 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:06 -0500
Subject: [PATCH 30/52] target-ppc: Bug Fix: mullw

For 64-bit implementations, the mullw result is the 64 bit product
of the sign-extended least significant 32 bits of the source
registers.

Fix the code to properly sign extend the source operands and produce
a 64 bit product.

Example:
R3 00000000002F37A0
R4 41C33D242F816715
mullw 3,3,4
R3 expected : 0008C3146AE0F020
R3 actual   : 000000006AE0F020 (without this patch)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index dc80b02318..b19eb14df7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1128,9 +1128,20 @@ static void gen_mulhwu(DisasContext *ctx)
 /* mullw  mullw. */
 static void gen_mullw(DisasContext *ctx)
 {
+#if defined(TARGET_PPC64)
+    TCGv_i64 t0, t1;
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+#else
     tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                    cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_ext32s_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)]);
+#endif
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }

From 9824d01d5d789a57d27360c0f5e8ee44955eb1d7 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:08 -0500
Subject: [PATCH 31/52] target-ppc: Bug Fix: mulldo OV Detection

Fix the code to properly detect overflow; the 128 bit signed
product must have all zeroes or all ones in the first 65 bits
otherwise OV should be set.

Example:

R3 45F086A5D5887509
R4 0000000000000002
mulldo 3,3,4

Should set XER[OV].

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/int_helper.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f6e8846707..e83a25d05a 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -32,12 +32,22 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     uint64_t tl;
 
     muls64(&tl, (uint64_t *)&th, arg1, arg2);
-    /* If th != 0 && th != -1, then we had an overflow */
-    if (likely((uint64_t)(th + 1) <= 1)) {
+
+    /* th should either contain all 1 bits or all 0 bits and should
+     * match the sign bit of tl; otherwise we have overflowed. */
+
+    if ((int64_t)tl < 0) {
+        if (likely(th == -1LL)) {
+            env->ov = 0;
+        } else {
+            env->so = env->ov = 1;
+        }
+    } else if (likely(th == 0LL)) {
         env->ov = 0;
     } else {
         env->so = env->ov = 1;
     }
+
     return (int64_t)tl;
 }
 #endif

From 34a0fad10210a3e639a8e68323c923494047eefc Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:09 -0500
Subject: [PATCH 32/52] target-ppc: Bug Fix: srawi

For 64 bit implementations, the special case of a shift by zero
should result in the sign extension of the least significant 32 bits
of the source GPR (not a direct copy of the 64 bit source GPR).

Example:

R3 A6212433228F41DC
srawi 3,3,0
R3 expected : 00000000228F41DC
R3 actual   : A6212433228F41DC (without this patch)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b19eb14df7..47dc90311c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1944,7 +1944,7 @@ static void gen_srawi(DisasContext *ctx)
     TCGv dst = cpu_gpr[rA(ctx->opcode)];
     TCGv src = cpu_gpr[rS(ctx->opcode)];
     if (sh == 0) {
-        tcg_gen_mov_tl(dst, src);
+        tcg_gen_ext32s_tl(dst, src);
         tcg_gen_movi_tl(cpu_ca, 0);
     } else {
         TCGv t0;

From 4bc02e230d1e0fd41d2a892d81dcad56e3b3702d Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Tue, 12 Aug 2014 08:45:10 -0500
Subject: [PATCH 33/52] target-ppc: Bug Fix: srad

Fix the check for carry in the srad helper to properly construct
the mask -- a "1ULL" must be used (instead of "1") in order to
get the desired result.

Example:

R3 8000000000000000
R4 F3511AD4A2CD4C38
srad 3,3,4

Should *not* set XER[CA] but does without this patch.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/int_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e83a25d05a..e5b103b0ec 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -248,7 +248,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value,
         if (likely((uint64_t)shift != 0)) {
             shift &= 0x3f;
             ret = (int64_t)value >> shift;
-            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
+            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                 env->ca = 0;
             } else {
                 env->ca = 1;

From 7d0a07fa926436baf1238dcf68a55ea96cf5b9ab Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 19:15:15 +0200
Subject: [PATCH 34/52] KVM: Add helper to run KVM_CHECK_EXTENSION on vm fd

We now can call KVM_CHECK_EXTENSION on the kvm fd or on the vm fd, whereas
the vm version is more accurate when it comes to PPC KVM.

Add a helper to make the vm version available that falls back to the non-vm
variant if the vm one is not available yet to stay compatible.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 include/sysemu/kvm.h |  2 ++
 kvm-all.c            | 13 +++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 174ea36afa..d2000af9c3 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -303,6 +303,8 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu);
 
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension);
+
 #define kvm_vm_enable_cap(s, capability, cap_flags, ...)             \
     ({                                                               \
         struct kvm_enable_cap cap = {                                \
diff --git a/kvm-all.c b/kvm-all.c
index b240bf87a9..b1cf703f9e 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -493,6 +493,19 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension)
+{
+    int ret;
+
+    ret = kvm_vm_ioctl(s, KVM_CHECK_EXTENSION, extension);
+    if (ret < 0) {
+        /* VM wide version not implemented, use global one instead */
+        ret = kvm_check_extension(s, extension);
+    }
+
+    return ret;
+}
+
 static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
                                   bool assign, uint32_t size, bool datamatch)
 {

From 6fd33a750214a866772dd77573cfa24c27ad956d Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 19:17:35 +0200
Subject: [PATCH 35/52] PPC: KVM: Use vm check_extension for pv hcall

To find out whether we support the KVM hypercall interface we need to ask KVM
on the VM level rather than the global KVM level, because Book3S HV KVM does
not support it and we play conservative when both HV and PR are loaded.

So instead, use the VM helper that falls back to global KVM enumeration. That
should cover all cases.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 2b6fee692e..9c23c6ba0d 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1835,7 +1835,7 @@ static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
      PowerPCCPU *cpu = ppc_env_get_cpu(env);
      CPUState *cs = CPU(cpu);
 
-    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
+    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
         return 0;
     }

From d696760b43ca46c070f74fe12d90f38904232467 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 16:45:46 +0200
Subject: [PATCH 36/52] PPC: mac99: Fix core99 timer frequency

There is a special timer in the mac99 machine that we recently started
to emulate. Unfortunately we emulated it in the wrong frequency.

This patch adapts the frequency Mac OS X uses to evaluate results from
this timer, making calculations it bases off of it work.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/misc/macio/macio.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 47f45f5d2c..35eaa00dce 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -243,13 +243,18 @@ static void timer_write(void *opaque, hwaddr addr, uint64_t value,
 static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size)
 {
     uint32_t value = 0;
+    uint64_t systime = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t kltime;
+
+    kltime = muldiv64(systime, 4194300, get_ticks_per_sec() * 4);
+    kltime = muldiv64(kltime, 18432000, 1048575);
 
     switch (addr) {
     case 0x38:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        value = kltime;
         break;
     case 0x3c:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) >> 32;
+        value = kltime >> 32;
         break;
     }
 

From a8b0503701ed8de9353834b0955260f4d9f08640 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 16:50:39 +0200
Subject: [PATCH 37/52] PPC: mac_nvram: Remove unused functions

The macio_nvram_read and macio_nvram_write functions are never called,
just remove them.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/nvram/mac_nvram.c | 23 -----------------------
 hw/ppc/mac.h         |  2 --
 2 files changed, 25 deletions(-)

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 170b10b766..bcff074d81 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -39,29 +39,6 @@
 
 #define DEF_SYSTEM_SIZE 0xc10
 
-/* Direct access to NVRAM */
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr)
-{
-    uint32_t ret;
-
-    if (addr < s->size) {
-        ret = s->data[addr];
-    } else {
-        ret = -1;
-    }
-    NVR_DPRINTF("read addr %04" PRIx32 " val %" PRIx8 "\n", addr, ret);
-
-    return ret;
-}
-
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val)
-{
-    NVR_DPRINTF("write addr %04" PRIx32 " val %" PRIx8 "\n", addr, val);
-    if (addr < s->size) {
-        s->data[addr] = val;
-    }
-}
-
 /* macio style NVRAM device */
 static void macio_nvram_writeb(void *opaque, hwaddr addr,
                                uint64_t value, unsigned size)
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index c1faf9ce27..23536f4827 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -178,6 +178,4 @@ typedef struct MacIONVRAMState {
 } MacIONVRAMState;
 
 void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len);
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr);
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val);
 #endif /* !defined(__PPC_MAC_H__) */

From b19eae18c1cdf053fd85a39902cf77d8b561ef76 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 16:55:53 +0200
Subject: [PATCH 38/52] PPC: mac_nvram: Allow 2 and 4 byte accesses

The NVRAM in our Core99 machine really supports 2byte and 4byte accesses
just as well as 1byte accesses. In fact, Mac OS X uses those.

Add support for higher register size granularities.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/nvram/mac_nvram.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index bcff074d81..7656951e99 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -66,6 +66,10 @@ static uint64_t macio_nvram_readb(void *opaque, hwaddr addr,
 static const MemoryRegionOps macio_nvram_ops = {
     .read = macio_nvram_readb,
     .write = macio_nvram_writeb,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 1,
     .endianness = DEVICE_BIG_ENDIAN,
 };
 

From 2d9907a3332888e43bc73fe9b98a32f8de662526 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 17:09:55 +0200
Subject: [PATCH 39/52] PPC: mac_nvram: Split NVRAM into OF and OSX parts

Mac OS X (at least with -M mac99) searches for a valid NVRAM partition
of a special Apple type. If it can't find that partition in the first
half of NVRAM, it will look at the second half.

There are a few implications from this. The first is that we need to
split NVRAM into 2 halves - one for Open Firmware use, the other one for
Mac OS X. Without this split Mac OS X will just loop endlessly over the
second half trying to find a partition.

The other implication is that we should provide a specially crafted Mac
OS X compatible NVRAM partition on the second half that Mac OS X can
happily use as it sees fit.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/nvram/mac_nvram.c | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 7656951e99..d35f8a3121 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -26,6 +26,7 @@
 #include "hw/nvram/openbios_firmware_abi.h"
 #include "sysemu/sysemu.h"
 #include "hw/ppc/mac.h"
+#include <zlib.h>
 
 /* debug NVR */
 //#define DEBUG_NVR
@@ -137,15 +138,16 @@ static void macio_nvram_register_types(void)
 }
 
 /* Set up a system OpenBIOS NVRAM partition */
-void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
+static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off,
+                                           int len)
 {
     unsigned int i;
-    uint32_t start = 0, end;
+    uint32_t start = off, end;
     struct OpenBIOS_nvpart_v1 *part_header;
 
     // OpenBIOS nvram variables
     // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)nvr->data;
+    part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start];
     part_header->signature = OPENBIOS_PART_SYSTEM;
     pstrcpy(part_header->name, sizeof(part_header->name), "system");
 
@@ -173,4 +175,39 @@ void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
     OpenBIOS_finish_partition(part_header, end - start);
 }
 
+#define OSX_NVRAM_SIGNATURE     (0x5A)
+
+/* Set up a Mac OS X NVRAM partition */
+static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off,
+                                            int len)
+{
+    uint32_t start = off;
+    struct OpenBIOS_nvpart_v1 *part_header;
+    unsigned char *data = &nvr->data[start];
+
+    /* empty partition */
+    part_header = (struct OpenBIOS_nvpart_v1 *)data;
+    part_header->signature = OSX_NVRAM_SIGNATURE;
+    pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww");
+
+    OpenBIOS_finish_partition(part_header, len);
+
+    /* Generation */
+    stl_be_p(&data[20], 2);
+
+    /* Adler32 checksum */
+    stl_be_p(&data[16], adler32(0, &data[20], len - 20));
+}
+
+/* Set up NVRAM with OF and OSX partitions */
+void pmac_format_nvram_partition(MacIONVRAMState *nvr, int len)
+{
+    /*
+     * Mac OS X expects side "B" of the flash at the second half of NVRAM,
+     * so we use half of the chip for OF and the other half for a free OSX
+     * partition.
+     */
+    pmac_format_nvram_partition_of(nvr, 0, len / 2);
+    pmac_format_nvram_partition_osx(nvr, len / 2, len / 2);
+}
 type_init(macio_nvram_register_types)

From caae6c961107c4c55731a86572f9a1f53837636b Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 22:29:02 +0200
Subject: [PATCH 40/52] PPC: Mac: Move tbfreq into local variable

We already expose the real CPU's tb frequency to the guest via fw_cfg. Soon
we will need to also expose it to the MacIO, so let's move it to a variable
that we can leverage every time we need the frequency.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/mac_newworld.c | 13 ++++++++++---
 hw/ppc/mac_oldworld.c | 12 +++++++++---
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 26067b458e..d525247c0a 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -177,6 +177,7 @@ static void ppc_core99_init(MachineState *machine)
     DeviceState *dev;
     int *token = g_new(int, 1);
     hwaddr nvram_addr = 0xFFF04000;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -373,6 +374,14 @@ static void ppc_core99_init(MachineState *machine)
         pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io());
         machine_arch = ARCH_MAC99;
     }
+
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
@@ -469,15 +478,13 @@ static void ppc_core99_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index ec7ed38343..863dd2fc3a 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -103,6 +103,7 @@ static void ppc_heathrow_init(MachineState *machine)
     uint16_t ppc_boot_device;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     void *fw_cfg;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -250,6 +251,13 @@ static void ppc_heathrow_init(MachineState *machine)
         }
     }
 
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
         hw_error("Only 6xx bus is supported on heathrow machine\n");
@@ -330,15 +338,13 @@ static void ppc_heathrow_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);

From b981289c493c7ddabc1cdf7de99daa24642c7739 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 13 Jul 2014 22:31:53 +0200
Subject: [PATCH 41/52] PPC: Cuda: Use cuda timer to expose tbfreq to guest

Mac OS X calibrates a number of frequencies on bootup based on reading
tb values on bootup and comparing them to via cuda timer values.

The only variable we can really steer well (thanks to KVM) is the cuda
frequency. So let's use that one to fake Mac OS X into believing the
bus frequency is tbfreq * 4. That way Mac OS X will automatically
calculate the correct timebase frequency.

With this patch and the patch set I posted earlier I can successfully
run Mac OS X 10.2, 10.3 and 10.4 guests with -M mac99 on TCG and KVM.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/misc/macio/cuda.c  | 23 ++++++++++++++++++++---
 hw/misc/macio/macio.c | 10 ++++++++++
 hw/ppc/mac.h          |  2 ++
 hw/ppc/mac_newworld.c |  1 +
 hw/ppc/mac_oldworld.c |  1 +
 5 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index ff6051defe..b4273aa171 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -123,13 +123,22 @@ static void cuda_update_irq(CUDAState *s)
     }
 }
 
+static uint64_t get_tb(uint64_t freq)
+{
+    return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                    freq, get_ticks_per_sec());
+}
+
 static unsigned int get_counter(CUDATimer *s)
 {
     int64_t d;
     unsigned int counter;
+    uint64_t tb_diff;
+
+    /* Reverse of the tb calculation algorithm that Mac OS X uses on bootup. */
+    tb_diff = get_tb(s->frequency) - s->load_time;
+    d = (tb_diff * 0xBF401675E5DULL) / (s->frequency << 24);
 
-    d = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->load_time,
-                 CUDA_TIMER_FREQ, get_ticks_per_sec());
     if (s->index == 0) {
         /* the timer goes down from latch to -1 (period of latch + 2) */
         if (d <= (s->counter_value + 1)) {
@@ -147,7 +156,7 @@ static unsigned int get_counter(CUDATimer *s)
 static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val)
 {
     CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val);
-    ti->load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    ti->load_time = get_tb(s->frequency);
     ti->counter_value = val;
     cuda_timer_update(s, ti, ti->load_time);
 }
@@ -688,6 +697,8 @@ static void cuda_realizefn(DeviceState *dev, Error **errp)
     struct tm tm;
 
     s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s);
+    s->timers[0].frequency = s->frequency;
+    s->timers[1].frequency = s->frequency;
 
     qemu_get_timedate(&tm, 0);
     s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET;
@@ -713,6 +724,11 @@ static void cuda_initfn(Object *obj)
                         DEVICE(obj), "adb.0");
 }
 
+static Property cuda_properties[] = {
+    DEFINE_PROP_UINT64("frequency", CUDAState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void cuda_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -720,6 +736,7 @@ static void cuda_class_init(ObjectClass *oc, void *data)
     dc->realize = cuda_realizefn;
     dc->reset = cuda_reset;
     dc->vmsd = &vmstate_cuda;
+    dc->props = cuda_properties;
 }
 
 static const TypeInfo cuda_type_info = {
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 35eaa00dce..e0f1e88f54 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -42,6 +42,7 @@ typedef struct MacIOState
     void *dbdma;
     MemoryRegion *pic_mem;
     MemoryRegion *escc_mem;
+    uint64_t frequency;
 } MacIOState;
 
 #define OLDWORLD_MACIO(obj) \
@@ -351,12 +352,19 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data)
     pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL;
 }
 
+static Property macio_properties[] = {
+    DEFINE_PROP_UINT64("frequency", MacIOState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void macio_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->class_id = PCI_CLASS_OTHERS << 8;
+    dc->props = macio_properties;
 }
 
 static const TypeInfo macio_oldworld_type_info = {
@@ -403,6 +411,8 @@ void macio_init(PCIDevice *d,
     macio_state->escc_mem = escc_mem;
     /* Note: this code is strongly inspirated from the corresponding code
        in PearPC */
+    qdev_prop_set_uint64(DEVICE(&macio_state->cuda), "frequency",
+                         macio_state->frequency);
 
     qdev_init_nofail(DEVICE(d));
 }
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index 23536f4827..aff2b9a566 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -57,6 +57,7 @@ typedef struct CUDATimer {
     uint16_t counter_value;
     int64_t load_time;
     int64_t next_irq_time;
+    uint64_t frequency;
     QEMUTimer *timer;
 } CUDATimer;
 
@@ -97,6 +98,7 @@ typedef struct CUDAState {
     CUDATimer timers[2];
 
     uint32_t tick_offset;
+    uint64_t frequency;
 
     uint8_t last_b;
     uint8_t last_acr;
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index d525247c0a..8453bfa284 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -395,6 +395,7 @@ static void ppc_core99_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0e]); /* IDE */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     /* We only emulate 2 out of 3 IDE controllers for now */
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 863dd2fc3a..630a9f9c36 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -286,6 +286,7 @@ static void ppc_heathrow_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE-0 DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0E]); /* IDE-1 */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE-1 DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),

From 32420522482ffc20f8e9423af4f41f4e05ce3a56 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 13 Aug 2014 17:20:53 +1000
Subject: [PATCH 42/52] spapr_pci: Fix config space corruption

When disabling MSI/MSIX via "ibm,change-msi" RTAS call, no check was made
if MSI or MSIX is actually supported and the MSI message was reset
unconditionally. If this happened on a device which does not support MSI
(but does support MSIX, otherwise "ibm,change-msi" would not be called),
this device would have PCIDevice::msi_cap field (MSI capability offset)
set to zero and writing a vector would actually clear PCI status.

This clears MSI message only if MSI or MSIX is present on a device.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr_pci.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 9ed39a93b7..e894f07f84 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -262,7 +262,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int irq, max_irqs = 0, num = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
-    bool msix = false;
     spapr_pci_msi *msi;
     int *config_addr_key;
 
@@ -300,7 +299,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         }
 
         xics_free(spapr->icp, msi->first_irq, msi->num);
-        spapr_msi_setmsg(pdev, 0, msix, 0, num);
+        if (msi_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, false, 0, num);
+        }
+        if (msix_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, true, 0, num);
+        }
         g_hash_table_remove(phb->msi, &config_addr);
 
         trace_spapr_pci_msi("Released MSIs", config_addr);

From 439ce1401bac1687c711cb6acf4ee8f3f457c05e Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 22 Aug 2014 11:50:57 +1000
Subject: [PATCH 43/52] spapr-vlan: Don't touch last entry in buffer list

The last 8 bytes of the buffer list is defined to contain the number
of dropped frames. At the moment we use it to store rx entries,
which trips up ethtool -S:

rx_no_buffer: 9223380832981355136

Fix this by skipping the last buffer list entry.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/net/spapr_llan.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2d47df6930..23c47d397c 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF      0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF      16
-#define VLAN_MAX_BUFS        ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+/*
+ * The final 8 bytes of the buffer list is a counter of frames dropped
+ * because there was not a buffer in the buffer list capable of holding
+ * the frame. We must avoid it, or the operating system will report garbage
+ * for this statistic.
+ */
+#define VLAN_RX_BDS_LEN      (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8)
+#define VLAN_MAX_BUFS        (VLAN_RX_BDS_LEN / 8)
 
 #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan"
 #define VIO_SPAPR_VLAN_DEVICE(obj) \
@@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
 
     do {
         buf_ptr += 8;
-        if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             buf_ptr = VLAN_RX_BDS_OFF;
         }
 
@@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
 
     do {
         dev->add_buf_ptr += 8;
-        if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             dev->add_buf_ptr = VLAN_RX_BDS_OFF;
         }
 

From ab92678d0a24e7ef8d4d93d18e5c0df8619874fe Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:39 -0500
Subject: [PATCH 44/52] target-ppc: Special Case of rlwimi Should Use Deposit

The special case of rlwimi where MB <= ME and SH = 31-ME can be implemented
with a single TCG deposit operation.  This replaces the less general case
of SH = MB = 0 and ME = 31.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 47dc90311c..095b83c147 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1636,12 +1636,9 @@ static void gen_rlwimi(DisasContext *ctx)
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
     sh = SH(ctx->opcode);
-    if (likely(sh == 0 && mb == 0 && me == 31)) {
-#if defined(TARGET_PPC64)
-        tcg_gen_mov_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#else
-        tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#endif
+    if (likely(sh == (31-me) && mb <= me)) {
+        tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                           cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
     } else {
         target_ulong mask;
         TCGv t1;

From 8979c2f602357129fdf07a5cf8484ca430928b47 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:40 -0500
Subject: [PATCH 45/52] target-ppc: Optimize rlwinm MB=0 ME=31

Optimize the special case of rlwinm where MB=0 and ME=31.  This can
be implemented as a 32-bit ROTL.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 095b83c147..889e37d272 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1691,6 +1691,12 @@ static void gen_rlwinm(DisasContext *ctx)
         tcg_gen_shri_tl(t0, t0, mb);
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
         tcg_temp_free(t0);
+    } else if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_rotli_i32(t0, t0, sh);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        tcg_temp_free_i32(t0);
     } else {
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)

From 57fca134bb64926f00ab8b14cdb8d345f395e07f Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:41 -0500
Subject: [PATCH 46/52] target-ppc: Optimize rlwnm MB=0 ME=31

Optimize the special case of rlwnm where MB=0 and ME=31.  This can
be implemented using a ROTL.

Suggested-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 62 +++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 889e37d272..57cb381447 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1721,37 +1721,49 @@ static void gen_rlwinm(DisasContext *ctx)
 static void gen_rlwnm(DisasContext *ctx)
 {
     uint32_t mb, me;
-    TCGv t0;
-#if defined(TARGET_PPC64)
-    TCGv t1;
-#endif
-
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
-    t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
-#if defined(TARGET_PPC64)
-    t1 = tcg_temp_new_i64();
-    tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
-        cpu_gpr[rS(ctx->opcode)], 32, 32);
-    tcg_gen_rotl_i64(t0, t1, t0);
-    tcg_temp_free_i64(t1);
-#else
-    tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
-#endif
-    if (unlikely(mb != 0 || me != 31)) {
-#if defined(TARGET_PPC64)
-        mb += 32;
-        me += 32;
-#endif
-        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
+
+    if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0, t1;
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]);
+        tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_andi_i32(t0, t0, 0x1f);
+        tcg_gen_rotl_i32(t1, t1, t0);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
     } else {
+        TCGv t0;
 #if defined(TARGET_PPC64)
-        tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+        TCGv t1;
 #endif
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+
+        t0 = tcg_temp_new();
+        tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
+#if defined(TARGET_PPC64)
+        t1 = tcg_temp_new_i64();
+        tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+                            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotl_i64(t0, t1, t0);
+        tcg_temp_free_i64(t1);
+#else
+        tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
+#endif
+        if (unlikely(mb != 0 || me != 31)) {
+#if defined(TARGET_PPC64)
+            mb += 32;
+            me += 32;
+#endif
+            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
+        } else {
+            tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        }
+        tcg_temp_free(t0);
     }
-    tcg_temp_free(t0);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }

From 03039e5ef0b92cb3ec89ff2caa5b57fa6bf12a88 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:42 -0500
Subject: [PATCH 47/52] target-ppc: Clean Up mullw

Eliminate the unecessary ext32s TCG operation and make the multiplication
operation explicitly 32 bit.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 57cb381447..ced295f079 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1138,9 +1138,8 @@ static void gen_mullw(DisasContext *ctx)
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 #else
-    tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                   cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_ext32s_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)]);
+    tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                    cpu_gpr[rB(ctx->opcode)]);
 #endif
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);

From 269778769d4d24c511bc3d5f95eeb2e92dcf1868 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:43 -0500
Subject: [PATCH 48/52] target-ppc: Clean up mullwo

Simplify the implementation of mullwo.  For 64 bit CPUs, the result is
the concatenation of the upper and lower parts of the muls2_i32 operation,
which may be slightly better than deposit.  For 32 bit CPUs, the lower part
of the muls_i32 operation is moved into the target GPR.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/translate.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ced295f079..106263477a 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1150,19 +1150,14 @@ static void gen_mullwo(DisasContext *ctx)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
     TCGv_i32 t1 = tcg_temp_new_i32();
-#if defined(TARGET_PPC64)
-    TCGv_i64 t2 = tcg_temp_new_i64();
-#endif
 
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_muls2_i32(t0, t1, t0, t1);
-    tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
 #if defined(TARGET_PPC64)
-    tcg_gen_ext_i32_tl(t2, t1);
-    tcg_gen_deposit_i64(cpu_gpr[rD(ctx->opcode)],
-                        cpu_gpr[rD(ctx->opcode)], t2, 32, 32);
-    tcg_temp_free(t2);
+    tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+#else
+    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
 #endif
 
     tcg_gen_sari_i32(t0, t0, 31);

From 22ffad31d453a82aa290bf196f904580198e8e66 Mon Sep 17 00:00:00 2001
From: Tom Musta <tommusta@gmail.com>
Date: Mon, 25 Aug 2014 14:25:44 -0500
Subject: [PATCH 49/52] target-ppc: Implement mulldo with TCG

Optimize mulldo by using the muls2_i64 operation rather than a helper.  Eliminate
the obsolete helper code.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/helper.h     |  1 -
 target-ppc/int_helper.c | 27 ---------------------------
 target-ppc/translate.c  | 16 ++++++++++++++--
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 509eae52ff..0cfdc8ab8f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -28,7 +28,6 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_3(mulldo, i64, env, i64, i64)
 DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e5b103b0ec..713d777076 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -24,33 +24,6 @@
 #include "helper_regs.h"
 /*****************************************************************************/
 /* Fixed point operations helpers */
-#if defined(TARGET_PPC64)
-
-uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
-{
-    int64_t th;
-    uint64_t tl;
-
-    muls64(&tl, (uint64_t *)&th, arg1, arg2);
-
-    /* th should either contain all 1 bits or all 0 bits and should
-     * match the sign bit of tl; otherwise we have overflowed. */
-
-    if ((int64_t)tl < 0) {
-        if (likely(th == -1LL)) {
-            env->ov = 0;
-        } else {
-            env->so = env->ov = 1;
-        }
-    } else if (likely(th == 0LL)) {
-        env->ov = 0;
-    } else {
-        env->so = env->ov = 1;
-    }
-
-    return (int64_t)tl;
-}
-#endif
 
 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                            uint32_t oe)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 106263477a..d03daeaa48 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1215,8 +1215,20 @@ static void gen_mulld(DisasContext *ctx)
 /* mulldo  mulldo. */
 static void gen_mulldo(DisasContext *ctx)
 {
-    gen_helper_mulldo(cpu_gpr[rD(ctx->opcode)], cpu_env,
-                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)],
+                      cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0);
+
+    tcg_gen_sari_i64(t0, t0, 63);
+    tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+
     if (unlikely(Rc(ctx->opcode) != 0)) {
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
     }

From 8c46f7ec85a4dd9663489b2fa2b425cd7b3653e1 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 27 Aug 2014 18:17:12 +0200
Subject: [PATCH 50/52] spapr_pci: map the MSI window in each PHB

On sPAPR, virtio devices are connected to the PCI bus and use MSI-X.
Commit cc943c36faa192cd4b32af8fe5edb31894017d35 has modified MSI-X
so that writes are made using the bus master address space and follow
the IOMMU path.

Unfortunately, the IOMMU address space address space does not have an
MSI window: the notification is silently dropped in unassigned_mem_write
instead of reaching the guest... The most visible effect is that all
virtio devices are non-functional on sPAPR since then. :(

This patch does the following:
1) map the MSI window into the IOMMU address space for each PHB
   - since each PHB instantiates its own IOMMU address space, we
     can safely map the window at a fixed address (SPAPR_PCI_MSI_WINDOW)
   - no real need to keep the MSI window setup in a separate function,
     the spapr_pci_msi_init() code moves to spapr_phb_realize().

2) kill the global MSI window as it is not needed in the end

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c              |  1 -
 hw/ppc/spapr_pci.c          | 53 +++++++++++++++++--------------------
 include/hw/pci-host/spapr.h |  2 +-
 include/hw/ppc/spapr.h      |  2 --
 4 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 555a007e38..65b28ace82 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1441,7 +1441,6 @@ static void ppc_spapr_init(MachineState *machine)
     spapr_create_nvram(spapr);
 
     /* Set up PCI */
-    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
     spapr_pci_rtas_init();
 
     phb = spapr_create_phb(spapr, 0);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index e894f07f84..ad0da7fdc4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -345,7 +345,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     }
 
     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
-    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
+    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
                      irq, req_num);
 
     /* Add MSI device to cache */
@@ -469,34 +469,6 @@ static const MemoryRegionOps spapr_msi_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN
 };
 
-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
-{
-    uint64_t window_size = 4096;
-
-    /*
-     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
-     * we need to allocate some memory to catch those writes coming
-     * from msi_notify()/msix_notify().
-     * As MSIMessage:addr is going to be the same and MSIMessage:data
-     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
-     * be used.
-     *
-     * For KVM we want to ensure that this memory is a full page so that
-     * our memory slot is of page size granularity.
-     */
-#ifdef CONFIG_KVM
-    if (kvm_enabled()) {
-        window_size = getpagesize();
-    }
-#endif
-
-    spapr->msi_win_addr = addr;
-    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
-                          "msi", window_size);
-    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
-                                &spapr->msiwindow);
-}
-
 /*
  * PHB PCI device
  */
@@ -516,6 +488,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     char *namebuf;
     int i;
     PCIBus *bus;
+    uint64_t msi_window_size = 4096;
 
     if (sphb->index != -1) {
         hwaddr windows_base;
@@ -608,6 +581,28 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                        sphb->dtbusname);
 
+    /*
+     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+     * we need to allocate some memory to catch those writes coming
+     * from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
+     * be used.
+     *
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+#ifdef CONFIG_KVM
+    if (kvm_enabled()) {
+        msi_window_size = getpagesize();
+    }
+#endif
+
+    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 32f0aa7d10..4ea2a0d14a 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -70,7 +70,7 @@ struct sPAPRPHBState {
 
     MemoryRegion memspace, iospace;
     hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    MemoryRegion memwindow, iowindow;
+    MemoryRegion memwindow, iowindow, msiwindow;
 
     uint32_t dma_liobn;
     AddressSpace iommu_as;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 36e8e51435..749daf4dd7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -13,8 +13,6 @@ struct sPAPRNVRAM;
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
-    hwaddr msi_win_addr;
-    MemoryRegion msiwindow;
     struct sPAPRNVRAM *nvram;
     XICSState *icp;
 

From 4a761ffa37d20a5d3ecbb95b45b4c42e54c8f18e Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 2 Jul 2014 19:01:46 +0200
Subject: [PATCH 51/52] PPC: Fix default config ordering and add eTSEC for
 ppc64

We messed up the ordering in our default configs for PPC. The top entries
are generic entries, then come sections that indicate that features are only
in because of a special feature (such as PReP).

Fix the ordering again and while at it add eTSEC support to the ppc64 target
so that we can spawn eTSEC adapters with qemu-system-ppc64.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 default-configs/ppc-softmmu.mak   | 4 ++--
 default-configs/ppc64-softmmu.mak | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 33f8d84e68..d725b23312 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,8 +45,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For PReP
 CONFIG_MC146818RTC=y
-CONFIG_ETSEC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 37a15b7ce2..bd30d69347 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -46,6 +46,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
@@ -58,4 +60,3 @@ CONFIG_I82374=y
 CONFIG_I8257=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y

From 85423d90c7bdbbae3d97ed3a12b5db79d00a3fb0 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sat, 30 Aug 2014 15:55:21 +1000
Subject: [PATCH 52/52] hypervisor property clashes with hypervisor node

dtc fails on a recent QEMU snapshot:

ERROR (name_properties): "name" property in /hypervisor#1 is incorrect ("hypervisor" instead of base node name)

Looking at the device tree we have a hypervisor property:

# lsprop hypervisor
hypervisor       "kvm"

But we also have a hypervisor node, with a name that doesn't match:

# lsprop hypervisor#1/
name             "hypervisor"
compatible       "linux,kvm"
linux,phandle    7e5eb5d8 (2120136152)

Commit c08ce91d309c (spapr: add uuid/host details to device tree)
looks to have collided with an earlier patch. Remove the hypervisor
property.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc/spapr.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 65b28ace82..2ab4460f04 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -363,10 +363,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
-    if (kvm_enabled()) {
-        _FDT((fdt_property_string(fdt, "hypervisor", "kvm")));
-    }
-
     /*
      * Add info to guest to indentify which host is it being run on
      * and what is the uuid of the guest