From 154c69f2b8512bdbfcd48a82bfd902e9449a895a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:39 +0100 Subject: [PATCH 01/50] target/ppc: Fix nip on power management instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those instructions currently raise an exception from within the helper. This tends to result in a bogus nip value in the env context (typically the beginning of the TB). Such a helper needs a gen_update_nip() first. This fixes it with a different approach which is to throw the exception from translate.c instead of the helper using gen_exception_nip() which does the right thing. Exception EXCP_HLT is also used instead of POWERPC_EXCP_STOP to effectively exit from the CPU execution loop. Signed-off-by: Benjamin Herrenschmidt [clg : modified the commit log to comment the use of EXCP_HLT instead of POWERPC_EXCP_STOP] Signed-off-by: Cédric Le Goater Message-Id: <20190215161648.9600-2-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 1 - target/ppc/translate.c | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 751d759fcc..8407e0ade9 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -958,7 +958,6 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) * but this doesn't seem to be a problem. */ env->msr |= (1ull << MSR_EE); - raise_exception(env, EXCP_HLT); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index f4d70e725a..bffdbd9687 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3566,7 +3566,8 @@ static void gen_doze(DisasContext *ctx) t = tcg_const_i32(PPC_PM_DOZE); gen_helper_pminsn(cpu_env, t); tcg_temp_free_i32(t); - gen_stop_exception(ctx); + /* Stop translation, as the CPU is supposed to sleep from now */ + gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); #endif /* defined(CONFIG_USER_ONLY) */ } @@ -3581,7 +3582,8 @@ static void gen_nap(DisasContext *ctx) t = tcg_const_i32(PPC_PM_NAP); gen_helper_pminsn(cpu_env, t); tcg_temp_free_i32(t); - gen_stop_exception(ctx); + /* Stop translation, as the CPU is supposed to sleep from now */ + gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); #endif /* defined(CONFIG_USER_ONLY) */ } @@ -3601,7 +3603,8 @@ static void gen_sleep(DisasContext *ctx) t = tcg_const_i32(PPC_PM_SLEEP); gen_helper_pminsn(cpu_env, t); tcg_temp_free_i32(t); - gen_stop_exception(ctx); + /* Stop translation, as the CPU is supposed to sleep from now */ + gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); #endif /* defined(CONFIG_USER_ONLY) */ } @@ -3616,7 +3619,8 @@ static void gen_rvwinkle(DisasContext *ctx) t = tcg_const_i32(PPC_PM_RVWINKLE); gen_helper_pminsn(cpu_env, t); tcg_temp_free_i32(t); - gen_stop_exception(ctx); + /* Stop translation, as the CPU is supposed to sleep from now */ + gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); #endif /* defined(CONFIG_USER_ONLY) */ } #endif /* #if defined(TARGET_PPC64) */ From 3621e2c96082c6e5094dfec244077a135383d538 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:40 +0100 Subject: [PATCH 02/50] target/ppc: Don't clobber MSR:EE on PM instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When issuing a power management instruction, we set MSR:EE to force ppc_hw_interrupt() into calling powerpc_excp() to deal with the fact that on P7 and P8, the system reset caused by the wakeup needs to be generated regardless of the MSR:EE value (using LPCR only). This however means that the OS will see a bogus SRR1:EE value which is a problem. It also prevents properly implementing P9 STOP "light". So fix this by instead putting some logic in ppc_hw_interrupt() to decide whether to deliver or not by taking into account the fact that we are waking up from sleep. The LPCR isn't checked as this is done in the has_work() test. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-3-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 8407e0ade9..7c7c8d1b9d 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -748,6 +748,7 @@ void ppc_cpu_do_interrupt(CPUState *cs) static void ppc_hw_interrupt(CPUPPCState *env) { PowerPCCPU *cpu = ppc_env_get_cpu(env); + bool async_deliver; /* External reset */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) { @@ -769,11 +770,20 @@ static void ppc_hw_interrupt(CPUPPCState *env) return; } #endif + + /* + * For interrupts that gate on MSR:EE, we need to do something a + * bit more subtle, as we need to let them through even when EE is + * clear when coming out of some power management states (in order + * for them to become a 0x100). + */ + async_deliver = (msr_ee != 0) || env->in_pm_state; + /* Hypervisor decrementer exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) { /* LPCR will be clear when not supported so this will work */ bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE); - if ((msr_ee != 0 || msr_hv == 0) && hdice) { + if ((async_deliver || msr_hv == 0) && hdice) { /* HDEC clears on delivery */ env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HDECR); @@ -783,7 +793,7 @@ static void ppc_hw_interrupt(CPUPPCState *env) /* Extermal interrupt can ignore MSR:EE under some circumstances */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) { bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); - if (msr_ee != 0 || (env->has_hv_mode && msr_hv == 0 && !lpes0)) { + if (async_deliver || (env->has_hv_mode && msr_hv == 0 && !lpes0)) { powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL); return; } @@ -795,7 +805,7 @@ static void ppc_hw_interrupt(CPUPPCState *env) return; } } - if (msr_ee != 0) { + if (async_deliver != 0) { /* Watchdog timer on embedded PowerPC */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_WDT)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_WDT); @@ -943,21 +953,14 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) cs = CPU(ppc_env_get_cpu(env)); cs->halted = 1; - env->in_pm_state = true; /* The architecture specifies that HDEC interrupts are * discarded in PM states */ env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); - /* Technically, nap doesn't set EE, but if we don't set it - * then ppc_hw_interrupt() won't deliver. We could add some - * other tests there based on LPCR but it's simpler to just - * whack EE in. It will be cleared by the 0x100 at wakeup - * anyway. It will still be observable by the guest in SRR1 - * but this doesn't seem to be a problem. - */ - env->msr |= (1ull << MSR_EE); + /* Condition for waking up at 0x100 */ + env->in_pm_state = true; } #endif /* defined(TARGET_PPC64) */ From 21c0d66a9c994c7a406f8a6d04a81c16332e0cb8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:41 +0100 Subject: [PATCH 03/50] target/ppc: Fix support for "STOP light" states on POWER9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit STOP must act differently based on PSSCR:EC on POWER9. When set, it acts like the P7/P8 power management instructions and wake up at 0x100 based on the wakeup conditions in LPCR. When PSSCR:EC is clear however it will wakeup at the next instruction after STOP (if EE is clear) or take the corresponding interrupts (if EE is set). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-4-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/cpu-qom.h | 1 + target/ppc/cpu.h | 12 +++++++++--- target/ppc/excp_helper.c | 8 ++++++-- target/ppc/translate.c | 13 ++++++++++++- target/ppc/translate_init.inc.c | 7 +++++++ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 3130802304..e9cb158423 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -122,6 +122,7 @@ typedef enum { PPC_PM_NAP, PPC_PM_SLEEP, PPC_PM_RVWINKLE, + PPC_PM_STOP, } powerpc_pm_insn_t; /*****************************************************************************/ diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 325ebbeb98..5b1899bfc9 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -414,6 +414,10 @@ struct ppc_slb_t { #define LPCR_HVICE PPC_BIT(62) /* HV Virtualisation Int Enable */ #define LPCR_HDICE PPC_BIT(63) +/* PSSCR bits */ +#define PSSCR_ESL PPC_BIT(42) /* Enable State Loss */ +#define PSSCR_EC PPC_BIT(43) /* Exit Criterion */ + #define msr_sf ((env->msr >> MSR_SF) & 1) #define msr_isf ((env->msr >> MSR_ISF) & 1) #define msr_shv ((env->msr >> MSR_SHV) & 1) @@ -1110,9 +1114,11 @@ struct CPUPPCState { * instructions and SPRs are diallowed if MSR:HV is 0 */ bool has_hv_mode; - /* On P7/P8, set when in PM state, we need to handle resume - * in a special way (such as routing some resume causes to - * 0x100), so flag this here. + + /* + * On P7/P8/P9, set when in PM state, we need to handle resume in + * a special way (such as routing some resume causes to 0x100), so + * flag this here. */ bool in_pm_state; #endif diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 7c7c8d1b9d..97503193ef 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -97,7 +97,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) asrr0 = -1; asrr1 = -1; - /* check for special resume at 0x100 from doze/nap/sleep/winkle on P7/P8 */ + /* + * check for special resume at 0x100 from doze/nap/sleep/winkle on + * P7/P8/P9 + */ if (env->in_pm_state) { env->in_pm_state = false; @@ -960,7 +963,8 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); /* Condition for waking up at 0x100 */ - env->in_pm_state = true; + env->in_pm_state = (insn != PPC_PM_STOP) || + (env->spr[SPR_PSSCR] & PSSCR_EC); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index bffdbd9687..fde7ead7b7 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3589,7 +3589,18 @@ static void gen_nap(DisasContext *ctx) static void gen_stop(DisasContext *ctx) { - gen_nap(ctx); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + TCGv_i32 t; + + CHK_HV; + t = tcg_const_i32(PPC_PM_STOP); + gen_helper_pminsn(cpu_env, t); + tcg_temp_free_i32(t); + /* Stop translation, as the CPU is supposed to sleep from now */ + gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); +#endif /* defined(CONFIG_USER_ONLY) */ } static void gen_sleep(DisasContext *ctx) diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index d884906004..8b1d324b3b 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8801,9 +8801,16 @@ static bool cpu_has_work_POWER9(CPUState *cs) CPUPPCState *env = &cpu->env; if (cs->halted) { + uint64_t psscr = env->spr[SPR_PSSCR]; + if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { return false; } + + /* If EC is clear, just return true on any pending interrupt */ + if (!(psscr & PSSCR_EC)) { + return true; + } /* External Exception */ if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && (env->spr[SPR_LPCR] & LPCR_EEE)) { From dead760b00789d738f946f01849a2fdc57c5e765 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:42 +0100 Subject: [PATCH 04/50] target/ppc: Move "wakeup reset" code to a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves the code to handle waking up from the 0x100 vector from powerpc_excp() to a separate function, as the former is already way too big as it is. No functional change. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-5-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 75 ++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 97503193ef..489a54f51b 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -65,6 +65,46 @@ static inline void dump_syscall(CPUPPCState *env) ppc_dump_gpr(env, 6), env->nip); } +static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, + target_ulong *msr) +{ + /* We no longer are in a PM state */ + env->in_pm_state = false; + + /* Pretend to be returning from doze always as we don't lose state */ + *msr |= (0x1ull << (63 - 47)); + + /* Machine checks are sent normally */ + if (excp == POWERPC_EXCP_MCHECK) { + return excp; + } + switch (excp) { + case POWERPC_EXCP_RESET: + *msr |= 0x4ull << (63 - 45); + break; + case POWERPC_EXCP_EXTERNAL: + *msr |= 0x8ull << (63 - 45); + break; + case POWERPC_EXCP_DECR: + *msr |= 0x6ull << (63 - 45); + break; + case POWERPC_EXCP_SDOOR: + *msr |= 0x5ull << (63 - 45); + break; + case POWERPC_EXCP_SDOOR_HV: + *msr |= 0x3ull << (63 - 45); + break; + case POWERPC_EXCP_HV_MAINT: + *msr |= 0xaull << (63 - 45); + break; + default: + cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", + excp); + } + return POWERPC_EXCP_RESET; +} + + /* Note that this function should be greatly optimized * when called with a constant excp, from ppc_hw_interrupt */ @@ -102,40 +142,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) * P7/P8/P9 */ if (env->in_pm_state) { - env->in_pm_state = false; - - /* Pretend to be returning from doze always as we don't lose state */ - msr |= (0x1ull << (63 - 47)); - - /* Non-machine check are routed to 0x100 with a wakeup cause - * encoded in SRR1 - */ - if (excp != POWERPC_EXCP_MCHECK) { - switch (excp) { - case POWERPC_EXCP_RESET: - msr |= 0x4ull << (63 - 45); - break; - case POWERPC_EXCP_EXTERNAL: - msr |= 0x8ull << (63 - 45); - break; - case POWERPC_EXCP_DECR: - msr |= 0x6ull << (63 - 45); - break; - case POWERPC_EXCP_SDOOR: - msr |= 0x5ull << (63 - 45); - break; - case POWERPC_EXCP_SDOOR_HV: - msr |= 0x3ull << (63 - 45); - break; - case POWERPC_EXCP_HV_MAINT: - msr |= 0xaull << (63 - 45); - break; - default: - cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", - excp); - } - excp = POWERPC_EXCP_RESET; - } + excp = powerpc_reset_wakeup(cs, env, excp, &msr); } /* Exception targetting modifiers From 1e7fd61d97fd4b25c7c8f70eed6a3ffd50893b74 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:43 +0100 Subject: [PATCH 05/50] target/ppc: Rename "in_pm_state" to "resume_as_sreset" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better reflect what this does, as it's specific to some of the P7/P8/P9 PM states, not generic. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-6-clg@kaod.org> Signed-off-by: David Gibson --- hw/ppc/ppc.c | 2 +- target/ppc/cpu.h | 6 +++--- target/ppc/excp_helper.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index cffdc3914a..12439dbe5d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -776,7 +776,7 @@ static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) * interrupts in a PM state. Not only they don't cause a * wakeup but they also get effectively discarded. */ - if (!env->in_pm_state) { + if (!env->resume_as_sreset) { ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1); } } diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 5b1899bfc9..d2364564a0 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1117,10 +1117,10 @@ struct CPUPPCState { /* * On P7/P8/P9, set when in PM state, we need to handle resume in - * a special way (such as routing some resume causes to 0x100), so - * flag this here. + * a special way (such as routing some resume causes to 0x100, ie, + * sreset), so flag this here. */ - bool in_pm_state; + bool resume_as_sreset; #endif /* Those resources are used only during code translation */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 489a54f51b..7536620a41 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -69,7 +69,7 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, target_ulong *msr) { /* We no longer are in a PM state */ - env->in_pm_state = false; + env->resume_as_sreset = false; /* Pretend to be returning from doze always as we don't lose state */ *msr |= (0x1ull << (63 - 47)); @@ -141,7 +141,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) * check for special resume at 0x100 from doze/nap/sleep/winkle on * P7/P8/P9 */ - if (env->in_pm_state) { + if (env->resume_as_sreset) { excp = powerpc_reset_wakeup(cs, env, excp, &msr); } @@ -787,7 +787,7 @@ static void ppc_hw_interrupt(CPUPPCState *env) * clear when coming out of some power management states (in order * for them to become a 0x100). */ - async_deliver = (msr_ee != 0) || env->in_pm_state; + async_deliver = (msr_ee != 0) || env->resume_as_sreset; /* Hypervisor decrementer exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) { @@ -970,7 +970,7 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); /* Condition for waking up at 0x100 */ - env->in_pm_state = (insn != PPC_PM_STOP) || + env->resume_as_sreset = (insn != PPC_PM_STOP) || (env->spr[SPR_PSSCR] & PSSCR_EC); } #endif /* defined(TARGET_PPC64) */ From a790e82b13854f443850cbf11b25cdb98f6885c5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:44 +0100 Subject: [PATCH 06/50] target/ppc: Add POWER9 exception model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And use it to get the correct HILE bit in HID0 Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-7-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/cpu-qom.h | 2 ++ target/ppc/excp_helper.c | 17 +++++++++++++---- target/ppc/translate.c | 3 ++- target/ppc/translate_init.inc.c | 2 +- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index e9cb158423..904ee694ac 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -113,6 +113,8 @@ enum powerpc_excp_t { POWERPC_EXCP_POWER7, /* POWER8 exception model */ POWERPC_EXCP_POWER8, + /* POWER9 exception model */ + POWERPC_EXCP_POWER9, }; /*****************************************************************************/ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 7536620a41..37546bb0f0 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -147,7 +147,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* Exception targetting modifiers * - * LPES0 is supported on POWER7/8 + * LPES0 is supported on POWER7/8/9 * LPES1 is not supported (old iSeries mode) * * On anything else, we behave as if LPES0 is 1 @@ -158,9 +158,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) */ #if defined(TARGET_PPC64) if (excp_model == POWERPC_EXCP_POWER7 || - excp_model == POWERPC_EXCP_POWER8) { + excp_model == POWERPC_EXCP_POWER8 || + excp_model == POWERPC_EXCP_POWER9) { lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); - if (excp_model == POWERPC_EXCP_POWER8) { + if (excp_model != POWERPC_EXCP_POWER7) { ail = (env->spr[SPR_LPCR] & LPCR_AIL) >> LPCR_AIL_SHIFT; } else { ail = 0; @@ -662,7 +663,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } } else if (excp_model == POWERPC_EXCP_POWER8) { if (new_msr & MSR_HVB) { - if (env->spr[SPR_HID0] & (HID0_HILE | HID0_POWER9_HILE)) { + if (env->spr[SPR_HID0] & HID0_HILE) { + new_msr |= (target_ulong)1 << MSR_LE; + } + } else if (env->spr[SPR_LPCR] & LPCR_ILE) { + new_msr |= (target_ulong)1 << MSR_LE; + } + } else if (excp_model == POWERPC_EXCP_POWER9) { + if (new_msr & MSR_HVB) { + if (env->spr[SPR_HID0] & HID0_POWER9_HILE) { new_msr |= (target_ulong)1 << MSR_LE; } } else if (env->spr[SPR_LPCR] & LPCR_ILE) { diff --git a/target/ppc/translate.c b/target/ppc/translate.c index fde7ead7b7..819221f246 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -7481,7 +7481,8 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, #if defined(TARGET_PPC64) if (env->excp_model == POWERPC_EXCP_POWER7 || - env->excp_model == POWERPC_EXCP_POWER8) { + env->excp_model == POWERPC_EXCP_POWER8 || + env->excp_model == POWERPC_EXCP_POWER9) { cpu_fprintf(f, "HSRR0 " TARGET_FMT_lx " HSRR1 " TARGET_FMT_lx "\n", env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]); } diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 8b1d324b3b..9909e58761 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8905,7 +8905,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) pcc->hash64_opts = &ppc_hash64_opts_POWER7; pcc->radix_page_info = &POWER9_radix_page_info; #endif - pcc->excp_model = POWERPC_EXCP_POWER8; + pcc->excp_model = POWERPC_EXCP_POWER9; pcc->bus_model = PPC_FLAGS_INPUT_POWER7; pcc->bfd_mach = bfd_mach_ppc64; pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | From f8154fd22bf80b1555bac46119747e899c09d0c9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:45 +0100 Subject: [PATCH 07/50] target/ppc: Detect erroneous condition in interrupt delivery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's very easy for the CPU specific has_work() implementation and the logic in ppc_hw_interrupt() to be subtly out of sync. This can occasionally allow a CPU to wakeup from a PM state and resume executing past the PM instruction when it should resume at the 0x100 vector. This detects if it happens and aborts, making it a lot easier to catch such bugs when testing rather than chasing obscure guest misbehaviour. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-8-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 37546bb0f0..1a2f469a5f 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -878,6 +878,22 @@ static void ppc_hw_interrupt(CPUPPCState *env) return; } } + + if (env->resume_as_sreset) { + /* + * This is a bug ! It means that has_work took us out of halt without + * anything to deliver while in a PM state that requires getting + * out via a 0x100 + * + * This means we will incorrectly execute past the power management + * instruction instead of triggering a reset. + * + * It generally means a discrepancy between the wakup conditions in the + * processor has_work implementation and the logic in this function. + */ + cpu_abort(CPU(ppc_env_get_cpu(env)), + "Wakeup from PM state but interrupt Undelivered"); + } } void ppc_cpu_do_system_reset(CPUState *cs) From d8ce5fd6643d5c568d6357354c205474e7022602 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:46 +0100 Subject: [PATCH 08/50] target/ppc: Add Hypervisor Virtualization Interrupt on POWER9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for delivering that exception Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-9-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/cpu.h | 5 ++++- target/ppc/excp_helper.c | 17 ++++++++++++++++- target/ppc/translate_init.inc.c | 16 +++++++++++++++- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index d2364564a0..7d37d85ac5 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -160,8 +160,10 @@ enum { /* Server doorbell variants */ POWERPC_EXCP_SDOOR = 99, POWERPC_EXCP_SDOOR_HV = 100, + /* ISA 3.00 additions */ + POWERPC_EXCP_HVIRT = 101, /* EOL */ - POWERPC_EXCP_NB = 101, + POWERPC_EXCP_NB = 102, /* QEMU exceptions: used internally during code translation */ POWERPC_EXCP_STOP = 0x200, /* stop translation */ POWERPC_EXCP_BRANCH = 0x201, /* branch instruction */ @@ -2349,6 +2351,7 @@ enum { PPC_INTERRUPT_PERFM, /* Performance monitor interrupt */ PPC_INTERRUPT_HMI, /* Hypervisor Maintainance interrupt */ PPC_INTERRUPT_HDOORBELL, /* Hypervisor Doorbell interrupt */ + PPC_INTERRUPT_HVIRT, /* Hypervisor virtualization interrupt */ }; /* Processor Compatibility mask (PCR) */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 1a2f469a5f..d171a5eb62 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -97,6 +97,9 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, case POWERPC_EXCP_HV_MAINT: *msr |= 0xaull << (63 - 45); break; + case POWERPC_EXCP_HVIRT: + *msr |= 0x9ull << (63 - 45); + break; default: cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", excp); @@ -427,6 +430,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ case POWERPC_EXCP_HV_EMU: + case POWERPC_EXCP_HVIRT: /* Hypervisor virtualization */ srr0 = SPR_HSRR0; srr1 = SPR_HSRR1; new_msr |= (target_ulong)MSR_HVB; @@ -809,7 +813,18 @@ static void ppc_hw_interrupt(CPUPPCState *env) return; } } - /* Extermal interrupt can ignore MSR:EE under some circumstances */ + + /* Hypervisor virtualization interrupt */ + if (env->pending_interrupts & (1 << PPC_INTERRUPT_HVIRT)) { + /* LPCR will be clear when not supported so this will work */ + bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE); + if ((async_deliver || msr_hv == 0) && hvice) { + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HVIRT); + return; + } + } + + /* External interrupt can ignore MSR:EE under some circumstances */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) { bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); if (async_deliver || (env->has_hv_mode && msr_hv == 0 && !lpes0)) { diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 9909e58761..6062163d85 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -3313,6 +3313,15 @@ static void init_excp_POWER8(CPUPPCState *env) #endif } +static void init_excp_POWER9(CPUPPCState *env) +{ + init_excp_POWER8(env); + +#if !defined(CONFIG_USER_ONLY) + env->excp_vectors[POWERPC_EXCP_HVIRT] = 0x00000EA0; +#endif +} + #endif /*****************************************************************************/ @@ -8783,7 +8792,7 @@ static void init_proc_POWER9(CPUPPCState *env) env->icache_line_size = 128; /* Allocate hardware IRQ controller */ - init_excp_POWER8(env); + init_excp_POWER9(env); ppcPOWER7_irq_init(ppc_env_get_cpu(env)); } @@ -8836,6 +8845,11 @@ static bool cpu_has_work_POWER9(CPUState *cs) (env->spr[SPR_LPCR] & LPCR_HDEE)) { return true; } + /* Hypervisor virtualization exception */ + if ((env->pending_interrupts & (1u << PPC_INTERRUPT_HVIRT)) && + (env->spr[SPR_LPCR] & LPCR_HVEE)) { + return true; + } if (env->pending_interrupts & (1u << PPC_INTERRUPT_RESET)) { return true; } From 67afe7759df4c3dec8abfc373b98d1a8d108ff66 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:47 +0100 Subject: [PATCH 09/50] target/ppc: Add POWER9 external interrupt model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for the Hypervisor directed interrupts in addition to the OS ones. Signed-off-by: Benjamin Herrenschmidt [clg: - modified the icp_realize() and xive_tctx_realize() to take into account explicitely the POWER9 interrupt model - introduced a specific power9_set_irq for POWER9 ] Signed-off-by: Cédric Le Goater Message-Id: <20190215161648.9600-10-clg@kaod.org> Signed-off-by: David Gibson --- hw/intc/xics.c | 3 +++ hw/intc/xive.c | 3 +++ hw/ppc/ppc.c | 42 +++++++++++++++++++++++++++++++++ include/hw/ppc/ppc.h | 2 ++ target/ppc/cpu-qom.h | 2 ++ target/ppc/cpu.h | 7 ++++++ target/ppc/translate_init.inc.c | 4 ++-- 7 files changed, 61 insertions(+), 2 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 3009fa7472..767fdeb829 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -338,6 +338,9 @@ static void icp_realize(DeviceState *dev, Error **errp) case PPC_FLAGS_INPUT_POWER7: icp->output = env->irq_inputs[POWER7_INPUT_INT]; break; + case PPC_FLAGS_INPUT_POWER9: /* For SPAPR xics emulation */ + icp->output = env->irq_inputs[POWER9_INPUT_INT]; + break; case PPC_FLAGS_INPUT_970: icp->output = env->irq_inputs[PPC970_INPUT_INT]; diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 2e9b8efd43..425aa97ef9 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -484,6 +484,9 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) case PPC_FLAGS_INPUT_POWER7: tctx->output = env->irq_inputs[POWER7_INPUT_INT]; break; + case PPC_FLAGS_INPUT_POWER9: + tctx->output = env->irq_inputs[POWER9_INPUT_INT]; + break; default: error_setg(errp, "XIVE interrupt controller does not support " diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 12439dbe5d..d1e3d4cd20 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -306,6 +306,48 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu, POWER7_INPUT_NB); } + +/* POWER9 internal IRQ controller */ +static void power9_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, + env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: + /* Level sensitive - active high */ + LOG_IRQ("%s: set the external IRQ state to %d\n", + __func__, level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case POWER9_INPUT_HINT: + /* Level sensitive - active high */ + LOG_IRQ("%s: set the external IRQ state to %d\n", + __func__, level); + ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); + break; + default: + /* Unknown pin - do nothing */ + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } + if (level) { + env->irq_input_state |= 1 << pin; + } else { + env->irq_input_state &= ~(1 << pin); + } +} + +void ppcPOWER9_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&power9_set_irq, cpu, + POWER9_INPUT_NB); +} #endif /* defined(TARGET_PPC64) */ void ppc40x_core_reset(PowerPCCPU *cpu) diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index 298ec354a8..746170f635 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -73,6 +73,7 @@ static inline void ppc40x_irq_init(PowerPCCPU *cpu) {} static inline void ppc6xx_irq_init(PowerPCCPU *cpu) {} static inline void ppc970_irq_init(PowerPCCPU *cpu) {} static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} +static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} static inline void ppce500_irq_init(PowerPCCPU *cpu) {} #else void ppc40x_irq_init(PowerPCCPU *cpu); @@ -80,6 +81,7 @@ void ppce500_irq_init(PowerPCCPU *cpu); void ppc6xx_irq_init(PowerPCCPU *cpu); void ppc970_irq_init(PowerPCCPU *cpu); void ppcPOWER7_irq_init(PowerPCCPU *cpu); +void ppcPOWER9_irq_init(PowerPCCPU *cpu); #endif /* PPC machines for OpenBIOS */ diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 904ee694ac..ae51fe754e 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -142,6 +142,8 @@ enum powerpc_input_t { PPC_FLAGS_INPUT_970, /* PowerPC POWER7 bus */ PPC_FLAGS_INPUT_POWER7, + /* PowerPC POWER9 bus */ + PPC_FLAGS_INPUT_POWER9, /* PowerPC 401 bus */ PPC_FLAGS_INPUT_401, /* Freescale RCPU bus */ diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 7d37d85ac5..ececad9f1f 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2327,6 +2327,13 @@ enum { * them */ POWER7_INPUT_NB, }; + +enum { + /* POWER9 input pins */ + POWER9_INPUT_INT = 0, + POWER9_INPUT_HINT = 1, + POWER9_INPUT_NB, +}; #endif /* Hardware exceptions definitions */ diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 6062163d85..9d84164915 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8793,7 +8793,7 @@ static void init_proc_POWER9(CPUPPCState *env) /* Allocate hardware IRQ controller */ init_excp_POWER9(env); - ppcPOWER7_irq_init(ppc_env_get_cpu(env)); + ppcPOWER9_irq_init(ppc_env_get_cpu(env)); } static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr) @@ -8920,7 +8920,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) pcc->radix_page_info = &POWER9_radix_page_info; #endif pcc->excp_model = POWERPC_EXCP_POWER9; - pcc->bus_model = PPC_FLAGS_INPUT_POWER7; + pcc->bus_model = PPC_FLAGS_INPUT_POWER9; pcc->bfd_mach = bfd_mach_ppc64; pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | From 6eebe6dccb343f46ee2331e4173016b0feb0ab1d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 17:16:48 +0100 Subject: [PATCH 10/50] target/ppc: Add support for LPCR:HEIC on POWER9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This controls whether the External Interrupt (0x500) can be delivered to the hypervisor or not. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Message-Id: <20190215161648.9600-11-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/excp_helper.c | 5 ++++- target/ppc/translate_init.inc.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index d171a5eb62..39bedbb11d 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -827,7 +827,10 @@ static void ppc_hw_interrupt(CPUPPCState *env) /* External interrupt can ignore MSR:EE under some circumstances */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_EXT)) { bool lpes0 = !!(env->spr[SPR_LPCR] & LPCR_LPES0); - if (async_deliver || (env->has_hv_mode && msr_hv == 0 && !lpes0)) { + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + /* HEIC blocks delivery to the hypervisor */ + if ((async_deliver && !(heic && msr_hv && !msr_pr)) || + (env->has_hv_mode && msr_hv == 0 && !lpes0)) { powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL); return; } diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 9d84164915..965c5273a6 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -8823,7 +8823,10 @@ static bool cpu_has_work_POWER9(CPUState *cs) /* External Exception */ if ((env->pending_interrupts & (1u << PPC_INTERRUPT_EXT)) && (env->spr[SPR_LPCR] & LPCR_EEE)) { - return true; + bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC); + if (heic == 0 || !msr_hv || msr_pr) { + return true; + } } /* Decrementer Exception */ if ((env->pending_interrupts & (1u << PPC_INTERRUPT_DECR)) && From 27461d69a0f108dea756419251acc3ea65198f1b Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Mon, 18 Feb 2019 23:43:49 +0530 Subject: [PATCH 11/50] ppc: add host-serial and host-model machine attributes (CVE-2019-8934) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc hosts, hypervisor shares following system attributes - /proc/device-tree/system-id - /proc/device-tree/model with a guest. This could lead to information leakage and misuse.[*] Add machine attributes to control such system information exposure to a guest. [*] https://wiki.openstack.org/wiki/OSSN/OSSN-0028 Reported-by: Daniel P. Berrangé Fix-suggested-by: Daniel P. Berrangé Signed-off-by: Prasad J Pandit Message-Id: <20190218181349.23885-1-ppandit@redhat.com> Reviewed-by: Daniel P. Berrangé Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr.c | 76 ++++++++++++++++++++++++++++++++++++++---- include/hw/ppc/spapr.h | 2 ++ 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index abf9ebce59..b3631e22c4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1247,13 +1247,30 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr) * Add info to guest to indentify which host is it being run on * and what is the uuid of the guest */ - if (kvmppc_get_host_model(&buf)) { - _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); - g_free(buf); + if (spapr->host_model && !g_str_equal(spapr->host_model, "none")) { + if (g_str_equal(spapr->host_model, "passthrough")) { + /* -M host-model=passthrough */ + if (kvmppc_get_host_model(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); + g_free(buf); + } + } else { + /* -M host-model= */ + _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model)); + } } - if (kvmppc_get_host_serial(&buf)) { - _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); - g_free(buf); + + if (spapr->host_serial && !g_str_equal(spapr->host_serial, "none")) { + if (g_str_equal(spapr->host_serial, "passthrough")) { + /* -M host-serial=passthrough */ + if (kvmppc_get_host_serial(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); + g_free(buf); + } + } else { + /* -M host-serial= */ + _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial)); + } } buf = qemu_uuid_unparse_strdup(&qemu_uuid); @@ -3144,6 +3161,36 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp) } } +static char *spapr_get_host_model(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_model); +} + +static void spapr_set_host_model(Object *obj, const char *value, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_model); + spapr->host_model = g_strdup(value); +} + +static char *spapr_get_host_serial(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_serial); +} + +static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_serial); + spapr->host_serial = g_strdup(value); +} + static void spapr_instance_init(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); @@ -3189,6 +3236,17 @@ static void spapr_instance_init(Object *obj) object_property_set_description(obj, "ic-mode", "Specifies the interrupt controller mode (xics, xive, dual)", NULL); + + object_property_add_str(obj, "host-model", + spapr_get_host_model, spapr_set_host_model, + &error_abort); + object_property_set_description(obj, "host-model", + "Set host's model-id to use - none|passthrough|string", &error_abort); + object_property_add_str(obj, "host-serial", + spapr_get_host_serial, spapr_set_host_serial, + &error_abort); + object_property_set_description(obj, "host-serial", + "Set host's system-id to use - none|passthrough|string", &error_abort); } static void spapr_machine_finalizefn(Object *obj) @@ -4086,9 +4144,15 @@ DEFINE_SPAPR_MACHINE(4_0, "4.0", true); static void spapr_machine_3_1_class_options(MachineClass *mc) { sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { TYPE_SPAPR_MACHINE, "host-model", "passthrough" }, + { TYPE_SPAPR_MACHINE, "host-serial", "passthrough" }, + }; spapr_machine_4_0_class_options(mc); compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); smc->update_dt_enabled = false; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 631fc5103b..fec0f26f49 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -177,6 +177,8 @@ struct sPAPRMachineState { /*< public >*/ char *kvm_type; + char *host_model; + char *host_serial; int32_t irq_map_nr; unsigned long *irq_map; From d40bfcbbbb0ab2c9b585b94956d225ee51a013f8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:21:57 +0100 Subject: [PATCH 12/50] cpus: Properly release the iothread lock when killing a dummy VCPU This enables CPU unplug under qtest. Reviewed-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-2-david@redhat.com> Signed-off-by: David Gibson --- cpus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpus.c b/cpus.c index 154daf57dc..e83f72b48b 100644 --- a/cpus.c +++ b/cpus.c @@ -1333,6 +1333,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_wait_io_event(cpu); } while (!cpu->unplug); + qemu_mutex_unlock_iothread(); rcu_unregister_thread(); return NULL; #endif From b8165118f52ce5ee88565d3cec83d30374efdc96 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:21:58 +0100 Subject: [PATCH 13/50] spapr: support memory unplug for qtest Fake availability of OV5_HP_EVT, so we can test memory unplug in qtest. Reviewed-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Acked-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-3-david@redhat.com> Signed-off-by: David Gibson --- hw/ppc/spapr_ovec.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c index 318bf33de4..12510b236a 100644 --- a/hw/ppc/spapr_ovec.c +++ b/hw/ppc/spapr_ovec.c @@ -16,6 +16,7 @@ #include "qemu/bitmap.h" #include "exec/address-spaces.h" #include "qemu/error-report.h" +#include "sysemu/qtest.h" #include "trace.h" #include @@ -131,6 +132,11 @@ bool spapr_ovec_test(sPAPROptionVector *ov, long bitnr) g_assert(ov); g_assert(bitnr < OV_MAXBITS); + /* support memory unplug for qtest */ + if (qtest_enabled() && bitnr == OV5_HP_EVT) { + return true; + } + return test_bit(bitnr, ov->bitmap) ? true : false; } From 0d9d4872e54945c161933f1c1f6eddf1f18dfc90 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:21:59 +0100 Subject: [PATCH 14/50] tests/device-plug: Add a simple PCI unplug request test The issue with testing asynchronous unplug requests it that they usually require a running guest to handle the request. However, to test if unplug of PCI devices works, we can apply a nice little trick on some architectures: On system reset, x86 ACPI, s390x and spapr will perform the unplug, resulting in the device of interest to get deleted and a DEVICE_DELETED event getting sent. On s390x, we still get a warning qemu-system-s390x: -device virtio-mouse-pci,id=dev0: warning: Plugging a PCI/zPCI device without the 'zpci' CPU feature enabled; the guest will not be able to see/use this device This will be fixed soon, when we enable the zpci CPU feature always (Conny already has a patch for this queued). Reviewed-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Reviewed-by: Thomas Huth Reviewed-by: Collin Walling Reviewed-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-4-david@redhat.com> Acked-by: Cornelia Huck Signed-off-by: David Gibson --- tests/Makefile.include | 4 ++ tests/device-plug-test.c | 93 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 tests/device-plug-test.c diff --git a/tests/Makefile.include b/tests/Makefile.include index 3741f8f6dd..b62d82beb4 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -192,6 +192,7 @@ check-qtest-i386-$(CONFIG_ISA_IPMI_KCS) += tests/ipmi-kcs-test$(EXESUF) # check-qtest-i386-$(CONFIG_ISA_IPMI_BT) += tests/ipmi-bt-test$(EXESUF) check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) +check-qtest-i386-y += tests/device-plug-test$(EXESUF) check-qtest-i386-y += tests/drive_del-test$(EXESUF) check-qtest-i386-$(CONFIG_WDT_IB700) += tests/wdt_ib700-test$(EXESUF) check-qtest-i386-y += tests/tco-test$(EXESUF) @@ -256,6 +257,7 @@ check-qtest-ppc-$(CONFIG_M48T59) += tests/m48t59-test$(EXESUF) check-qtest-ppc64-y += $(check-qtest-ppc-y) check-qtest-ppc64-$(CONFIG_PSERIES) += tests/spapr-phb-test$(EXESUF) +check-qtest-ppc64-$(CONFIG_PSERIES) += tests/device-plug-test$(EXESUF) check-qtest-ppc64-$(CONFIG_POWERNV) += tests/pnv-xscom-test$(EXESUF) check-qtest-ppc64-y += tests/migration-test$(EXESUF) check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) @@ -310,6 +312,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) check-qtest-s390x-y += tests/drive_del-test$(EXESUF) +check-qtest-s390x-y += tests/device-plug-test$(EXESUF) check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) @@ -750,6 +753,7 @@ tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o tests/qom-test$(EXESUF): tests/qom-test.o tests/test-hmp$(EXESUF): tests/test-hmp.o tests/machine-none-test$(EXESUF): tests/machine-none-test.o +tests/device-plug-test$(EXESUF): tests/device-plug-test.o tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-virtio-obj-y) tests/nvme-test$(EXESUF): tests/nvme-test.o $(libqos-pc-obj-y) tests/pvpanic-test$(EXESUF): tests/pvpanic-test.o diff --git a/tests/device-plug-test.c b/tests/device-plug-test.c new file mode 100644 index 0000000000..cd9ada539d --- /dev/null +++ b/tests/device-plug-test.c @@ -0,0 +1,93 @@ +/* + * QEMU device plug/unplug handling + * + * Copyright (C) 2019 Red Hat Inc. + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" + +static void device_del_request(QTestState *qtest, const char *id) +{ + QDict *resp; + + resp = qtest_qmp(qtest, + "{'execute': 'device_del', 'arguments': { 'id': %s } }", + id); + g_assert(qdict_haskey(resp, "return")); + qobject_unref(resp); +} + +static void system_reset(QTestState *qtest) +{ + QDict *resp; + + resp = qtest_qmp(qtest, "{'execute': 'system_reset'}"); + g_assert(qdict_haskey(resp, "return")); + qobject_unref(resp); +} + +static void wait_device_deleted_event(QTestState *qtest, const char *id) +{ + QDict *resp, *data; + QString *qstr; + + /* + * Other devices might get removed along with the removed device. Skip + * these. The device of interest will be the last one. + */ + for (;;) { + resp = qtest_qmp_eventwait_ref(qtest, "DEVICE_DELETED"); + data = qdict_get_qdict(resp, "data"); + if (!data || !qdict_get(data, "device")) { + qobject_unref(resp); + continue; + } + qstr = qobject_to(QString, qdict_get(data, "device")); + g_assert(qstr); + if (!strcmp(qstring_get_str(qstr), id)) { + qobject_unref(resp); + break; + } + qobject_unref(resp); + } +} + +static void test_pci_unplug_request(void) +{ + QTestState *qtest = qtest_initf("-device virtio-mouse-pci,id=dev0"); + + /* + * Request device removal. As the guest is not running, the request won't + * be processed. However during system reset, the removal will be + * handled, removing the device. + */ + device_del_request(qtest, "dev0"); + system_reset(qtest); + wait_device_deleted_event(qtest, "dev0"); + + qtest_quit(qtest); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + /* + * We need a system that will process unplug requests during system resets + * and does not do PCI surprise removal. This holds for x86 ACPI, + * s390x and spapr. + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); + + return g_test_run(); +} From 613ebbec647d6d159a2fc81b4c34290765901de3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:22:00 +0100 Subject: [PATCH 15/50] tests/device-plug: Add CCW unplug test for s390x As CCW unplugs are surprise removals without asking the guest first, we can test this without any guest interaction. Reviewed-by: Michael S. Tsirkin Reviewed-by: Thomas Huth Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-5-david@redhat.com> Acked-by: Cornelia Huck Signed-off-by: David Gibson --- tests/device-plug-test.c | 41 +++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tests/device-plug-test.c b/tests/device-plug-test.c index cd9ada539d..d1a6c94af2 100644 --- a/tests/device-plug-test.c +++ b/tests/device-plug-test.c @@ -15,17 +15,26 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" -static void device_del_request(QTestState *qtest, const char *id) +static void device_del_start(QTestState *qtest, const char *id) { - QDict *resp; + qtest_qmp_send(qtest, + "{'execute': 'device_del', 'arguments': { 'id': %s } }", id); +} + +static void device_del_finish(QTestState *qtest) +{ + QDict *resp = qtest_qmp_receive(qtest); - resp = qtest_qmp(qtest, - "{'execute': 'device_del', 'arguments': { 'id': %s } }", - id); g_assert(qdict_haskey(resp, "return")); qobject_unref(resp); } +static void device_del_request(QTestState *qtest, const char *id) +{ + device_del_start(qtest, id); + device_del_finish(qtest); +} + static void system_reset(QTestState *qtest) { QDict *resp; @@ -77,8 +86,25 @@ static void test_pci_unplug_request(void) qtest_quit(qtest); } +static void test_ccw_unplug(void) +{ + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); + + /* + * The DEVICE_DELETED events will be sent before the command + * completes. + */ + device_del_start(qtest, "dev0"); + wait_device_deleted_event(qtest, "dev0"); + device_del_finish(qtest); + + qtest_quit(qtest); +} + int main(int argc, char **argv) { + const char *arch = qtest_get_arch(); + g_test_init(&argc, &argv, NULL); /* @@ -89,5 +115,10 @@ int main(int argc, char **argv) qtest_add_func("/device-plug/pci-unplug-request", test_pci_unplug_request); + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", + test_ccw_unplug); + } + return g_test_run(); } From c76480e5a0cc068b5fd39b50f4e36496bfaba95a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:22:01 +0100 Subject: [PATCH 16/50] tests/device-plug: Add CPU core unplug request test for spapr We can easily test this, just like PCI. On s390x, cpu unplug is not supported. On x86 ACPI, cpu unplug requires guest interaction to work, so it can't be tested that easily. We might add tests for ACPI later. Reviewed-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Reviewed-by: Thomas Huth Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-6-david@redhat.com> Signed-off-by: David Gibson --- tests/device-plug-test.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/device-plug-test.c b/tests/device-plug-test.c index d1a6c94af2..0262ad6be6 100644 --- a/tests/device-plug-test.c +++ b/tests/device-plug-test.c @@ -101,6 +101,21 @@ static void test_ccw_unplug(void) qtest_quit(qtest); } +static void test_spapr_cpu_unplug_request(void) +{ + QTestState *qtest; + + qtest = qtest_initf("-cpu power9_v2.0 -smp 1,maxcpus=2 " + "-device power9_v2.0-spapr-cpu-core,core-id=1,id=dev0"); + + /* similar to test_pci_unplug_request */ + device_del_request(qtest, "dev0"); + system_reset(qtest); + wait_device_deleted_event(qtest, "dev0"); + + qtest_quit(qtest); +} + int main(int argc, char **argv) { const char *arch = qtest_get_arch(); @@ -120,5 +135,10 @@ int main(int argc, char **argv) test_ccw_unplug); } + if (!strcmp(arch, "ppc64")) { + qtest_add_func("/device-plug/spapr-cpu-unplug-request", + test_spapr_cpu_unplug_request); + } + return g_test_run(); } From 368807049344f437d467aea9af4bc8b05d5fbee2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Feb 2019 10:22:02 +0100 Subject: [PATCH 17/50] tests/device-plug: Add memory unplug request test for spapr We can easily test this, just like PCI. On x86 ACPI, we need guest interaction to make it work, so it is not that easy to test. We might add tests for that later on. Reviewed-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Reviewed-by: Thomas Huth Signed-off-by: David Hildenbrand Message-Id: <20190218092202.26683-7-david@redhat.com> Signed-off-by: David Gibson --- tests/device-plug-test.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/device-plug-test.c b/tests/device-plug-test.c index 0262ad6be6..87593d9ecf 100644 --- a/tests/device-plug-test.c +++ b/tests/device-plug-test.c @@ -116,6 +116,22 @@ static void test_spapr_cpu_unplug_request(void) qtest_quit(qtest); } +static void test_spapr_memory_unplug_request(void) +{ + QTestState *qtest; + + qtest = qtest_initf("-m 256M,slots=1,maxmem=768M " + "-object memory-backend-ram,id=mem0,size=512M " + "-device pc-dimm,id=dev0,memdev=mem0"); + + /* similar to test_pci_unplug_request */ + device_del_request(qtest, "dev0"); + system_reset(qtest); + wait_device_deleted_event(qtest, "dev0"); + + qtest_quit(qtest); +} + int main(int argc, char **argv) { const char *arch = qtest_get_arch(); @@ -138,6 +154,8 @@ int main(int argc, char **argv) if (!strcmp(arch, "ppc64")) { qtest_add_func("/device-plug/spapr-cpu-unplug-request", test_spapr_cpu_unplug_request); + qtest_add_func("/device-plug/spapr-memory-unplug-request", + test_spapr_memory_unplug_request); } return g_test_run(); From 00fd075e1894fabff10dc7cd61af9130903a23c9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:18 +0100 Subject: [PATCH 18/50] target/ppc/spapr: Set LPCR:HR when using Radix mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HW relies on LPCR:HR along with the PATE to determine whether to use Radix or Hash mode. In fact it uses LPCR:HR more commonly than the PATE. For us, it's also more efficient to do so, especially since unlike the HW we do not maintain a cache of the current PATE and HV PATE in a generic place. Prepare the grounds for that by ensuring that LPCR:HR is set properly on SPAPR machines. Another option would have been to use a callback to get the PATE but this gets messy when implementing bare metal support, it's much simpler (and faster) to use LPCR. Since existing migration streams may not have it, fix it up in spapr_post_load() as well based on the pseudo-PATE entry that we keep. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-2-clg@kaod.org> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 41 +++++++++++++++++++++++++++++++++++- hw/ppc/spapr_hcall.c | 46 +++++++---------------------------------- hw/ppc/spapr_rtas.c | 6 +++--- include/hw/ppc/spapr.h | 1 + target/ppc/cpu.h | 1 + target/ppc/mmu-hash64.c | 2 +- 6 files changed, 54 insertions(+), 43 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b3631e22c4..84f6e9d9a8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1389,6 +1389,37 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, } } +struct LPCRSyncState { + target_ulong value; + target_ulong mask; +}; + +static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg) +{ + struct LPCRSyncState *s = arg.host_ptr; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + target_ulong lpcr; + + cpu_synchronize_state(cs); + lpcr = env->spr[SPR_LPCR]; + lpcr &= ~s->mask; + lpcr |= s->value; + ppc_store_lpcr(cpu, lpcr); +} + +void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) +{ + CPUState *cs; + struct LPCRSyncState s = { + .value = value, + .mask = mask + }; + CPU_FOREACH(cs) { + run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s)); + } +} + static uint64_t spapr_get_patbe(PPCVirtualHypervisor *vhyp) { sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); @@ -1565,7 +1596,7 @@ void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, } } /* We're setting up a hash table, so that means we're not radix */ - spapr->patb_entry = 0; + spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT); } void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr) @@ -1623,6 +1654,7 @@ static void spapr_machine_reset(void) * without a HPT because KVM will start them in radix mode. * Set the GR bit in PATB so that we know there is no HPT. */ spapr->patb_entry = PATBE1_GR; + spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT); } else { spapr_setup_hpt_and_vrma(spapr); } @@ -1781,6 +1813,13 @@ static int spapr_post_load(void *opaque, int version_id) bool radix = !!(spapr->patb_entry & PATBE1_GR); bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); + /* + * Update LPCR:HR and UPRT as they may not be set properly in + * the stream + */ + spapr_set_all_lpcrs(radix ? (LPCR_HR | LPCR_UPRT) : 0, + LPCR_HR | LPCR_UPRT); + err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry); if (err) { error_report("Process table config unsupported by the host"); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 17bcaa3822..b47241ace6 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -17,37 +17,6 @@ #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" -struct LPCRSyncState { - target_ulong value; - target_ulong mask; -}; - -static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg) -{ - struct LPCRSyncState *s = arg.host_ptr; - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - target_ulong lpcr; - - cpu_synchronize_state(cs); - lpcr = env->spr[SPR_LPCR]; - lpcr &= ~s->mask; - lpcr |= s->value; - ppc_store_lpcr(cpu, lpcr); -} - -static void set_all_lpcrs(target_ulong value, target_ulong mask) -{ - CPUState *cs; - struct LPCRSyncState s = { - .value = value, - .mask = mask - }; - CPU_FOREACH(cs) { - run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s)); - } -} - static bool has_spr(PowerPCCPU *cpu, int spr) { /* We can test whether the SPR is defined by checking for a valid name */ @@ -1255,12 +1224,12 @@ static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, switch (mflags) { case H_SET_MODE_ENDIAN_BIG: - set_all_lpcrs(0, LPCR_ILE); + spapr_set_all_lpcrs(0, LPCR_ILE); spapr_pci_switch_vga(true); return H_SUCCESS; case H_SET_MODE_ENDIAN_LITTLE: - set_all_lpcrs(LPCR_ILE, LPCR_ILE); + spapr_set_all_lpcrs(LPCR_ILE, LPCR_ILE); spapr_pci_switch_vga(false); return H_SUCCESS; } @@ -1289,7 +1258,7 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_UNSUPPORTED_FLAG; } - set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL); + spapr_set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL); return H_SUCCESS; } @@ -1422,10 +1391,11 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, spapr->patb_entry = cproc; /* Save new process table */ - /* Update the UPRT and GTSE bits in the LPCR for all cpus */ - set_all_lpcrs(((flags & (FLAG_RADIX | FLAG_HASH_PROC_TBL)) ? LPCR_UPRT : 0) | - ((flags & FLAG_GTSE) ? LPCR_GTSE : 0), - LPCR_UPRT | LPCR_GTSE); + /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */ + spapr_set_all_lpcrs(((flags & (FLAG_RADIX | FLAG_HASH_PROC_TBL)) ? + (LPCR_UPRT | LPCR_HR) : 0) | + ((flags & FLAG_GTSE) ? LPCR_GTSE : 0), + LPCR_UPRT | LPCR_HR | LPCR_GTSE); if (kvm_enabled()) { return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX, diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index d6a0952154..7a2cb786a3 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -172,10 +172,10 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, sPAPRMachineState *spapr, * New cpus are expected to start in the same radix/hash mode * as the existing CPUs */ - if (ppc64_radix_guest(callcpu)) { - lpcr |= LPCR_UPRT | LPCR_GTSE; + if (ppc64_v3_radix(callcpu)) { + lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; } else { - lpcr &= ~(LPCR_UPRT | LPCR_GTSE); + lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR); } } ppc_store_lpcr(newcpu, lpcr); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index fec0f26f49..659204ea93 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -841,4 +841,5 @@ void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize, #define SPAPR_OV5_XIVE_EXPLOIT 0x40 #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ +void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); #endif /* HW_SPAPR_H */ diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index ececad9f1f..3f39a77750 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -388,6 +388,7 @@ struct ppc_slb_t { #define LPCR_AIL (3ull << LPCR_AIL_SHIFT) #define LPCR_UPRT PPC_BIT(41) /* Use Process Table */ #define LPCR_EVIRT PPC_BIT(42) /* Enhanced Virtualisation */ +#define LPCR_HR PPC_BIT(43) /* Host Radix */ #define LPCR_ONL PPC_BIT(45) #define LPCR_LD PPC_BIT(46) /* Large Decrementer */ #define LPCR_P7_PECE0 PPC_BIT(49) diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 276d9015e7..f1c7729332 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -1084,7 +1084,7 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) case POWERPC_MMU_3_00: /* P9 */ lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD | (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL | - LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | + LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC | LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE); From 38c784a1cce1a5fe68cf1a6474aad5a9f8c7dc1a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:19 +0100 Subject: [PATCH 19/50] target/ppc/mmu: Use LPCR:HR to chose radix vs. hash translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that LPCR:HR is set properly for SPAPR, use it for deciding the translation type, which also works for bare metal Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-3-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/mmu-book3s-v3.c | 11 ++++++++++- target/ppc/mmu-book3s-v3.h | 14 +++++++++----- target/ppc/mmu_helper.c | 9 ++------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index b60df4408f..a174e7efc5 100644 --- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -26,9 +26,18 @@ int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx) { - if (ppc64_radix_guest(cpu)) { /* Guest uses radix */ + if (ppc64_v3_radix(cpu)) { /* Guest uses radix */ return ppc_radix64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx); } else { /* Guest uses hash */ return ppc_hash64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx); } } + +hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr) +{ + if (ppc64_v3_radix(cpu)) { + return ppc_radix64_get_phys_page_debug(cpu, eaddr); + } else { + return ppc_hash64_get_phys_page_debug(cpu, eaddr); + } +} diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index fdf80987d7..41b7715862 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -43,14 +43,18 @@ static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu) return !!(cpu->env.spr[SPR_LPCR] & LPCR_UPRT); } -static inline bool ppc64_radix_guest(PowerPCCPU *cpu) +/* + * The LPCR:HR bit is a shortcut that avoids having to + * dig out the partition table in the fast path. This is + * also how the HW uses it. + */ +static inline bool ppc64_v3_radix(PowerPCCPU *cpu) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - - return !!(vhc->get_patbe(cpu->vhyp) & PATBE1_GR); + return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR); } +hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr); + int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx); diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index bcf19da61d..4a6be4d63b 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -1342,7 +1342,7 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env) dump_slb(f, cpu_fprintf, ppc_env_get_cpu(env)); break; case POWERPC_MMU_3_00: - if (ppc64_radix_guest(ppc_env_get_cpu(env))) { + if (ppc64_v3_radix(ppc_env_get_cpu(env))) { /* TODO - Unsupported */ } else { dump_slb(f, cpu_fprintf, ppc_env_get_cpu(env)); @@ -1489,12 +1489,7 @@ hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) case POWERPC_MMU_2_07: return ppc_hash64_get_phys_page_debug(cpu, addr); case POWERPC_MMU_3_00: - if (ppc64_radix_guest(ppc_env_get_cpu(env))) { - return ppc_radix64_get_phys_page_debug(cpu, addr); - } else { - return ppc_hash64_get_phys_page_debug(cpu, addr); - } - break; + return ppc64_v3_get_phys_page_debug(cpu, addr); #endif case POWERPC_MMU_32B: From 2b9e0a6b9456dcb4b9e2f14d90a9db1622f462ba Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:20 +0100 Subject: [PATCH 20/50] target/ppc: Re-enable RMLS on POWER9 for virtual hypervisors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically the 64-bit server MMU supports two way of configuring the guest "real mode" mapping: - The "RMA" with is a single chunk of physically contiguous memory remapped as guest real, and controlled by the RMLS field in the LPCR register and the RMOR register. - The "VRMA" which uses special PTEs inserted in the partition hash table by the hypervisor. POWER9 deprecates the former, which is reflected by the filtering done in ppc_store_lpcr() which effectively prevents setting of the RMLS field. However, when using fully emulated SPAPR machines, our qemu code currently only knows how to define the guest real mode memory using RMLS. Thus you cannot run a SPAPR machine anymore with a POWER9 CPU model today. This works around it with a quirk in ppc_store_lpcr() to continue allowing the RMLS field to be set when using a virtual hypervisor. Ultimately we will want to implement configuring a VRMA instead which will also be necessary if we want to migrate a SPAPR guest between TCG and KVM but this is a lot more work. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-4-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/mmu-hash64.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index f1c7729332..1175b991d9 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -1088,6 +1088,14 @@ void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val) (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC | LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE); + /* + * If we have a virtual hypervisor, we need to bring back RMLS. It + * doesn't exist on an actual P9 but that's all we know how to + * configure with softmmu at the moment + */ + if (cpu->vhyp) { + lpcr |= (val & LPCR_RMLS); + } break; default: ; From 2819282dae6c38b2c9d5499ad748b1471f26af1a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:21 +0100 Subject: [PATCH 21/50] target/ppc: Fix #include guard in mmu-book3s-v3.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-5-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/mmu-book3s-v3.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 41b7715862..12ec0054c2 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -17,8 +17,8 @@ * License along with this library; if not, see . */ -#ifndef MMU_H -#define MMU_H +#ifndef MMU_BOOOK3S_V3_H +#define MMU_BOOOK3S_V3_H #ifndef CONFIG_USER_ONLY @@ -62,4 +62,4 @@ int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, #endif /* CONFIG_USER_ONLY */ -#endif /* MMU_H */ +#endif /* MMU_BOOOK3S_V3_H */ From 3054b0ca4bdd83e0780bd76805ca17aa733031c8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:23 +0100 Subject: [PATCH 22/50] target/ppc: Fix ordering of hash MMU accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With mttcg, we can have MMU lookups happening at the same time as the guest modifying the page tables. Since the HPTEs of the hash table MMU contains two words (or double worlds on 64-bit), we need to make sure we read them in the right order, with the correct memory barrier. Additionally, when using emulated SPAPR mode, the hypercalls writing to the hash table must also perform the udpates in the right order. Note: This part is still not entirely correct Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-7-clg@kaod.org> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 21 +++++++++++++++++++-- target/ppc/mmu-hash32.c | 6 ++++++ target/ppc/mmu-hash64.c | 6 ++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 84f6e9d9a8..d2520bc662 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1524,8 +1524,25 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, if (!spapr->htab) { kvmppc_write_hpte(ptex, pte0, pte1); } else { - stq_p(spapr->htab + offset, pte0); - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + if (pte0 & HPTE64_V_VALID) { + stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + /* + * When setting valid, we write PTE1 first. This ensures + * proper synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset, pte0); + } else { + stq_p(spapr->htab + offset, pte0); + /* + * When clearing it we set PTE0 first. This ensures proper + * synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + } } } diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 03ae3c1279..e8562a7c87 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -319,6 +319,12 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, for (i = 0; i < HPTES_PER_GROUP; i++) { pte0 = ppc_hash32_load_hpte0(cpu, pte_offset); + /* + * pte0 contains the valid bit and must be read before pte1, + * otherwise we might see an old pte1 with a new valid bit and + * thus an inconsistent hpte value + */ + smp_rmb(); pte1 = ppc_hash32_load_hpte1(cpu, pte_offset); if ((pte0 & HPTE32_V_VALID) diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 1175b991d9..fbefe5b5aa 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -507,6 +507,12 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, } for (i = 0; i < HPTES_PER_GROUP; i++) { pte0 = ppc_hash64_hpte0(cpu, pteg, i); + /* + * pte0 contains the valid bit and must be read before pte1, + * otherwise we might see an old pte1 with a new valid bit and + * thus an inconsistent hpte value + */ + smp_rmb(); pte1 = ppc_hash64_hpte1(cpu, pteg, i); /* This compares V, B, H (secondary) and the AVPN */ From 34525595fb379796241b0f3a9946bb21c4da1178 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:24 +0100 Subject: [PATCH 23/50] target/ppc: Add basic support for "new format" HPTE as found on POWER9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POWER9 (arch v3) slightly changes the HPTE format. The B bits move from the first to the second half of the HPTE, and the AVPN/ARPN are slightly shorter. However, under SPAPR, the hypercalls still take the old format (and probably will for the foreseable future). The simplest way to support this is thus to convert the HPTEs from new to old format when reading them if the MMU model is v3 and there is no virtual hypervisor, leaving the rest of the code unchanged. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-8-clg@kaod.org> [dwg: Moved function to .c since there was no real need for it in the .h] Signed-off-by: David Gibson --- target/ppc/mmu-hash64.c | 17 +++++++++++++++++ target/ppc/mmu-hash64.h | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index fbefe5b5aa..3c057a8c70 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -490,6 +490,18 @@ static unsigned hpte_page_shift(const PPCHash64SegmentPageSizes *sps, return 0; /* Bad page size encoding */ } +static void ppc64_v3_new_to_old_hpte(target_ulong *pte0, target_ulong *pte1) +{ + /* Insert B into pte0 */ + *pte0 = (*pte0 & HPTE64_V_COMMON_BITS) | + ((*pte1 & HPTE64_R_3_0_SSIZE_MASK) << + (HPTE64_V_SSIZE_SHIFT - HPTE64_R_3_0_SSIZE_SHIFT)); + + /* Remove B from pte1 */ + *pte1 = *pte1 & ~HPTE64_R_3_0_SSIZE_MASK; +} + + static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, const PPCHash64SegmentPageSizes *sps, target_ulong ptem, @@ -515,6 +527,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, smp_rmb(); pte1 = ppc_hash64_hpte1(cpu, pteg, i); + /* Convert format if necessary */ + if (cpu->env.mmu_model == POWERPC_MMU_3_00 && !cpu->vhyp) { + ppc64_v3_new_to_old_hpte(&pte0, &pte1); + } + /* This compares V, B, H (secondary) and the AVPN */ if (HPTE64_V_COMPARE(pte0, ptem)) { *pshift = hpte_page_shift(sps, pte0, pte1); diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index f11efc9cbc..016d6b44ee 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -102,6 +102,11 @@ void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL +/* Format changes for ARCH v3 */ +#define HPTE64_V_COMMON_BITS 0x000fffffffffffffULL +#define HPTE64_R_3_0_SSIZE_SHIFT 58 +#define HPTE64_R_3_0_SSIZE_MASK (3ULL << HPTE64_R_3_0_SSIZE_SHIFT) + static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) { if (cpu->vhyp) { From 74c4912f097bab98a8b0f8ec2bee1db269505c14 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:25 +0100 Subject: [PATCH 24/50] target/ppc: Fix synchronization of mttcg with broadcast TLB flushes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use the generic helper tlb_flush_all_cpus_synced() instead of iterating the CPUs ourselves. We do lose the optimization of clearing the "other" CPUs "need flush" flags but this shouldn't be a problem in practice. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-9-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/helper_regs.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h index 5efd18049e..a2205e1044 100644 --- a/target/ppc/helper_regs.h +++ b/target/ppc/helper_regs.h @@ -174,26 +174,19 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, static inline void check_tlb_flush(CPUPPCState *env, bool global) { CPUState *cs = CPU(ppc_env_get_cpu(env)); - if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { - tlb_flush(cs); + + /* Handle global flushes first */ + if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) { + env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; + tlb_flush_all_cpus_synced(cs); + return; } - /* Propagate TLB invalidations to other CPUs when the guest uses broadcast - * TLB invalidation instructions. - */ - if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) { - CPUState *other_cs; - CPU_FOREACH(other_cs) { - if (other_cs != cs) { - PowerPCCPU *cpu = POWERPC_CPU(other_cs); - CPUPPCState *other_env = &cpu->env; - - other_env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; - tlb_flush(other_cs); - } - } - env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; + /* Then handle local ones */ + if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { + env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; + tlb_flush(cs); } } #else From c4dae9cd37f5c6c7a6aa741f985f5abb57a6bb3f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:26 +0100 Subject: [PATCH 25/50] target/ppc: Flush the TLB locally when the LPIDR is written MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our TCG TLB only tags whether it's a HV vs a guest access, so it must be flushed when the LPIDR is changed. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-10-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/helper.h | 1 + target/ppc/misc_helper.c | 15 +++++++++++++++ target/ppc/translate_init.inc.c | 7 ++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 18910d18a4..638a6e99c4 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -689,6 +689,7 @@ DEF_HELPER_2(store_ptcr, void, env, tl) #endif DEF_HELPER_2(store_sdr1, void, env, tl) DEF_HELPER_2(store_pidr, void, env, tl) +DEF_HELPER_2(store_lpidr, void, env, tl) DEF_HELPER_FLAGS_2(store_tbl, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(store_tbu, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(store_atbl, TCG_CALL_NO_RWG, void, env, tl) diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index b884930096..c65d1ade15 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -117,6 +117,21 @@ void helper_store_pidr(CPUPPCState *env, target_ulong val) tlb_flush(CPU(cpu)); } +void helper_store_lpidr(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = ppc_env_get_cpu(env); + + env->spr[SPR_LPIDR] = val; + + /* + * We need to flush the TLB on LPID changes as we only tag HV vs + * guest in TCG TLB. Also the quadrants means the HV will + * potentially access and cache entries for the current LPID as + * well. + */ + tlb_flush(CPU(cpu)); +} + void helper_store_hid0_601(CPUPPCState *env, target_ulong val) { target_ulong hid0; diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 965c5273a6..58542c0fe0 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -408,6 +408,11 @@ static void spr_write_pidr(DisasContext *ctx, int sprn, int gprn) gen_helper_store_pidr(cpu_env, cpu_gpr[gprn]); } +static void spr_write_lpidr(DisasContext *ctx, int sprn, int gprn) +{ + gen_helper_store_lpidr(cpu_env, cpu_gpr[gprn]); +} + static void spr_read_hior(DisasContext *ctx, int gprn, int sprn) { tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env, offsetof(CPUPPCState, excp_prefix)); @@ -7885,7 +7890,7 @@ static void gen_spr_book3s_ids(CPUPPCState *env) spr_register_hv(env, SPR_LPIDR, "LPIDR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_lpidr, 0x00000000); spr_register_hv(env, SPR_HFSCR, "HFSCR", SPR_NOACCESS, SPR_NOACCESS, From 79825f4d583a327c347db504f44bbb7470b130e8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:27 +0100 Subject: [PATCH 26/50] target/ppc: Rename PATB/PATBE -> PATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That "b" means "base address" and thus shouldn't be in the name of actual entries and related constants. This patch keeps the synthetic patb_entry field of the spapr virtual hypervisor unchanged until I figure out if that has an impact on the migration stream. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-11-clg@kaod.org> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 24 +++++++++++++++--------- hw/ppc/spapr_hcall.c | 22 ++++++++++++---------- target/ppc/cpu.h | 6 +++++- target/ppc/mmu-book3s-v3.h | 11 ++++++++++- target/ppc/mmu-radix64.c | 18 ++++++++++-------- target/ppc/mmu-radix64.h | 4 ++-- 6 files changed, 54 insertions(+), 31 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d2520bc662..00eb3b643c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1420,11 +1420,13 @@ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) } } -static uint64_t spapr_get_patbe(PPCVirtualHypervisor *vhyp) +static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry) { sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); - return spapr->patb_entry; + /* Copy PATE1:GR into PATE0:HR */ + entry->dw0 = spapr->patb_entry & PATE0_HR; + entry->dw1 = spapr->patb_entry; } #define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) @@ -1667,17 +1669,21 @@ static void spapr_machine_reset(void) if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, spapr->max_compat_pvr)) { - /* If using KVM with radix mode available, VCPUs can be started + /* + * If using KVM with radix mode available, VCPUs can be started * without a HPT because KVM will start them in radix mode. - * Set the GR bit in PATB so that we know there is no HPT. */ - spapr->patb_entry = PATBE1_GR; + * Set the GR bit in PATE so that we know there is no HPT. + */ + spapr->patb_entry = PATE1_GR; spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT); } else { spapr_setup_hpt_and_vrma(spapr); } - /* if this reset wasn't generated by CAS, we should reset our - * negotiated options and start from scratch */ + /* + * If this reset wasn't generated by CAS, we should reset our + * negotiated options and start from scratch + */ if (!spapr->cas_reboot) { spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); @@ -1827,7 +1833,7 @@ static int spapr_post_load(void *opaque, int version_id) if (kvm_enabled() && spapr->patb_entry) { PowerPCCPU *cpu = POWERPC_CPU(first_cpu); - bool radix = !!(spapr->patb_entry & PATBE1_GR); + bool radix = !!(spapr->patb_entry & PATE1_GR); bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); /* @@ -4118,7 +4124,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vhc->map_hptes = spapr_map_hptes; vhc->unmap_hptes = spapr_unmap_hptes; vhc->store_hpte = spapr_store_hpte; - vhc->get_patbe = spapr_get_patbe; + vhc->get_pate = spapr_get_pate; vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr; xic->ics_get = spapr_ics_get; xic->ics_resend = spapr_ics_resend; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b47241ace6..476bad6271 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1311,12 +1311,12 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, * later and so assumed radix and now it's called H_REG_PROC_TBL */ - if ((patbe_old & PATBE1_GR) == (patbe_new & PATBE1_GR)) { + if ((patbe_old & PATE1_GR) == (patbe_new & PATE1_GR)) { /* We assume RADIX, so this catches all the "Do Nothing" cases */ - } else if (!(patbe_old & PATBE1_GR)) { + } else if (!(patbe_old & PATE1_GR)) { /* HASH->RADIX : Free HPT */ spapr_free_hpt(spapr); - } else if (!(patbe_new & PATBE1_GR)) { + } else if (!(patbe_new & PATE1_GR)) { /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ spapr_setup_hpt_and_vrma(spapr); } @@ -1354,7 +1354,7 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, } else if (table_size > 24) { return H_P4; } - cproc = PATBE1_GR | proc_tbl | table_size; + cproc = PATE1_GR | proc_tbl | table_size; } else { /* Register new HPT process table */ if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */ /* TODO - Not Supported */ @@ -1373,13 +1373,15 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, } } else { /* Deregister current process table */ - /* Set to benign value: (current GR) | 0. This allows - * deregistration in KVM to succeed even if the radix bit in flags - * doesn't match the radix bit in the old PATB. */ - cproc = spapr->patb_entry & PATBE1_GR; + /* + * Set to benign value: (current GR) | 0. This allows + * deregistration in KVM to succeed even if the radix bit + * in flags doesn't match the radix bit in the old PATE. + */ + cproc = spapr->patb_entry & PATE1_GR; } } else { /* Maintain current registration */ - if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATBE1_GR)) { + if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATE1_GR)) { /* Technically caused by flag bits => H_PARAMETER */ return H_PARAMETER; /* Existing Process Table Mismatch */ } @@ -1616,7 +1618,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, if (!spapr->cas_reboot) { /* If spapr_machine_reset() did not set up a HPT but one is necessary * (because the guest isn't going to use radix) then set it up here. */ - if ((spapr->patb_entry & PATBE1_GR) && !guest_radix) { + if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { /* legacy hash or new hash: */ spapr_setup_hpt_and_vrma(spapr); } diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 3f39a77750..26604ddf98 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -320,6 +320,10 @@ struct ppc_slb_t { #define SEGMENT_SHIFT_1T 40 #define SEGMENT_MASK_1T (~((1ULL << SEGMENT_SHIFT_1T) - 1)) +typedef struct ppc_v3_pate_t { + uint64_t dw0; + uint64_t dw1; +} ppc_v3_pate_t; /*****************************************************************************/ /* Machine state register bits definition */ @@ -1248,7 +1252,7 @@ struct PPCVirtualHypervisorClass { hwaddr ptex, int n); void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte0, uint64_t pte1); - uint64_t (*get_patbe)(PPCVirtualHypervisor *vhyp); + void (*get_pate)(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry); target_ulong (*encode_hpt_for_kvm_pr)(PPCVirtualHypervisor *vhyp); }; diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 12ec0054c2..d63ca6b1c7 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -29,7 +29,16 @@ #define PTCR_PATS 0x000000000000001FULL /* Partition Table Size */ /* Partition Table Entry Fields */ -#define PATBE1_GR 0x8000000000000000 +#define PATE0_HR 0x8000000000000000 + +/* + * WARNING: This field doesn't actually exist in the final version of + * the architecture and is unused by hardware. However, qemu uses it + * as an indication of a radix guest in the pseudo-PATB entry that it + * maintains for SPAPR guests and in the migration stream, so we need + * to keep it around + */ +#define PATE1_GR 0x8000000000000000 /* Process Table Entry */ struct prtb_entry { diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index ab76cbc835..a07d757063 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -194,8 +194,9 @@ int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); hwaddr raddr, pte_addr; - uint64_t lpid = 0, pid = 0, offset, size, patbe, prtbe0, pte; + uint64_t lpid = 0, pid = 0, offset, size, prtbe0, pte; int page_size, prot, fault_cause = 0; + ppc_v3_pate_t pate; assert((rwx == 0) || (rwx == 1) || (rwx == 2)); assert(!msr_hv); /* For now there is no Radix PowerNV Support */ @@ -220,17 +221,17 @@ int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, } /* Get Process Table */ - patbe = vhc->get_patbe(cpu->vhyp); + vhc->get_pate(cpu->vhyp, &pate); /* Index Process Table by PID to Find Corresponding Process Table Entry */ offset = pid * sizeof(struct prtb_entry); - size = 1ULL << ((patbe & PATBE1_R_PRTS) + 12); + size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12); if (offset >= size) { /* offset exceeds size of the process table */ ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE); return 1; } - prtbe0 = ldq_phys(cs->as, (patbe & PATBE1_R_PRTB) + offset); + prtbe0 = ldq_phys(cs->as, (pate.dw1 & PATE1_R_PRTB) + offset); /* Walk Radix Tree from Process Table Entry to Convert EA to RA */ page_size = PRTBE_R_GET_RTS(prtbe0); @@ -258,8 +259,9 @@ hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr) PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); hwaddr raddr, pte_addr; - uint64_t lpid = 0, pid = 0, offset, size, patbe, prtbe0, pte; + uint64_t lpid = 0, pid = 0, offset, size, prtbe0, pte; int page_size, fault_cause = 0; + ppc_v3_pate_t pate; /* Handle Real Mode */ if (msr_dr == 0) { @@ -273,16 +275,16 @@ hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr) } /* Get Process Table */ - patbe = vhc->get_patbe(cpu->vhyp); + vhc->get_pate(cpu->vhyp, &pate); /* Index Process Table by PID to Find Corresponding Process Table Entry */ offset = pid * sizeof(struct prtb_entry); - size = 1ULL << ((patbe & PATBE1_R_PRTS) + 12); + size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12); if (offset >= size) { /* offset exceeds size of the process table */ return -1; } - prtbe0 = ldq_phys(cs->as, (patbe & PATBE1_R_PRTB) + offset); + prtbe0 = ldq_phys(cs->as, (pate.dw1 & PATE1_R_PRTB) + offset); /* Walk Radix Tree from Process Table Entry to Convert EA to RA */ page_size = PRTBE_R_GET_RTS(prtbe0); diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h index 0ecf063a17..96228546aa 100644 --- a/target/ppc/mmu-radix64.h +++ b/target/ppc/mmu-radix64.h @@ -12,8 +12,8 @@ #define R_EADDR_QUADRANT3 0xC000000000000000 /* Radix Partition Table Entry Fields */ -#define PATBE1_R_PRTB 0x0FFFFFFFFFFFF000 -#define PATBE1_R_PRTS 0x000000000000001F +#define PATE1_R_PRTB 0x0FFFFFFFFFFFF000 +#define PATE1_R_PRTS 0x000000000000001F /* Radix Process Table Entry Fields */ #define PRTBE_R_GET_RTS(rts) \ From 3367c62f522bda6254b5760eab1ee94523a24ab2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:28 +0100 Subject: [PATCH 27/50] target/ppc: Support for POWER9 native hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (Might need more patch splitting) Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-12-clg@kaod.org> [dwg: Hack to fix compile with some earlier include tweaks of mine] Signed-off-by: David Gibson --- target/ppc/mmu-book3s-v3.c | 18 +++++++++++++++ target/ppc/mmu-book3s-v3.h | 47 ++++++++++++++++++++++++++++++++++++++ target/ppc/mmu-hash64.c | 6 +++-- target/ppc/mmu-hash64.h | 19 +-------------- 4 files changed, 70 insertions(+), 20 deletions(-) diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index a174e7efc5..32b8c166b5 100644 --- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -41,3 +41,21 @@ hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr) return ppc_hash64_get_phys_page_debug(cpu, eaddr); } } + +bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) +{ + uint64_t patb = cpu->env.spr[SPR_PTCR] & PTCR_PATB; + uint64_t pats = cpu->env.spr[SPR_PTCR] & PTCR_PATS; + + /* Calculate number of entries */ + pats = 1ull << (pats + 12 - 4); + if (pats <= lpid) { + return false; + } + + /* Grab entry */ + patb += 16 * lpid; + entry->dw0 = ldq_phys(CPU(cpu)->as, patb); + entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8); + return true; +} diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index d63ca6b1c7..ee8288e32d 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -20,6 +20,8 @@ #ifndef MMU_BOOOK3S_V3_H #define MMU_BOOOK3S_V3_H +#include "mmu-hash64.h" + #ifndef CONFIG_USER_ONLY /* @@ -52,6 +54,9 @@ static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu) return !!(cpu->env.spr[SPR_LPCR] & LPCR_UPRT); } +bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, + ppc_v3_pate_t *entry); + /* * The LPCR:HR bit is a shortcut that avoids having to * dig out the partition table in the fast path. This is @@ -67,6 +72,48 @@ hwaddr ppc64_v3_get_phys_page_debug(PowerPCCPU *cpu, vaddr eaddr); int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx); +static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) +{ + uint64_t base; + + if (cpu->vhyp) { + return 0; + } + if (cpu->env.mmu_model == POWERPC_MMU_3_00) { + ppc_v3_pate_t pate; + + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { + return 0; + } + base = pate.dw0; + } else { + base = cpu->env.spr[SPR_SDR1]; + } + return base & SDR_64_HTABORG; +} + +static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) +{ + uint64_t base; + + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + return vhc->hpt_mask(cpu->vhyp); + } + if (cpu->env.mmu_model == POWERPC_MMU_3_00) { + ppc_v3_pate_t pate; + + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { + return 0; + } + base = pate.dw0; + } else { + base = cpu->env.spr[SPR_SDR1]; + } + return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1; +} + #endif /* TARGET_PPC64 */ #endif /* CONFIG_USER_ONLY */ diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 3c057a8c70..c431303eff 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -417,7 +417,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, hwaddr ptex, int n) { hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; - hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr base; hwaddr plen = n * HASH_PTE_SIZE_64; const ppc_hash_pte64_t *hptes; @@ -426,6 +426,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); return vhc->map_hptes(cpu->vhyp, ptex, n); } + base = ppc_hash64_hpt_base(cpu); if (!base) { return NULL; @@ -941,7 +942,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte0, uint64_t pte1) { - hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr base; hwaddr offset = ptex * HASH_PTE_SIZE_64; if (cpu->vhyp) { @@ -950,6 +951,7 @@ void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1); return; } + base = ppc_hash64_hpt_base(cpu); stq_phys(CPU(cpu)->as, base + offset, pte0); stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1); diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index 016d6b44ee..6b555b7220 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -63,6 +63,7 @@ void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, #define SDR_64_HTABORG 0x0FFFFFFFFFFC0000ULL #define SDR_64_HTABSIZE 0x000000000000001FULL +#define PATE0_HTABORG 0x0FFFFFFFFFFC0000ULL #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) @@ -107,24 +108,6 @@ void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, #define HPTE64_R_3_0_SSIZE_SHIFT 58 #define HPTE64_R_3_0_SSIZE_MASK (3ULL << HPTE64_R_3_0_SSIZE_SHIFT) -static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) -{ - if (cpu->vhyp) { - return 0; - } - return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG; -} - -static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) -{ - if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - return vhc->hpt_mask(cpu->vhyp); - } - return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1; -} - struct ppc_hash_pte64 { uint64_t pte0, pte1; }; From 539c6e735871aae54d9a61520efdfc02dc506d9b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 15 Feb 2019 18:00:29 +0100 Subject: [PATCH 28/50] target/ppc: Basic POWER9 bare-metal radix MMU support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No guest support yet Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Message-Id: <20190215170029.15641-13-clg@kaod.org> Signed-off-by: David Gibson --- target/ppc/mmu-radix64.c | 81 ++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index a07d757063..ca1fb2673f 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -31,10 +31,26 @@ static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr eaddr, uint64_t *lpid, uint64_t *pid) { - /* We don't have HV support yet and shouldn't get here with it set anyway */ - assert(!msr_hv); - - if (!msr_hv) { /* !MSR[HV] -> Guest */ + if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */ + switch (eaddr & R_EADDR_QUADRANT) { + case R_EADDR_QUADRANT0: + *lpid = 0; + *pid = env->spr[SPR_BOOKS_PID]; + break; + case R_EADDR_QUADRANT1: + *lpid = env->spr[SPR_LPIDR]; + *pid = env->spr[SPR_BOOKS_PID]; + break; + case R_EADDR_QUADRANT2: + *lpid = env->spr[SPR_LPIDR]; + *pid = 0; + break; + case R_EADDR_QUADRANT3: + *lpid = 0; + *pid = 0; + break; + } + } else { /* !MSR[HV] -> Guest */ switch (eaddr & R_EADDR_QUADRANT) { case R_EADDR_QUADRANT0: /* Guest application */ *lpid = env->spr[SPR_LPIDR]; @@ -186,21 +202,32 @@ static uint64_t ppc_radix64_walk_tree(PowerPCCPU *cpu, vaddr eaddr, raddr, psize, fault_cause, pte_addr); } +static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate) +{ + CPUPPCState *env = &cpu->env; + + if (!(pate->dw0 & PATE0_HR)) { + return false; + } + if (lpid == 0 && !msr_hv) { + return false; + } + /* More checks ... */ + return true; +} + int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, int mmu_idx) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + PPCVirtualHypervisorClass *vhc; hwaddr raddr, pte_addr; uint64_t lpid = 0, pid = 0, offset, size, prtbe0, pte; int page_size, prot, fault_cause = 0; ppc_v3_pate_t pate; assert((rwx == 0) || (rwx == 1) || (rwx == 2)); - assert(!msr_hv); /* For now there is no Radix PowerNV Support */ - assert(cpu->vhyp); assert(ppc64_use_proc_tbl(cpu)); /* Real Mode Access */ @@ -221,7 +248,23 @@ int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, } /* Get Process Table */ - vhc->get_pate(cpu->vhyp, &pate); + if (cpu->vhyp) { + vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->get_pate(cpu->vhyp, &pate); + } else { + if (!ppc64_v3_get_pate(cpu, lpid, &pate)) { + ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE); + return 1; + } + if (!validate_pate(cpu, lpid, &pate)) { + ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_R_BADCONFIG); + } + /* We don't support guest mode yet */ + if (lpid != 0) { + error_report("PowerNV guest support Unimplemented"); + exit(1); + } + } /* Index Process Table by PID to Find Corresponding Process Table Entry */ offset = pid * sizeof(struct prtb_entry); @@ -256,8 +299,7 @@ hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + PPCVirtualHypervisorClass *vhc; hwaddr raddr, pte_addr; uint64_t lpid = 0, pid = 0, offset, size, prtbe0, pte; int page_size, fault_cause = 0; @@ -275,7 +317,22 @@ hwaddr ppc_radix64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong eaddr) } /* Get Process Table */ - vhc->get_pate(cpu->vhyp, &pate); + if (cpu->vhyp) { + vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->get_pate(cpu->vhyp, &pate); + } else { + if (!ppc64_v3_get_pate(cpu, lpid, &pate)) { + return -1; + } + if (!validate_pate(cpu, lpid, &pate)) { + return -1; + } + /* We don't support guest mode yet */ + if (lpid != 0) { + error_report("PowerNV guest support Unimplemented"); + exit(1); + } + } /* Index Process Table by PID to Find Corresponding Process Table Entry */ offset = pid * sizeof(struct prtb_entry); From d9c95c71aca38e3221ae38672494721650c99edc Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:17:38 +0100 Subject: [PATCH 29/50] spapr_drc: Allow FDT fragment to be added later The current logic is to provide the FDT fragment when attaching a device to a DRC. This works perfectly fine for our current hotplug support, but soon we will add support for PHB hotplug which has some constraints, that CPU, PCI and LMB devices don't seem to have. The first constraint is that the "ibm,dma-window" property of the PHB node requires the IOMMU to be configured, ie, spapr_tce_table_enable() has been called, which happens during PHB reset. It is okay in the case of hotplug since the device is reset before the hotplug handler is called. On the contrary with coldplug, the hotplug handler is called first and device is only reset during the initial system reset. Trying to create the FDT fragment on the hotplug path in this case, would result in somthing like this: ibm,dma-window = < 0x80000000 0x00 0x00 0x00 0x00 >; This will cause linux in the guest to panic, by simply removing and re-adding the PHB using the drmgr command: page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); if (!page) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); The second and maybe more problematic constraint is that the "interrupt-map" property needs to reference the interrupt controller node using the very same phandle that SLOF has already exposed to the guest. QEMU requires SLOF to call the private KVMPPC_H_UPDATE_DT hcall at some point to know about this phandle. With the latest QEMU and SLOF, this happens when SLOF gets quiesced. This means that if the PHB gets hotplugged after CAS but before SLOF quiesce, then we're sure that the phandle is not known when the hotplug handler is called. The FDT is only needed when the guest first invokes RTAS to configure the connector actually, long after SLOF quiesce. Let's postpone the creation of FDT fragments for PHBs to rtas_ibm_configure_connector(). Since we only need this for PHBs, introduce a new method in the base DRC class for that. DRC subtypes will be converted to use it in subsequent patches. Allow spapr_drc_attach() to be passed a NULL fdt argument if the method is available. When all DRC subtypes have been converted, the fdt argument will eventually disappear. Signed-off-by: Greg Kurz Message-Id: <155059665823.1466090.18358845122627355537.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 36 +++++++++++++++++++++++++++++++----- include/hw/ppc/spapr_drc.h | 6 ++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 2edb7d1e9c..66b965a0a7 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -22,6 +22,7 @@ #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ #include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ +#include "sysemu/device_tree.h" #include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" @@ -376,6 +377,8 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int fdt_start_offset, Error **errp) { + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + trace_spapr_drc_attach(spapr_drc_index(drc)); if (drc->dev) { @@ -384,11 +387,14 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, } g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); - g_assert(fdt); + g_assert(fdt || drck->dt_populate); drc->dev = d; - drc->fdt = fdt; - drc->fdt_start_offset = fdt_start_offset; + + if (fdt) { + drc->fdt = fdt; + drc->fdt_start_offset = fdt_start_offset; + } object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), @@ -1102,10 +1108,30 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, goto out; } - g_assert(drc->fdt); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + g_assert(drc->fdt || drck->dt_populate); + + if (!drc->fdt) { + Error *local_err = NULL; + void *fdt; + int fdt_size; + + fdt = create_device_tree(&fdt_size); + + if (drck->dt_populate(drc, spapr, fdt, &drc->fdt_start_offset, + &local_err)) { + g_free(fdt); + error_free(local_err); + rc = SPAPR_DR_CC_RESPONSE_ERROR; + goto out; + } + + drc->fdt = fdt; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + } + do { uint32_t tag; const char *name; diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index f6ff32e7e2..2aa919f0cf 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -18,6 +18,7 @@ #include "qom/object.h" #include "sysemu/sysemu.h" #include "hw/qdev.h" +#include "qapi/error.h" #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" #define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ @@ -213,6 +214,8 @@ typedef struct sPAPRDRConnector { int fdt_start_offset; } sPAPRDRConnector; +struct sPAPRMachineState; + typedef struct sPAPRDRConnectorClass { /*< private >*/ DeviceClass parent; @@ -228,6 +231,9 @@ typedef struct sPAPRDRConnectorClass { uint32_t (*isolate)(sPAPRDRConnector *drc); uint32_t (*unisolate)(sPAPRDRConnector *drc); void (*release)(DeviceState *dev); + + int (*dt_populate)(sPAPRDRConnector *drc, struct sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); } sPAPRDRConnectorClass; typedef struct sPAPRDRCPhysical { From 62d38c9bd3a217fa21f4fe73526839eee5f2f770 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:17:43 +0100 Subject: [PATCH 30/50] spapr: Generate FDT fragment for LMBs at configure connector time Signed-off-by: Greg Kurz Message-Id: <155059666331.1466090.6766540766297333313.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 33 ++++++++++++++++++--------------- hw/ppc/spapr_drc.c | 1 + include/hw/ppc/spapr.h | 4 +++- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 00eb3b643c..b92deee771 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3333,14 +3333,26 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) } } +int spapr_lmb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + uint64_t addr; + uint32_t node; + + addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE; + node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP, + &error_abort); + *fdt_start_offset = spapr_populate_memory_node(fdt, node, addr, + SPAPR_MEMORY_BLOCK_SIZE); + return 0; +} + static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, - uint32_t node, bool dedicated_hp_event_source, - Error **errp) + bool dedicated_hp_event_source, Error **errp) { sPAPRDRConnector *drc; uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; - int i, fdt_offset, fdt_size; - void *fdt; + int i; uint64_t addr = addr_start; bool hotplugged = spapr_drc_hotplugged(dev); Error *local_err = NULL; @@ -3350,11 +3362,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - fdt = create_device_tree(&fdt_size); - fdt_offset = spapr_populate_memory_node(fdt, node, addr, - SPAPR_MEMORY_BLOCK_SIZE); - - spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); + spapr_drc_attach(drc, dev, NULL, 0, &local_err); if (local_err) { while (addr > addr_start) { addr -= SPAPR_MEMORY_BLOCK_SIZE; @@ -3362,7 +3370,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, addr / SPAPR_MEMORY_BLOCK_SIZE); spapr_drc_detach(drc); } - g_free(fdt); error_propagate(errp, local_err); return; } @@ -3395,7 +3402,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); uint64_t size, addr; - uint32_t node; size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort); @@ -3410,10 +3416,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out_unplug; } - node = object_property_get_uint(OBJECT(dev), PC_DIMM_NODE_PROP, - &error_abort); - spapr_add_lmbs(dev, addr, size, node, - spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), + spapr_add_lmbs(dev, addr, size, spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), &local_err); if (local_err) { goto out_unplug; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 66b965a0a7..634c28695a 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -700,6 +700,7 @@ static void spapr_drc_lmb_class_init(ObjectClass *k, void *data) drck->typename = "MEM"; drck->drc_name_prefix = "LMB "; drck->release = spapr_lmb_release; + drck->dt_populate = spapr_lmb_dt_populate; } static const TypeInfo spapr_dr_connector_info = { diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 659204ea93..0ec309da49 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -764,9 +764,11 @@ void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, void spapr_clear_pending_events(sPAPRMachineState *spapr); int spapr_max_server_number(sPAPRMachineState *spapr); -/* CPU and LMB DRC release callbacks. */ +/* DRC callbacks. */ void spapr_core_release(DeviceState *dev); void spapr_lmb_release(DeviceState *dev); +int spapr_lmb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); void spapr_rtc_read(sPAPRRTCState *rtc, struct tm *tm, uint32_t *ns); int spapr_rtc_import_offset(sPAPRRTCState *rtc, int64_t legacy_offset); From 345b12b99edefced1c103cc35bd3ae1373c9e9d6 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:17:48 +0100 Subject: [PATCH 31/50] spapr: Generate FDT fragment for CPUs at configure connector time Signed-off-by: Greg Kurz Message-Id: <155059666839.1466090.3833376527523126752.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 52 +++++++++++++++++++----------------------- hw/ppc/spapr_drc.c | 1 + include/hw/ppc/spapr.h | 2 ++ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b92deee771..6cf7a9f5c1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3636,27 +3636,6 @@ out: error_propagate(errp, local_err); } -static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, - sPAPRMachineState *spapr) -{ - PowerPCCPU *cpu = POWERPC_CPU(cs); - DeviceClass *dc = DEVICE_GET_CLASS(cs); - int id = spapr_get_vcpu_id(cpu); - void *fdt; - int offset, fdt_size; - char *nodename; - - fdt = create_device_tree(&fdt_size); - nodename = g_strdup_printf("%s@%x", dc->fw_name, id); - offset = fdt_add_subnode(fdt, 0, nodename); - - spapr_populate_cpu_dt(cs, fdt, offset, spapr); - g_free(nodename); - - *fdt_offset = offset; - return fdt; -} - /* Callback to be called during DRC release. */ void spapr_core_release(DeviceState *dev) { @@ -3717,6 +3696,27 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, spapr_hotplug_req_remove_by_index(drc); } +int spapr_core_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + sPAPRCPUCore *core = SPAPR_CPU_CORE(drc->dev); + CPUState *cs = CPU(core->threads[0]); + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int id = spapr_get_vcpu_id(cpu); + char *nodename; + int offset; + + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); + offset = fdt_add_subnode(fdt, 0, nodename); + g_free(nodename); + + spapr_populate_cpu_dt(cs, fdt, offset, spapr); + + *fdt_start_offset = offset; + return 0; +} + static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -3725,7 +3725,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); - CPUState *cs = CPU(core->threads[0]); + CPUState *cs; sPAPRDRConnector *drc; Error *local_err = NULL; CPUArchId *core_slot; @@ -3744,14 +3744,8 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc || !mc->has_hotpluggable_cpus); if (drc) { - void *fdt; - int fdt_offset; - - fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); - - spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); + spapr_drc_attach(drc, dev, NULL, 0, &local_err); if (local_err) { - g_free(fdt); error_propagate(errp, local_err); return; } diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 634c28695a..aa26aa40be 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -680,6 +680,7 @@ static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) drck->typename = "CPU"; drck->drc_name_prefix = "CPU "; drck->release = spapr_core_release; + drck->dt_populate = spapr_core_dt_populate; } static void spapr_drc_pci_class_init(ObjectClass *k, void *data) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 0ec309da49..5e3c760725 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -766,6 +766,8 @@ int spapr_max_server_number(sPAPRMachineState *spapr); /* DRC callbacks. */ void spapr_core_release(DeviceState *dev); +int spapr_core_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); void spapr_lmb_release(DeviceState *dev); int spapr_lmb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, void *fdt, int *fdt_start_offset, Error **errp); From 46fd02990df10be2833c1c67b244aa87f0b61d6d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:17:53 +0100 Subject: [PATCH 32/50] spapr/pci: Generate FDT fragment at configure connector time Signed-off-by: Greg Kurz Message-Id: <155059667346.1466090.326696113231137772.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 1 + hw/ppc/spapr_pci.c | 19 ++++++++++++------- include/hw/pci-host/spapr.h | 4 +++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index aa26aa40be..248eb8a93d 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -691,6 +691,7 @@ static void spapr_drc_pci_class_init(ObjectClass *k, void *data) drck->typename = "28"; drck->drc_name_prefix = "C"; drck->release = spapr_phb_remove_pci_device_cb; + drck->dt_populate = spapr_pci_dt_populate; } static void spapr_drc_lmb_class_init(ObjectClass *k, void *data) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 60777b2355..b22c9f57b2 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1408,6 +1408,17 @@ static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, return spapr_drc_index(drc); } +int spapr_pci_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev); + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler); + PCIDevice *pdev = PCI_DEVICE(drc->dev); + + *fdt_start_offset = spapr_create_pci_child_dt(sphb, pdev, fdt, 0); + return 0; +} + static void spapr_pci_plug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1417,8 +1428,6 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, Error *local_err = NULL; PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); uint32_t slotnr = PCI_SLOT(pdev->devfn); - void *fdt = NULL; - int fdt_start_offset, fdt_size; /* if DR is disabled we don't need to do anything in the case of * hotplug or coldplug callbacks @@ -1448,10 +1457,7 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, goto out; } - fdt = create_device_tree(&fdt_size); - fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); - - spapr_drc_attach(drc, DEVICE(pdev), fdt, fdt_start_offset, &local_err); + spapr_drc_attach(drc, DEVICE(pdev), NULL, 0, &local_err); if (local_err) { goto out; } @@ -1483,7 +1489,6 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, out: if (local_err) { error_propagate(errp, local_err); - g_free(fdt); } } diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 51d81c4b7c..f6e43f48fe 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -121,8 +121,10 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); -/* PCI release callback. */ +/* DRC callbacks */ void spapr_phb_remove_pci_device_cb(DeviceState *dev); +int spapr_pci_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); /* VFIO EEH hooks */ #ifdef CONFIG_LINUX From 09d876ce2c8ea3efd34e02d898c3122e04b814bc Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:17:58 +0100 Subject: [PATCH 33/50] spapr/drc: Drop spapr_drc_attach() fdt argument All DRC subtypes have been converted to generate the FDT fragment at configure connector time instead of attach time. The fdt and fdt_offset arguments of spapr_drc_attach() aren't needed anymore. Drop them and make the implementation of the dt_populate() method mandatory. Signed-off-by: Greg Kurz Message-Id: <155059667853.1466090.16527852453054217565.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 4 ++-- hw/ppc/spapr_drc.c | 13 +------------ hw/ppc/spapr_pci.c | 2 +- include/hw/ppc/spapr_drc.h | 3 +-- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 6cf7a9f5c1..9364d07364 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3362,7 +3362,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - spapr_drc_attach(drc, dev, NULL, 0, &local_err); + spapr_drc_attach(drc, dev, &local_err); if (local_err) { while (addr > addr_start) { addr -= SPAPR_MEMORY_BLOCK_SIZE; @@ -3744,7 +3744,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc || !mc->has_hotpluggable_cpus); if (drc) { - spapr_drc_attach(drc, dev, NULL, 0, &local_err); + spapr_drc_attach(drc, dev, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 248eb8a93d..87ca7d9735 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -374,11 +374,8 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, } while (fdt_depth != 0); } -void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, - int fdt_start_offset, Error **errp) +void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - trace_spapr_drc_attach(spapr_drc_index(drc)); if (drc->dev) { @@ -387,15 +384,9 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, } g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); - g_assert(fdt || drck->dt_populate); drc->dev = d; - if (fdt) { - drc->fdt = fdt; - drc->fdt_start_offset = fdt_start_offset; - } - object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), (Object **)(&drc->dev), @@ -1113,8 +1104,6 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - g_assert(drc->fdt || drck->dt_populate); - if (!drc->fdt) { Error *local_err = NULL; void *fdt; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index b22c9f57b2..e2bc9fec82 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1457,7 +1457,7 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, goto out; } - spapr_drc_attach(drc, DEVICE(pdev), NULL, 0, &local_err); + spapr_drc_attach(drc, DEVICE(pdev), &local_err); if (local_err) { goto out; } diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 2aa919f0cf..f32758ec84 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -261,8 +261,7 @@ sPAPRDRConnector *spapr_drc_by_id(const char *type, uint32_t id); int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, uint32_t drc_type_mask); -void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, - int fdt_start_offset, Error **errp); +void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, Error **errp); void spapr_drc_detach(sPAPRDRConnector *drc); bool spapr_drc_needed(void *opaque); From 6cead90c5c9c462c56ddc41a0bd0b4cfef9f62a8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:03 +0100 Subject: [PATCH 34/50] xics: Write source state to KVM at claim time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pseries machine only uses LSIs to support legacy PCI devices. Every PHB claims 4 LSIs at realize time. When using in-kernel XICS (or upcoming in-kernel XIVE), QEMU synchronizes the state of all irqs, including these LSIs, later on at machine reset. In order to support PHB hotplug, we need a way to tell KVM about the LSIs that doesn't require a machine reset. An easy way to do that is to always inform KVM when an interrupt is claimed, which really isn't a performance path. Signed-off-by: Greg Kurz Message-Id: <155059668360.1466090.5969630516627776426.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics.c | 4 +++ hw/intc/xics_kvm.c | 76 +++++++++++++++++++++++++------------------ include/hw/ppc/xics.h | 1 + 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 767fdeb829..af7dc709ab 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -758,6 +758,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) ics->irqs[srcno].flags |= lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI; + + if (kvm_irqchip_in_kernel()) { + ics_set_kvm_state_one(ics, srcno); + } } static void xics_register_types(void) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index a00d0a7962..c6e1b630a4 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -213,45 +213,57 @@ void ics_synchronize_state(ICSState *ics) ics_get_kvm_state(ics); } -int ics_set_kvm_state(ICSState *ics) +int ics_set_kvm_state_one(ICSState *ics, int srcno) { uint64_t state; - int i; Error *local_err = NULL; + ICSIRQState *irq = &ics->irqs[srcno]; + int ret; + + state = irq->server; + state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK) + << KVM_XICS_PRIORITY_SHIFT; + if (irq->priority != irq->saved_priority) { + assert(irq->priority == 0xff); + state |= KVM_XICS_MASKED; + } + + if (irq->flags & XICS_FLAGS_IRQ_LSI) { + state |= KVM_XICS_LEVEL_SENSITIVE; + if (irq->status & XICS_STATUS_ASSERTED) { + state |= KVM_XICS_PENDING; + } + } else { + if (irq->status & XICS_STATUS_MASKED_PENDING) { + state |= KVM_XICS_PENDING; + } + } + if (irq->status & XICS_STATUS_PRESENTED) { + state |= KVM_XICS_PRESENTED; + } + if (irq->status & XICS_STATUS_QUEUED) { + state |= KVM_XICS_QUEUED; + } + + ret = kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES, + srcno + ics->offset, &state, true, &local_err); + if (local_err) { + error_report_err(local_err); + return ret; + } + + return 0; +} + +int ics_set_kvm_state(ICSState *ics) +{ + int i; for (i = 0; i < ics->nr_irqs; i++) { - ICSIRQState *irq = &ics->irqs[i]; int ret; - state = irq->server; - state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK) - << KVM_XICS_PRIORITY_SHIFT; - if (irq->priority != irq->saved_priority) { - assert(irq->priority == 0xff); - state |= KVM_XICS_MASKED; - } - - if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { - state |= KVM_XICS_LEVEL_SENSITIVE; - if (irq->status & XICS_STATUS_ASSERTED) { - state |= KVM_XICS_PENDING; - } - } else { - if (irq->status & XICS_STATUS_MASKED_PENDING) { - state |= KVM_XICS_PENDING; - } - } - if (irq->status & XICS_STATUS_PRESENTED) { - state |= KVM_XICS_PRESENTED; - } - if (irq->status & XICS_STATUS_QUEUED) { - state |= KVM_XICS_QUEUED; - } - - ret = kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES, - i + ics->offset, &state, true, &local_err); - if (local_err) { - error_report_err(local_err); + ret = ics_set_kvm_state_one(ics, i); + if (ret) { return ret; } } diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index d36bbe11ee..eb65ad7e43 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -195,6 +195,7 @@ void icp_synchronize_state(ICPState *icp); void icp_kvm_realize(DeviceState *dev, Error **errp); void ics_get_kvm_state(ICSState *ics); +int ics_set_kvm_state_one(ICSState *ics, int srcno); int ics_set_kvm_state(ICSState *ics); void ics_synchronize_state(ICSState *ics); void ics_kvm_set_irq(ICSState *ics, int srcno, int val); From 743ed566c1d80991ab22fb404be706bb8b742335 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:08 +0100 Subject: [PATCH 35/50] spapr: Expose the name of the interrupt controller node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be needed by PHB hotplug in order to access the "phandle" property of the interrupt controller node. Reviewed-by: Cédric Le Goater Signed-off-by: Greg Kurz Reviewed-by: David Gibson Message-Id: <155059668867.1466090.6339199751719123386.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/intc/spapr_xive.c | 9 ++++----- hw/intc/xics_spapr.c | 2 +- hw/ppc/spapr_irq.c | 21 ++++++++++++++++++++- include/hw/ppc/spapr_irq.h | 1 + include/hw/ppc/spapr_xive.h | 3 +++ include/hw/ppc/xics_spapr.h | 2 ++ 6 files changed, 31 insertions(+), 7 deletions(-) diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 290a290e43..06e3c9fdbf 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -317,6 +317,9 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) /* Map all regions */ spapr_xive_map_mmio(xive); + xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, + xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); + qemu_register_reset(spapr_xive_reset, dev); } @@ -1448,7 +1451,6 @@ void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, cpu_to_be32(7), /* start */ cpu_to_be32(0xf8), /* count */ }; - gchar *nodename; /* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */ timas[0] = cpu_to_be64(xive->tm_base + @@ -1458,10 +1460,7 @@ void spapr_dt_xive(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, XIVE_TM_OS_PAGE * (1ull << TM_SHIFT)); timas[3] = cpu_to_be64(1ull << TM_SHIFT); - nodename = g_strdup_printf("interrupt-controller@%" PRIx64, - xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); - _FDT(node = fdt_add_subnode(fdt, 0, nodename)); - g_free(nodename); + _FDT(node = fdt_add_subnode(fdt, 0, xive->nodename)); _FDT(fdt_setprop_string(fdt, node, "device_type", "power-ivpe")); _FDT(fdt_setprop(fdt, node, "reg", timas, sizeof(timas))); diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index e2d8b38183..53bda6661b 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -254,7 +254,7 @@ void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, }; int node; - _FDT(node = fdt_add_subnode(fdt, 0, "interrupt-controller")); + _FDT(node = fdt_add_subnode(fdt, 0, XICS_NODENAME)); _FDT(fdt_setprop_string(fdt, node, "device_type", "PowerPC-External-Interrupt-Presentation")); diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index 4297eed600..359761494c 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -230,6 +230,11 @@ static void spapr_irq_reset_xics(sPAPRMachineState *spapr, Error **errp) /* TODO: create the KVM XICS device */ } +static const char *spapr_irq_get_nodename_xics(sPAPRMachineState *spapr) +{ + return XICS_NODENAME; +} + #define SPAPR_IRQ_XICS_NR_IRQS 0x1000 #define SPAPR_IRQ_XICS_NR_MSIS \ (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI) @@ -249,6 +254,7 @@ sPAPRIrq spapr_irq_xics = { .post_load = spapr_irq_post_load_xics, .reset = spapr_irq_reset_xics, .set_irq = spapr_irq_set_irq_xics, + .get_nodename = spapr_irq_get_nodename_xics, }; /* @@ -384,6 +390,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val) xive_source_set_irq(&spapr->xive->source, srcno, val); } +static const char *spapr_irq_get_nodename_xive(sPAPRMachineState *spapr) +{ + return spapr->xive->nodename; +} + /* * XIVE uses the full IRQ number space. Set it to 8K to be compatible * with XICS. @@ -407,6 +418,7 @@ sPAPRIrq spapr_irq_xive = { .post_load = spapr_irq_post_load_xive, .reset = spapr_irq_reset_xive, .set_irq = spapr_irq_set_irq_xive, + .get_nodename = spapr_irq_get_nodename_xive, }; /* @@ -541,6 +553,11 @@ static void spapr_irq_set_irq_dual(void *opaque, int srcno, int val) spapr_irq_current(spapr)->set_irq(spapr, srcno, val); } +static const char *spapr_irq_get_nodename_dual(sPAPRMachineState *spapr) +{ + return spapr_irq_current(spapr)->get_nodename(spapr); +} + /* * Define values in sync with the XIVE and XICS backend */ @@ -561,7 +578,8 @@ sPAPRIrq spapr_irq_dual = { .cpu_intc_create = spapr_irq_cpu_intc_create_dual, .post_load = spapr_irq_post_load_dual, .reset = spapr_irq_reset_dual, - .set_irq = spapr_irq_set_irq_dual + .set_irq = spapr_irq_set_irq_dual, + .get_nodename = spapr_irq_get_nodename_dual, }; /* @@ -691,4 +709,5 @@ sPAPRIrq spapr_irq_xics_legacy = { .cpu_intc_create = spapr_irq_cpu_intc_create_xics, .post_load = spapr_irq_post_load_xics, .set_irq = spapr_irq_set_irq_xics, + .get_nodename = spapr_irq_get_nodename_xics, }; diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index 488511c3d8..8bf1a72919 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -47,6 +47,7 @@ typedef struct sPAPRIrq { int (*post_load)(sPAPRMachineState *spapr, int version_id); void (*reset)(sPAPRMachineState *spapr, Error **errp); void (*set_irq)(void *opaque, int srcno, int val); + const char *(*get_nodename)(sPAPRMachineState *spapr); } sPAPRIrq; extern sPAPRIrq spapr_irq_xics; diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index 9bec9192e4..2d31f24e3b 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -26,6 +26,9 @@ typedef struct sPAPRXive { XiveENDSource end_source; hwaddr end_base; + /* DT */ + gchar *nodename; + /* Routing table */ XiveEAS *eat; uint32_t nr_irqs; diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h index b1ab27d022..b8d924baf4 100644 --- a/include/hw/ppc/xics_spapr.h +++ b/include/hw/ppc/xics_spapr.h @@ -29,6 +29,8 @@ #include "hw/ppc/spapr.h" +#define XICS_NODENAME "interrupt-controller" + void spapr_dt_xics(sPAPRMachineState *spapr, uint32_t nr_servers, void *fdt, uint32_t phandle); int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); From ad62bff6383178fe95caff3ec63a6ca4ee398dec Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:13 +0100 Subject: [PATCH 36/50] spapr_irq: Expose the phandle of the interrupt controller This will be used by PHB hotplug in order to create the "interrupt-map" property of the PHB node. Signed-off-by: Greg Kurz Message-Id: <155059669374.1466090.12943228478046223856.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_irq.c | 21 +++++++++++++++++++++ include/hw/ppc/spapr_irq.h | 1 + 2 files changed, 22 insertions(+) diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index 359761494c..4145079d7f 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -638,6 +638,27 @@ void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp) } } +int spapr_irq_get_phandle(sPAPRMachineState *spapr, void *fdt, Error **errp) +{ + const char *nodename = spapr->irq->get_nodename(spapr); + int offset, phandle; + + offset = fdt_subnode_offset(fdt, 0, nodename); + if (offset < 0) { + error_setg(errp, "Can't find node \"%s\": %s", nodename, + fdt_strerror(offset)); + return -1; + } + + phandle = fdt_get_phandle(fdt, offset); + if (!phandle) { + error_setg(errp, "Can't get phandle of node \"%s\"", nodename); + return -1; + } + + return phandle; +} + /* * XICS legacy routines - to deprecate one day */ diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index 8bf1a72919..ec1ee64fa6 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -61,6 +61,7 @@ void spapr_irq_free(sPAPRMachineState *spapr, int irq, int num); qemu_irq spapr_qirq(sPAPRMachineState *spapr, int irq); int spapr_irq_post_load(sPAPRMachineState *spapr, int version_id); void spapr_irq_reset(sPAPRMachineState *spapr, Error **errp); +int spapr_irq_get_phandle(sPAPRMachineState *spapr, void *fdt, Error **errp); /* * XICS legacy routines From ef28b98d58121b0a9ae94c9397d1def2531166ed Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:18 +0100 Subject: [PATCH 37/50] spapr_pci: add PHB unrealize To support PHB hotplug we need to clean up lingering references, memory, child properties, etc. prior to the PHB object being finalized. Generally this will be called as a result of calling object_unparent() on the PHB object, which in turn would normally be called as the result of an unplug() operation. When the PHB is finalized, child objects will be unparented in turn, and finalized if the PHB was the only reference holder. so we don't bother to explicitly unparent child objects of the PHB, with the notable exception of DRCs. This is needed to avoid a QEMU crash when unplugging a PHB and resetting the machine before the guest could handle the event. The DRCs are removed from the QOM tree by pci_unregister_root_bus() and we must make sure we're not leaving stale aliases under the global /dr-connector path. The formula that gives the number of DMA windows is moved to an inline function in the hw/pci-host/spapr.h header because it will have other users. The unrealize function is able to cope with partially realized PHBs. It is hence used to implement proper rollback on the realize error path. Signed-off-by: Michael Roth Signed-off-by: Greg Kurz Reviewed-by: David Gibson Message-Id: <155059669881.1466090.13515030705986041517.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 86 +++++++++++++++++++++++++++++++++++-- include/hw/pci-host/spapr.h | 5 +++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e2bc9fec82..ede928b0bf 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1570,6 +1570,75 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, } } +static void spapr_phb_finalizefn(Object *obj) +{ + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(obj); + + g_free(sphb->dtbusname); + sphb->dtbusname = NULL; +} + +static void spapr_phb_unrealize(DeviceState *dev, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SysBusDevice *s = SYS_BUS_DEVICE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(phb); + sPAPRTCETable *tcet; + int i; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + if (sphb->msi) { + g_hash_table_unref(sphb->msi); + sphb->msi = NULL; + } + + /* + * Remove IO/MMIO subregions and aliases, rest should get cleaned + * via PHB's unrealize->object_finalize + */ + for (i = windows_supported - 1; i >= 0; i--) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + if (tcet) { + memory_region_del_subregion(&sphb->iommu_root, + spapr_tce_get_iommu(tcet)); + } + } + + if (sphb->dr_enabled) { + for (i = PCI_SLOT_MAX * 8 - 1; i >= 0; i--) { + sPAPRDRConnector *drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, + (sphb->index << 16) | i); + + if (drc) { + object_unparent(OBJECT(drc)); + } + } + } + + for (i = PCI_NUM_PINS - 1; i >= 0; i--) { + if (sphb->lsi_table[i].irq) { + spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1); + sphb->lsi_table[i].irq = 0; + } + } + + QLIST_REMOVE(sphb, list); + + memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow); + + address_space_destroy(&sphb->iommu_as); + + qbus_set_hotplug_handler(BUS(phb->bus), NULL, &error_abort); + pci_unregister_root_bus(phb->bus); + + memory_region_del_subregion(get_system_memory(), &sphb->iowindow); + if (sphb->mem64_win_pciaddr != (hwaddr)-1) { + memory_region_del_subregion(get_system_memory(), &sphb->mem64window); + } + memory_region_del_subregion(get_system_memory(), &sphb->mem32window); +} + static void spapr_phb_realize(DeviceState *dev, Error **errp) { /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user @@ -1587,8 +1656,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) PCIBus *bus; uint64_t msi_window_size = 4096; sPAPRTCETable *tcet; - const unsigned windows_supported = - sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); if (!spapr) { error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine"); @@ -1745,6 +1813,10 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) if (local_err) { error_propagate_prepend(errp, local_err, "can't allocate LSIs: "); + /* + * Older machines will never support PHB hotplug, ie, this is an + * init only path and QEMU will terminate. No need to rollback. + */ return; } } @@ -1752,7 +1824,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) spapr_irq_claim(spapr, irq, true, &local_err); if (local_err) { error_propagate_prepend(errp, local_err, "can't allocate LSIs: "); - return; + goto unrealize; } sphb->lsi_table[i].irq = irq; @@ -1772,13 +1844,17 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) if (!tcet) { error_setg(errp, "Creating window#%d failed for %s", i, sphb->dtbusname); - return; + goto unrealize; } memory_region_add_subregion(&sphb->iommu_root, 0, spapr_tce_get_iommu(tcet)); } sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); + return; + +unrealize: + spapr_phb_unrealize(dev, NULL); } static int spapr_phb_children_reset(Object *child, void *opaque) @@ -1977,6 +2053,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) hc->root_bus_path = spapr_phb_root_bus_path; dc->realize = spapr_phb_realize; + dc->unrealize = spapr_phb_unrealize; dc->props = spapr_phb_properties; dc->reset = spapr_phb_reset; dc->vmsd = &vmstate_spapr_pci; @@ -1992,6 +2069,7 @@ static const TypeInfo spapr_phb_info = { .name = TYPE_SPAPR_PCI_HOST_BRIDGE, .parent = TYPE_PCI_HOST_BRIDGE, .instance_size = sizeof(sPAPRPHBState), + .instance_finalize = spapr_phb_finalizefn, .class_init = spapr_phb_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index f6e43f48fe..4b0443f4cf 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -165,4 +165,9 @@ static inline void spapr_phb_vfio_reset(DeviceState *qdev) void spapr_phb_dma_reset(sPAPRPHBState *sphb); +static inline unsigned spapr_phb_windows_supported(sPAPRPHBState *sphb) +{ + return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; +} + #endif /* PCI_HOST_SPAPR_H */ From 962b6c3650d6b43b21ffc12eb83e0ca39aace7af Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 19 Feb 2019 18:18:23 +0100 Subject: [PATCH 38/50] spapr: create DR connectors for PHBs Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Greg Kurz Message-Id: <155059670389.1466090.10015601248906623076.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 13 +++++++++++++ hw/ppc/spapr_drc.c | 17 +++++++++++++++++ include/hw/ppc/spapr.h | 1 + include/hw/ppc/spapr_drc.h | 8 ++++++++ 4 files changed, 39 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9364d07364..96bea7580a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2875,6 +2875,19 @@ static void spapr_machine_init(MachineState *machine) /* We always have at least the nvram device on VIO */ spapr_create_nvram(spapr); + /* + * Setup hotplug / dynamic-reconfiguration connectors. top-level + * connectors (described in root DT node's "ibm,drc-types" property) + * are pre-initialized here. additional child connectors (such as + * connectors for a PHBs PCI slots) are added as needed during their + * parent's realization. + */ + if (smc->dr_phb_enabled) { + for (i = 0; i < SPAPR_MAX_PHBS; i++) { + spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i); + } + } + /* Set up PCI */ spapr_pci_rtas_init(); diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 87ca7d9735..fd6380adb3 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -696,6 +696,15 @@ static void spapr_drc_lmb_class_init(ObjectClass *k, void *data) drck->dt_populate = spapr_lmb_dt_populate; } +static void spapr_drc_phb_class_init(ObjectClass *k, void *data) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB; + drck->typename = "PHB"; + drck->drc_name_prefix = "PHB "; +} + static const TypeInfo spapr_dr_connector_info = { .name = TYPE_SPAPR_DR_CONNECTOR, .parent = TYPE_DEVICE, @@ -739,6 +748,13 @@ static const TypeInfo spapr_drc_lmb_info = { .class_init = spapr_drc_lmb_class_init, }; +static const TypeInfo spapr_drc_phb_info = { + .name = TYPE_SPAPR_DRC_PHB, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .instance_size = sizeof(sPAPRDRConnector), + .class_init = spapr_drc_phb_class_init, +}; + /* helper functions for external users */ sPAPRDRConnector *spapr_drc_by_index(uint32_t index) @@ -1207,6 +1223,7 @@ static void spapr_drc_register_types(void) type_register_static(&spapr_drc_cpu_info); type_register_static(&spapr_drc_pci_info); type_register_static(&spapr_drc_lmb_info); + type_register_static(&spapr_drc_phb_info); spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator", rtas_set_indicator); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5e3c760725..b173fd7149 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -104,6 +104,7 @@ struct sPAPRMachineClass { /*< public >*/ bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ + bool dr_phb_enabled; /* enable dynamic-reconfig/hotplug of PHBs */ bool update_dt_enabled; /* enable KVMPPC_H_UPDATE_DT */ bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ bool pre_2_10_has_unused_icps; diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index f32758ec84..46b0f6216d 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -71,6 +71,14 @@ #define SPAPR_DRC_LMB(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \ TYPE_SPAPR_DRC_LMB) +#define TYPE_SPAPR_DRC_PHB "spapr-drc-phb" +#define SPAPR_DRC_PHB_GET_CLASS(obj) \ + OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_PHB) +#define SPAPR_DRC_PHB_CLASS(klass) \ + OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, TYPE_SPAPR_DRC_PHB) +#define SPAPR_DRC_PHB(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \ + TYPE_SPAPR_DRC_PHB) + /* * Various hotplug types managed by sPAPRDRConnector * From 3998ccd092989da5b6abcd0b182f49a55c841e5f Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 19 Feb 2019 18:18:29 +0100 Subject: [PATCH 39/50] spapr: populate PHB DRC entries for root DT node This add entries to the root OF node to advertise our PHBs as being DR-capable in accordance with PAPR specification. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Greg Kurz Message-Id: <155059670897.1466090.10843921337591637414.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 96bea7580a..fcda177090 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1365,6 +1365,14 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr) exit(1); } + if (smc->dr_phb_enabled) { + ret = spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_PHB); + if (ret < 0) { + error_report("Couldn't set up PHB DR device tree properties"); + exit(1); + } + } + return fdt; } From 4b6d336f2c9e89350a8fafebb49422da24cbcb45 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 19 Feb 2019 18:18:34 +0100 Subject: [PATCH 40/50] spapr_events: add support for phb hotplug events Extend the existing EPOW event format we use for PCI devices to emit PHB plug/unplug events. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Greg Kurz Message-Id: <155059671405.1466090.535964535260503283.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index b9c7ecb9e9..ab9a1f0063 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -526,6 +526,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, case SPAPR_DR_CONNECTOR_TYPE_CPU: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PHB; + break; default: /* we shouldn't be signaling hotplug events for resources * that don't support them From 0a0a66cd1b79cba792cf00293a540e0330375f3c Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 19 Feb 2019 18:18:39 +0100 Subject: [PATCH 41/50] spapr_pci: provide node start offset via spapr_populate_pci_dt() PHB hotplug re-uses PHB device tree generation code and passes it to a guest via RTAS. Doing this requires knowledge of where exactly in the device tree the node describing the PHB begins. Provide this via a new optional pointer that can be used to store the PHB node's start offset. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Greg Kurz Message-Id: <155059671912.1466090.10891589403973703473.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 +- hw/ppc/spapr_pci.c | 5 ++++- include/hw/pci-host/spapr.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index fcda177090..76b3c15d59 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1312,7 +1312,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr) QLIST_FOREACH(phb, &spapr->phbs, list) { ret = spapr_populate_pci_dt(phb, PHANDLE_INTC, fdt, - spapr->irq->nr_msis); + spapr->irq->nr_msis, NULL); if (ret < 0) { error_report("couldn't setup PCI devices in fdt"); exit(1); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index ede928b0bf..a0e1769439 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -2153,7 +2153,7 @@ static void spapr_phb_pci_enumerate(sPAPRPHBState *phb) } int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t intc_phandle, void *fdt, - uint32_t nr_msis) + uint32_t nr_msis, int *node_offset) { int bus_off, i, j, ret; gchar *nodename; @@ -2208,6 +2208,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t intc_phandle, void *fdt, nodename = g_strdup_printf("pci@%" PRIx64, phb->buid); _FDT(bus_off = fdt_add_subnode(fdt, 0, nodename)); g_free(nodename); + if (node_offset) { + *node_offset = bus_off; + } /* Write PHB properties */ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 4b0443f4cf..ab0e3a0a6f 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -113,7 +113,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) } int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t intc_phandle, void *fdt, - uint32_t nr_msis); + uint32_t nr_msis, int *node_offset); void spapr_pci_rtas_init(void); From f130928d2a9b598ccdb61e1d834674f224eb9faa Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 19 Feb 2019 18:18:44 +0100 Subject: [PATCH 42/50] spapr_pci: add ibm, my-drc-index property for PHB hotplug This is needed to denote a boot-time PHB as being hot-pluggable. Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: Greg Kurz Message-Id: <155059672420.1466090.15147504040270659866.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a0e1769439..03fc26985a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -2203,6 +2203,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t intc_phandle, void *fdt, sPAPRTCETable *tcet; PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; sPAPRFDT s_fdt; + sPAPRDRConnector *drc; /* Start populating the FDT */ nodename = g_strdup_printf("pci@%" PRIx64, phb->buid); @@ -2269,6 +2270,14 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t intc_phandle, void *fdt, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index); + if (drc) { + uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc)); + + _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index, + sizeof(drc_index))); + } + /* Walk the bridges and program the bus numbers*/ spapr_phb_pci_enumerate(phb); _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1)); From bb2bdd812ee9a9a38007b95c3967c528e552912c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:49 +0100 Subject: [PATCH 43/50] spapr: add hotplug hooks for PHB hotplug Hotplugging PHBs is a machine-level operation, but PHBs reside on the main system bus, so we register spapr machine as the handler for the main system bus. Provide the usual pre-plug, plug and unplug-request handlers. Move the checking of the PHB index to the pre-plug handler. It is okay to do that and assert in the realize function because the pre-plug handler is always called, even for the oldest machine types we support. Signed-off-by: Michael Roth (Fixed interrupt controller phandle in "interrupt-map" and TCE table size in "ibm,dma-window" FDT fragment, Greg Kurz) Signed-off-by: Greg Kurz Message-Id: <155059672926.1466090.13612804072190051439.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 128 ++++++++++++++++++++++++++++++++++++++++- hw/ppc/spapr_drc.c | 2 + hw/ppc/spapr_pci.c | 16 +----- include/hw/ppc/spapr.h | 3 + 4 files changed, 133 insertions(+), 16 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 76b3c15d59..7422c05254 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3009,6 +3009,9 @@ static void spapr_machine_init(MachineState *machine) register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), + &error_fatal); + qemu_register_boot_set(spapr_boot_set, spapr); if (kvm_enabled()) { @@ -3850,6 +3853,115 @@ out: error_propagate(errp, local_err); } +int spapr_phb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(drc->dev); + int intc_phandle; + + intc_phandle = spapr_irq_get_phandle(spapr, spapr->fdt_blob, errp); + if (intc_phandle <= 0) { + return -1; + } + + if (spapr_populate_pci_dt(sphb, intc_phandle, fdt, spapr->irq->nr_msis, + fdt_start_offset)) { + error_setg(errp, "unable to create FDT node for PHB %d", sphb->index); + return -1; + } + + /* generally SLOF creates these, for hotplug it's up to QEMU */ + _FDT(fdt_setprop_string(fdt, *fdt_start_offset, "name", "pci")); + + return 0; +} + +static void spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + if (dev->hotplugged && !smc->dr_phb_enabled) { + error_setg(errp, "PHB hotplug not supported for this machine"); + return; + } + + if (sphb->index == (uint32_t)-1) { + error_setg(errp, "\"index\" for PAPR PHB is mandatory"); + return; + } + + /* + * This will check that sphb->index doesn't exceed the maximum number of + * PHBs for the current machine type. + */ + smc->phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, + windows_supported, sphb->dma_liobn, errp); +} + +static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + sPAPRDRConnector *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + Error *local_err = NULL; + + if (!smc->dr_phb_enabled) { + return; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + /* hotplug hooks should check it's enabled before getting this far */ + assert(drc); + + spapr_drc_attach(drc, DEVICE(dev), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } +} + +void spapr_phb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +} + +static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + object_unparent(OBJECT(dev)); +} + +static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + sPAPRDRConnector *drc; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + assert(drc); + + if (!spapr_drc_unplug_requested(drc)) { + spapr_drc_detach(drc); + spapr_hotplug_req_remove_by_index(drc); + } +} + static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -3857,6 +3969,8 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, spapr_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + spapr_phb_plug(hotplug_dev, dev, errp); } } @@ -3867,6 +3981,8 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, spapr_memory_unplug(hotplug_dev, dev); } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_unplug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + spapr_phb_unplug(hotplug_dev, dev); } } @@ -3875,6 +3991,7 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, { sPAPRMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev)); MachineClass *mc = MACHINE_GET_CLASS(sms); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { @@ -3894,6 +4011,12 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, return; } spapr_core_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + if (!smc->dr_phb_enabled) { + error_setg(errp, "PHB hot unplug not supported on this machine"); + return; + } + spapr_phb_unplug_request(hotplug_dev, dev, errp); } } @@ -3904,6 +4027,8 @@ static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, spapr_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + spapr_phb_pre_plug(hotplug_dev, dev, errp); } } @@ -3911,7 +4036,8 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine, DeviceState *dev) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE) || + object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { return HOTPLUG_HANDLER(machine); } return NULL; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index fd6380adb3..2943cf47d4 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -703,6 +703,8 @@ static void spapr_drc_phb_class_init(ObjectClass *k, void *data) drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB; drck->typename = "PHB"; drck->drc_name_prefix = "PHB "; + drck->release = spapr_phb_release; + drck->dt_populate = spapr_phb_dt_populate; } static const TypeInfo spapr_dr_connector_info = { diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 03fc26985a..06a5ffd281 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1663,21 +1663,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } - if (sphb->index != (uint32_t)-1) { - Error *local_err = NULL; - - smc->phb_placement(spapr, sphb->index, - &sphb->buid, &sphb->io_win_addr, - &sphb->mem_win_addr, &sphb->mem64_win_addr, - windows_supported, sphb->dma_liobn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } else { - error_setg(errp, "\"index\" for PAPR PHB is mandatory"); - return; - } + assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */ if (sphb->mem64_win_size != 0) { if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index b173fd7149..59073a7579 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -772,6 +772,9 @@ int spapr_core_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, void spapr_lmb_release(DeviceState *dev); int spapr_lmb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, void *fdt, int *fdt_start_offset, Error **errp); +void spapr_phb_release(DeviceState *dev); +int spapr_phb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); void spapr_rtc_read(sPAPRRTCState *rtc, struct tm *tm, uint32_t *ns); int spapr_rtc_import_offset(sPAPRRTCState *rtc, int64_t legacy_offset); From dae5e39adae897e859685975afc1d2c0ade38505 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 19 Feb 2019 18:18:54 +0100 Subject: [PATCH 44/50] spapr: enable PHB hotplug for default pseries machine type The 'dr_phb_enabled' field of that class can be set as part of machine-specific init code. It will be used to conditionally enable creation of DRC objects and device-tree description to facilitate hotplug of PHBs. Since we can't migrate this state to older machine types, default the option to true and disable it for older machine types. Signed-off-by: Michael Roth Signed-off-by: Greg Kurz Reviewed-by: David Gibson Message-Id: <155059673433.1466090.6188091133769611501.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 7422c05254..5d8b8c9e53 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4290,6 +4290,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc, &error_abort); smc->irq = &spapr_irq_xics; + smc->dr_phb_enabled = true; } static const TypeInfo spapr_machine_info = { @@ -4361,6 +4362,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc) mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); smc->update_dt_enabled = false; + smc->dr_phb_enabled = false; } DEFINE_SPAPR_MACHINE(3_1, "3.1", false); From 9bcb5b29419c79f0249280aaed80d40d7d798eeb Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 19 Feb 2019 18:18:59 +0100 Subject: [PATCH 45/50] tests/device-plug: Add PHB unplug request test for spapr We can easily test this, just like PCI. PHB unplug is not supported on s390x and x86 ACPI. Signed-off-by: Greg Kurz Message-Id: <155059673939.1466090.14354001937819612724.stgit@bahia.lab.toulouse-stg.fr.ibm.com> Signed-off-by: David Gibson --- tests/device-plug-test.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/device-plug-test.c b/tests/device-plug-test.c index 87593d9ecf..318e422d51 100644 --- a/tests/device-plug-test.c +++ b/tests/device-plug-test.c @@ -132,6 +132,20 @@ static void test_spapr_memory_unplug_request(void) qtest_quit(qtest); } +static void test_spapr_phb_unplug_request(void) +{ + QTestState *qtest; + + qtest = qtest_initf("-device spapr-pci-host-bridge,index=1,id=dev0"); + + /* similar to test_pci_unplug_request */ + device_del_request(qtest, "dev0"); + system_reset(qtest); + wait_device_deleted_event(qtest, "dev0"); + + qtest_quit(qtest); +} + int main(int argc, char **argv) { const char *arch = qtest_get_arch(); @@ -156,6 +170,8 @@ int main(int argc, char **argv) test_spapr_cpu_unplug_request); qtest_add_func("/device-plug/spapr-memory-unplug-request", test_spapr_memory_unplug_request); + qtest_add_func("/device-plug/spapr-phb-unplug-request", + test_spapr_phb_unplug_request); } return g_test_run(); From 3dbe65c17890e4beefe9812590c723e17e611852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 19 Feb 2019 15:25:30 +0100 Subject: [PATCH 46/50] ppc/xive: xive does not have a POWER7 interrupt model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch "target/ppc: Add POWER9 external interrupt model" should have removed the section covering PPC_FLAGS_INPUT_POWER7. Signed-off-by: Cédric Le Goater Message-Id: <20190219142530.17807-1-clg@kaod.org> Signed-off-by: David Gibson --- hw/intc/xive.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 425aa97ef9..daa7badc84 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -481,9 +481,6 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) env = &cpu->env; switch (PPC_INPUT(env)) { - case PPC_FLAGS_INPUT_POWER7: - tctx->output = env->irq_inputs[POWER7_INPUT_INT]; - break; case PPC_FLAGS_INPUT_POWER9: tctx->output = env->irq_inputs[POWER9_INPUT_INT]; break; From f6d4dca807d853516fc307519f3260a432818bdc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 21 Feb 2019 12:24:48 +0100 Subject: [PATCH 47/50] hw/ppc: Use object_initialize_child for correct reference counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both functions, object_initialize() and object_property_add_child() increase the reference counter of the new object, so one of the references has to be dropped afterwards to get the reference counting right. Otherwise the child object will not be properly cleaned up when the parent gets destroyed. Thus let's use now object_initialize_child() instead to get the reference counting here right. Suggested-by: Eduardo Habkost Signed-off-by: Thomas Huth Message-Id: <1550748288-30598-1-git-send-email-thuth@redhat.com> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/spapr_xive.c | 11 +++++------ hw/ppc/pnv.c | 12 ++++++------ hw/ppc/pnv_psi.c | 4 ++-- hw/ppc/spapr.c | 6 +++--- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 06e3c9fdbf..e0e5cb5d8e 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -244,13 +244,12 @@ static void spapr_xive_instance_init(Object *obj) { sPAPRXive *xive = SPAPR_XIVE(obj); - object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE); - object_property_add_child(obj, "source", OBJECT(&xive->source), NULL); + object_initialize_child(obj, "source", &xive->source, sizeof(xive->source), + TYPE_XIVE_SOURCE, &error_abort, NULL); - object_initialize(&xive->end_source, sizeof(xive->end_source), - TYPE_XIVE_END_SOURCE); - object_property_add_child(obj, "end_source", OBJECT(&xive->end_source), - NULL); + object_initialize_child(obj, "end_source", &xive->end_source, + sizeof(xive->end_source), TYPE_XIVE_END_SOURCE, + &error_abort, NULL); } static void spapr_xive_realize(DeviceState *dev, Error **errp) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index da540860a2..9e03e9c336 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -736,18 +736,18 @@ static void pnv_chip_power8_instance_init(Object *obj) { Pnv8Chip *chip8 = PNV8_CHIP(obj); - object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI); - object_property_add_child(obj, "psi", OBJECT(&chip8->psi), NULL); + object_initialize_child(obj, "psi", &chip8->psi, sizeof(chip8->psi), + TYPE_PNV_PSI, &error_abort, NULL); object_property_add_const_link(OBJECT(&chip8->psi), "xics", OBJECT(qdev_get_machine()), &error_abort); - object_initialize(&chip8->lpc, sizeof(chip8->lpc), TYPE_PNV_LPC); - object_property_add_child(obj, "lpc", OBJECT(&chip8->lpc), NULL); + object_initialize_child(obj, "lpc", &chip8->lpc, sizeof(chip8->lpc), + TYPE_PNV_LPC, &error_abort, NULL); object_property_add_const_link(OBJECT(&chip8->lpc), "psi", OBJECT(&chip8->psi), &error_abort); - object_initialize(&chip8->occ, sizeof(chip8->occ), TYPE_PNV_OCC); - object_property_add_child(obj, "occ", OBJECT(&chip8->occ), NULL); + object_initialize_child(obj, "occ", &chip8->occ, sizeof(chip8->occ), + TYPE_PNV_OCC, &error_abort, NULL); object_property_add_const_link(OBJECT(&chip8->occ), "psi", OBJECT(&chip8->psi), &error_abort); } diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c index 8ced095063..44bc0cbf58 100644 --- a/hw/ppc/pnv_psi.c +++ b/hw/ppc/pnv_psi.c @@ -444,8 +444,8 @@ static void pnv_psi_init(Object *obj) { PnvPsi *psi = PNV_PSI(obj); - object_initialize(&psi->ics, sizeof(psi->ics), TYPE_ICS_SIMPLE); - object_property_add_child(obj, "ics-psi", OBJECT(&psi->ics), NULL); + object_initialize_child(obj, "ics-psi", &psi->ics, sizeof(psi->ics), + TYPE_ICS_SIMPLE, &error_abort, NULL); } static const uint8_t irq_to_xivr[] = { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 5d8b8c9e53..b6a571b6f1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1776,9 +1776,9 @@ static void spapr_create_nvram(sPAPRMachineState *spapr) static void spapr_rtc_create(sPAPRMachineState *spapr) { - object_initialize(&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC); - object_property_add_child(OBJECT(spapr), "rtc", OBJECT(&spapr->rtc), - &error_fatal); + object_initialize_child(OBJECT(spapr), "rtc", + &spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC, + &error_fatal, NULL); object_property_set_bool(OBJECT(&spapr->rtc), true, "realized", &error_fatal); object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc), From b45b56baeecd6391bdfed5bd66f2cd59a3c86a77 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 25 Feb 2019 14:01:53 -0300 Subject: [PATCH 48/50] ppc/pnv: increase kernel size limit to 256MiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building kernel with CONFIG_DEBUG_INFO_REDUCED can generate a ~90MB image and building with CONFIG_DEBUG_INFO can generate a ~225M one, both exceeds the current limit of 32MiB. Increasing kernel size limit to 256MiB should fit for now. Signed-off-by: Murilo Opsfelder Araujo Message-Id: <20190225170155.1972-2-muriloo@linux.ibm.com> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/pnv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 9e03e9c336..4144976aec 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -54,6 +54,7 @@ #define FW_MAX_SIZE 0x00400000 #define KERNEL_LOAD_ADDR 0x20000000 +#define KERNEL_MAX_SIZE (256 * MiB) #define INITRD_LOAD_ADDR 0x60000000 static const char *pnv_chip_core_typename(const PnvChip *o) @@ -588,7 +589,7 @@ static void pnv_init(MachineState *machine) long kernel_size; kernel_size = load_image_targphys(machine->kernel_filename, - KERNEL_LOAD_ADDR, 0x2000000); + KERNEL_LOAD_ADDR, KERNEL_MAX_SIZE); if (kernel_size < 0) { error_report("Could not load kernel '%s'", machine->kernel_filename); From 584ea7e76f9aeb010188e23769566617d53877e3 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 25 Feb 2019 14:01:54 -0300 Subject: [PATCH 49/50] ppc/pnv: add INITRD_MAX_SIZE constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current 0x10000000 value is actually 256MiB, not 128MB as the comment suggests. Move it to a constant and fix the comment (no change in the size value). Signed-off-by: Murilo Opsfelder Araujo Message-Id: <20190225170155.1972-3-muriloo@linux.ibm.com> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/pnv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 4144976aec..0cd6af4669 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -56,6 +56,7 @@ #define KERNEL_LOAD_ADDR 0x20000000 #define KERNEL_MAX_SIZE (256 * MiB) #define INITRD_LOAD_ADDR 0x60000000 +#define INITRD_MAX_SIZE (256 * MiB) static const char *pnv_chip_core_typename(const PnvChip *o) { @@ -601,7 +602,7 @@ static void pnv_init(MachineState *machine) if (machine->initrd_filename) { pnv->initrd_base = INITRD_LOAD_ADDR; pnv->initrd_size = load_image_targphys(machine->initrd_filename, - pnv->initrd_base, 0x10000000); /* 128MB max */ + pnv->initrd_base, INITRD_MAX_SIZE); if (pnv->initrd_size < 0) { error_report("Could not load initial ram disk '%s'", machine->initrd_filename); From b268a6162da8ef9daa6384f24d4b95a0385081eb Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 25 Feb 2019 14:01:55 -0300 Subject: [PATCH 50/50] ppc/pnv: use IEC binary prefixes to represent sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using IEC binary prefixes from qemu/units.h provides a more human-friendly value to size constants. Suggested-by: Eric Blake Signed-off-by: Murilo Opsfelder Araujo Message-Id: <20190225170155.1972-4-muriloo@linux.ibm.com> Reviewed-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/pnv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 0cd6af4669..3d5dfef220 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -47,11 +47,11 @@ #include -#define FDT_MAX_SIZE 0x00100000 +#define FDT_MAX_SIZE (1 * MiB) #define FW_FILE_NAME "skiboot.lid" #define FW_LOAD_ADDR 0x0 -#define FW_MAX_SIZE 0x00400000 +#define FW_MAX_SIZE (4 * MiB) #define KERNEL_LOAD_ADDR 0x20000000 #define KERNEL_MAX_SIZE (256 * MiB)