From 8b9f2118ca40b1de72d8f75b59a5fb4d347a69f7 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 16 Mar 2016 10:43:52 +0100 Subject: [PATCH 01/16] ppc64: set MSR_SF bit When a qemu-system-ppc64 is started, the 64-bit mode bit is not set in MSR. Signed-off-by: Laurent Vivier Reviewed-by: Alexander Graf Signed-off-by: David Gibson --- target-ppc/translate_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index fb206aff29..11d5fd37b4 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -9703,7 +9703,7 @@ static void ppc_cpu_reset(CPUState *s) #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { - env->msr |= (1ULL << MSR_SF); + msr |= (1ULL << MSR_SF); } #endif From 0ddbd0536296f5a36c8f225edd4d14441be6b153 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 21 Mar 2016 13:14:02 +1100 Subject: [PATCH 02/16] spapr/target-ppc/kvm: Only add hcall-instructions if KVM supports it ePAPR defines "hcall-instructions" device-tree property which contains code to call hypercalls in ePAPR paravirtualized guests. In general pseries guests won't use this property, instead using the PAPR defined hypercall interface. However, this property has been re-used to implement a hack to allow PR KVM to run (slightly modified) guests in some situations where it otherwise wouldn't be able to (because the system's L0 hypervisor doesn't forward the PAPR hypercalls to the PR KVM kernel). Hence, this property is always present in the device tree for pseries guests. All KVM guests use it at least to read features via the KVM_HC_FEATURES hypercall. The property is populated by the code returned from the KVM's KVM_PPC_GET_PVINFO ioctl; if not implemented in the KVM, QEMU supplies code which will fail all hypercall attempts. If QEMU does not create the property, and the guest kernel is compiled with CONFIG_EPAPR_PARAVIRT (which is normally the case), there is exactly the same stub at @epapr_hypercall_start already. Rather than maintaining this fairly useless stub implementation, it makes more sense not to create the property in the device tree in the first place if the host kernel does not implement it. This changes kvmppc_get_hypercall() to return 1 if the host kernel does not implement KVM_CAP_PPC_GET_PVINFO. The caller can use it to decide on whether to create the property or not. This changes the pseries machine to not create the property if KVM does not implement KVM_PPC_GET_PVINFO. In practice this means that from now on the property will not be created if either HV KVM or TCG is used. Signed-off-by: Alexey Kardashevskiy [reworded commit message for clarity --dwg] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 9 +++++---- target-ppc/kvm.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d0bb42305b..d43d6d998a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -497,10 +497,11 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, * Older KVM versions with older guest kernels were broken with the * magic page, don't allow the guest to map it. */ - kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, - sizeof(hypercall)); - _FDT((fdt_property(fdt, "hcall-instructions", hypercall, - sizeof(hypercall)))); + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT((fdt_property(fdt, "hcall-instructions", hypercall, + sizeof(hypercall)))); + } } _FDT((fdt_end_node(fdt))); } diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 2fc993143e..20f113a079 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -2007,7 +2007,7 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) hc[2] = cpu_to_be32(0x48000008); hc[3] = cpu_to_be32(bswap32(0x3860ffff)); - return 0; + return 1; } static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) From 1488270e822dc497a9bc799c6c49fdca85b9f28b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:31 +0100 Subject: [PATCH 03/16] ppc: Update SPR definitions Add definitions for additional SPR numbers and SPR bit definitions that will be relevant for subsequent improvements to POWER8 emulation Also fix the definition of LPIDR which was incorrect (and is different for server and embedded). Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/cpu.h | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 8d90d862de..9ce301f189 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -474,9 +474,17 @@ struct ppc_slb_t { #define MSR_RI 1 /* Recoverable interrupt 1 */ #define MSR_LE 0 /* Little-endian mode 1 hflags */ -#define LPCR_ILE (1 << (63-38)) -#define LPCR_AIL_SHIFT (63-40) /* Alternate interrupt location */ -#define LPCR_AIL (3 << LPCR_AIL_SHIFT) +/* LPCR bits */ +#define LPCR_VPM0 (1ull << (63 - 0)) +#define LPCR_VPM1 (1ull << (63 - 1)) +#define LPCR_ISL (1ull << (63 - 2)) +#define LPCR_KBV (1ull << (63 - 3)) +#define LPCR_ILE (1ull << (63 - 38)) +#define LPCR_MER (1ull << (63 - 52)) +#define LPCR_LPES0 (1ull << (63 - 60)) +#define LPCR_LPES1 (1ull << (63 - 61)) +#define LPCR_AIL_SHIFT (63 - 40) /* Alternate interrupt location */ +#define LPCR_AIL (3ull << LPCR_AIL_SHIFT) #define msr_sf ((env->msr >> MSR_SF) & 1) #define msr_isf ((env->msr >> MSR_ISF) & 1) @@ -1381,6 +1389,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_MPC_ICTRL (0x09E) #define SPR_MPC_BAR (0x09F) #define SPR_PSPB (0x09F) +#define SPR_DAWR (0x0B4) +#define SPR_RPR (0x0BA) +#define SPR_DAWRX (0x0BC) +#define SPR_HFSCR (0x0BE) #define SPR_VRSAVE (0x100) #define SPR_USPRG0 (0x100) #define SPR_USPRG1 (0x101) @@ -1435,19 +1447,25 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_HSRR1 (0x13B) #define SPR_BOOKE_IAC4 (0x13B) #define SPR_BOOKE_DAC1 (0x13C) -#define SPR_LPIDR (0x13D) +#define SPR_MMCRH (0x13C) #define SPR_DABR2 (0x13D) #define SPR_BOOKE_DAC2 (0x13D) +#define SPR_TFMR (0x13D) #define SPR_BOOKE_DVC1 (0x13E) #define SPR_LPCR (0x13E) #define SPR_BOOKE_DVC2 (0x13F) +#define SPR_LPIDR (0x13F) #define SPR_BOOKE_TSR (0x150) +#define SPR_HMER (0x150) +#define SPR_HMEER (0x151) #define SPR_PCR (0x152) +#define SPR_BOOKE_LPIDR (0x152) #define SPR_BOOKE_TCR (0x154) #define SPR_BOOKE_TLB0PS (0x158) #define SPR_BOOKE_TLB1PS (0x159) #define SPR_BOOKE_TLB2PS (0x15A) #define SPR_BOOKE_TLB3PS (0x15B) +#define SPR_AMOR (0x15D) #define SPR_BOOKE_MAS7_MAS3 (0x174) #define SPR_BOOKE_IVOR0 (0x190) #define SPR_BOOKE_IVOR1 (0x191) @@ -1667,6 +1685,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_RCPU_L2U_RA3 (0x32B) #define SPR_TAR (0x32F) #define SPR_VTB (0x351) +#define SPR_MMCRC (0x353) #define SPR_440_INV0 (0x370) #define SPR_440_INV1 (0x371) #define SPR_440_INV2 (0x372) @@ -1705,6 +1724,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_440_DVLIM (0x398) #define SPR_750_WPAR (0x399) #define SPR_440_IVLIM (0x399) +#define SPR_TSCR (0x399) #define SPR_750_DMAU (0x39A) #define SPR_750_DMAL (0x39B) #define SPR_440_RSTCFG (0x39B) @@ -1879,9 +1899,10 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define L1CSR1_ICE 0x00000001 /* Instruction Cache Enable */ /* HID0 bits */ -#define HID0_DEEPNAP (1 << 24) -#define HID0_DOZE (1 << 23) -#define HID0_NAP (1 << 22) +#define HID0_DEEPNAP (1 << 24) /* pre-2.06 */ +#define HID0_DOZE (1 << 23) /* pre-2.06 */ +#define HID0_NAP (1 << 22) /* pre-2.06 */ +#define HID0_HILE (1ull << (63 - 19)) /* POWER8 */ /*****************************************************************************/ /* PowerPC Instructions types definitions */ @@ -2230,6 +2251,25 @@ enum { PCR_TM_DIS = 1ull << (63-2), /* Trans. memory disable (POWER8) */ }; +/* HMER/HMEER */ +enum { + HMER_MALFUNCTION_ALERT = 1ull << (63 - 0), + HMER_PROC_RECV_DONE = 1ull << (63 - 2), + HMER_PROC_RECV_ERROR_MASKED = 1ull << (63 - 3), + HMER_TFAC_ERROR = 1ull << (63 - 4), + HMER_TFMR_PARITY_ERROR = 1ull << (63 - 5), + HMER_XSCOM_FAIL = 1ull << (63 - 8), + HMER_XSCOM_DONE = 1ull << (63 - 9), + HMER_PROC_RECV_AGAIN = 1ull << (63 - 11), + HMER_WARN_RISE = 1ull << (63 - 14), + HMER_WARN_FALL = 1ull << (63 - 15), + HMER_SCOM_FIR_HMI = 1ull << (63 - 16), + HMER_TRIG_FIR_HMI = 1ull << (63 - 17), + HMER_HYP_RESOURCE_ERR = 1ull << (63 - 20), + HMER_XSCOM_STATUS_MASK = 7ull << (63 - 23), + HMER_XSCOM_STATUS_LSH = (63 - 23), +}; + /*****************************************************************************/ static inline target_ulong cpu_read_xer(CPUPPCState *env) From eb94268e73a379a54326701a0341f2649e222d29 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:32 +0100 Subject: [PATCH 04/16] ppc: Add macros to register hypervisor mode SPRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current set of spr_register_* macros only take the user and supervisor function pointers. To make the transition easy, we don't change that but we add "_hv" variants that can be used to register all 3 sets. To simplify the transition, users of the "old" macro will set the hypervisor callback to be the same as the supervisor one. The new registration function only needs to be used for registers that are either hypervisor only or behave differently in HV mode. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: David Gibson [clg: fixed else if condition in gen_op_mfspr() ] Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Signed-off-by: David Gibson --- target-ppc/translate.c | 34 ++++++++++++++++++++-------------- target-ppc/translate_init.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index e402ff9203..6f0e7b4fac 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -4282,14 +4282,17 @@ static inline void gen_op_mfspr(DisasContext *ctx) void (*read_cb)(DisasContext *ctx, int gprn, int sprn); uint32_t sprn = SPR(ctx->opcode); -#if !defined(CONFIG_USER_ONLY) - if (ctx->hv) - read_cb = ctx->spr_cb[sprn].hea_read; - else if (!ctx->pr) - read_cb = ctx->spr_cb[sprn].oea_read; - else -#endif +#if defined(CONFIG_USER_ONLY) + read_cb = ctx->spr_cb[sprn].uea_read; +#else + if (ctx->pr) { read_cb = ctx->spr_cb[sprn].uea_read; + } else if (ctx->hv) { + read_cb = ctx->spr_cb[sprn].hea_read; + } else { + read_cb = ctx->spr_cb[sprn].oea_read; + } +#endif if (likely(read_cb != NULL)) { if (likely(read_cb != SPR_NOACCESS)) { (*read_cb)(ctx, rD(ctx->opcode), sprn); @@ -4437,14 +4440,17 @@ static void gen_mtspr(DisasContext *ctx) void (*write_cb)(DisasContext *ctx, int sprn, int gprn); uint32_t sprn = SPR(ctx->opcode); -#if !defined(CONFIG_USER_ONLY) - if (ctx->hv) - write_cb = ctx->spr_cb[sprn].hea_write; - else if (!ctx->pr) - write_cb = ctx->spr_cb[sprn].oea_write; - else -#endif +#if defined(CONFIG_USER_ONLY) + write_cb = ctx->spr_cb[sprn].uea_write; +#else + if (ctx->pr) { write_cb = ctx->spr_cb[sprn].uea_write; + } else if (ctx->hv) { + write_cb = ctx->spr_cb[sprn].hea_write; + } else { + write_cb = ctx->spr_cb[sprn].oea_write; + } +#endif if (likely(write_cb != NULL)) { if (likely(write_cb != SPR_NOACCESS)) { (*write_cb)(ctx, sprn, rS(ctx->opcode)); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 11d5fd37b4..ebfce5deb8 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -579,17 +579,33 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val) #define spr_register_kvm(env, num, name, uea_read, uea_write, \ oea_read, oea_write, one_reg_id, initial_value) \ _spr_register(env, num, name, uea_read, uea_write, initial_value) +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + one_reg_id, initial_value) \ + _spr_register(env, num, name, uea_read, uea_write, initial_value) #else #if !defined(CONFIG_KVM) #define spr_register_kvm(env, num, name, uea_read, uea_write, \ - oea_read, oea_write, one_reg_id, initial_value) \ + oea_read, oea_write, one_reg_id, initial_value) \ _spr_register(env, num, name, uea_read, uea_write, \ - oea_read, oea_write, initial_value) + oea_read, oea_write, oea_read, oea_write, initial_value) +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + one_reg_id, initial_value) \ + _spr_register(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, initial_value) #else #define spr_register_kvm(env, num, name, uea_read, uea_write, \ - oea_read, oea_write, one_reg_id, initial_value) \ + oea_read, oea_write, one_reg_id, initial_value) \ _spr_register(env, num, name, uea_read, uea_write, \ - oea_read, oea_write, one_reg_id, initial_value) + oea_read, oea_write, oea_read, oea_write, \ + one_reg_id, initial_value) +#define spr_register_kvm_hv(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + one_reg_id, initial_value) \ + _spr_register(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + one_reg_id, initial_value) #endif #endif @@ -598,6 +614,13 @@ static inline void vscr_init (CPUPPCState *env, uint32_t val) spr_register_kvm(env, num, name, uea_read, uea_write, \ oea_read, oea_write, 0, initial_value) +#define spr_register_hv(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + initial_value) \ + spr_register_kvm_hv(env, num, name, uea_read, uea_write, \ + oea_read, oea_write, hea_read, hea_write, \ + 0, initial_value) + static inline void _spr_register(CPUPPCState *env, int num, const char *name, void (*uea_read)(DisasContext *ctx, int gprn, int sprn), @@ -606,6 +629,8 @@ static inline void _spr_register(CPUPPCState *env, int num, void (*oea_read)(DisasContext *ctx, int gprn, int sprn), void (*oea_write)(DisasContext *ctx, int sprn, int gprn), + void (*hea_read)(DisasContext *opaque, int gprn, int sprn), + void (*hea_write)(DisasContext *opaque, int sprn, int gprn), #endif #if defined(CONFIG_KVM) uint64_t one_reg_id, @@ -633,6 +658,8 @@ static inline void _spr_register(CPUPPCState *env, int num, #if !defined(CONFIG_USER_ONLY) spr->oea_read = oea_read; spr->oea_write = oea_write; + spr->hea_read = hea_read; + spr->hea_write = hea_write; #endif #if defined(CONFIG_KVM) spr->one_reg_id = one_reg_id, From f401dd32cb8e9ef68bc4f0d600479f670c2bf39a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:33 +0100 Subject: [PATCH 05/16] ppc: Add a bunch of hypervisor SPRs to Book3s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't give them a KVM reg number to most of the registers yet as no current KVM version supports HV mode. For DAWR and DAWRX, the KVM reg number is needed since this register can be set by the guest via the H_SET_MODE hypercall. Signed-off-by: Benjamin Herrenschmidt [clg: squashed in patch 'ppc: Add KVM numbers to some P8 SPRs' changed the commit log with a proposal of Thomas Huth removed all hunks except those related to AMOR and DAWR* ] Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/translate_init.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index ebfce5deb8..3ecbd85315 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -1105,6 +1105,11 @@ static void gen_spr_amr (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_UAMOR, 0); + spr_register_hv(env, SPR_AMOR, "AMOR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0); #endif /* !CONFIG_USER_ONLY */ } #endif /* TARGET_PPC64 */ @@ -7491,6 +7496,20 @@ static void gen_spr_book3s_dbg(CPUPPCState *env) KVM_REG_PPC_DABRX, 0x00000000); } +static void gen_spr_book3s_207_dbg(CPUPPCState *env) +{ + spr_register_kvm_hv(env, SPR_DAWR, "DAWR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_DAWR, 0x00000000); + spr_register_kvm_hv(env, SPR_DAWRX, "DAWRX", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_DAWRX, 0x00000000); +} + static void gen_spr_970_dbg(CPUPPCState *env) { /* Breakpoints */ @@ -7960,6 +7979,8 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) } if (version < BOOK3S_CPU_POWER8) { gen_spr_book3s_dbg(env); + } else { + gen_spr_book3s_207_dbg(env); } #if !defined(CONFIG_USER_ONLY) switch (version) { From 26a7f1291bb5581e51c413d744207d0a5910ff4c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:34 +0100 Subject: [PATCH 06/16] ppc: Create cpu_ppc_set_papr() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And move the code adjusting the MSR mask and calling kvmppc_set_papr() to it. This allows us to add a few more things such as disabling setting of MSR:HV and appropriate LPCR bits which will be used when fixing the exception model. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: David Gibson [clg: removed LPCR setting ] Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/spapr.c | 11 ++--------- target-ppc/cpu.h | 1 + target-ppc/translate_init.c | 23 ++++++++++++++++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d43d6d998a..ebbc6fe093 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1613,15 +1613,8 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); - /* PAPR always has exception vectors in RAM not ROM. To ensure this, - * MSR[IP] should never be set. - */ - env->msr_mask &= ~(1 << 6); - - /* Tell KVM that we're in PAPR mode */ - if (kvm_enabled()) { - kvmppc_set_papr(cpu); - } + /* Enable PAPR mode in TCG or KVM */ + cpu_ppc_set_papr(cpu); if (cpu->max_compat) { Error *local_err = NULL; diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 9ce301f189..a7da0d3e95 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1268,6 +1268,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val); void store_booke_tsr (CPUPPCState *env, target_ulong val); void ppc_tlb_invalidate_all (CPUPPCState *env); void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr); +void cpu_ppc_set_papr(PowerPCCPU *cpu); #endif #endif diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 3ecbd85315..4d82e1c756 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8380,8 +8380,29 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; } -#endif /* defined (TARGET_PPC64) */ +#if !defined(CONFIG_USER_ONLY) + +void cpu_ppc_set_papr(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* PAPR always has exception vectors in RAM not ROM. To ensure this, + * MSR[IP] should never be set. + * + * We also disallow setting of MSR_HV + */ + env->msr_mask &= ~((1ull << MSR_EP) | MSR_HVB); + + /* Tell KVM that we're in PAPR mode */ + if (kvm_enabled()) { + kvmppc_set_papr(cpu); + } +} + +#endif /* !defined(CONFIG_USER_ONLY) */ + +#endif /* defined (TARGET_PPC64) */ /*****************************************************************************/ /* Generic CPU instantiation routine */ From 21a558bed91f363ea8fc3f9a95b222a0949c70e7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:35 +0100 Subject: [PATCH 07/16] ppc: Add dummy SPR_IC for POWER8 It's supposed to be an instruction counter. For now make us not crash when accessing it. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/cpu.h | 1 + target-ppc/translate_init.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index a7da0d3e95..167c73f863 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1685,6 +1685,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_MPC_MD_DBRAM1 (0x32A) #define SPR_RCPU_L2U_RA3 (0x32B) #define SPR_TAR (0x32F) +#define SPR_IC (0x350) #define SPR_VTB (0x351) #define SPR_MMCRC (0x353) #define SPR_440_INV0 (0x370) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 4d82e1c756..3a13ad7f90 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7924,6 +7924,17 @@ static void gen_spr_power8_pspb(CPUPPCState *env) KVM_REG_PPC_PSPB, 0); } +static void gen_spr_power8_ic(CPUPPCState *env) +{ +#if !defined(CONFIG_USER_ONLY) + spr_register_hv(env, SPR_IC, "IC", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0); +#endif +} + static void init_proc_book3s_64(CPUPPCState *env, int version) { gen_spr_ne_601(env); @@ -7976,6 +7987,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) gen_spr_power8_tm(env); gen_spr_power8_pspb(env); gen_spr_vtb(env); + gen_spr_power8_ic(env); } if (version < BOOK3S_CPU_POWER8) { gen_spr_book3s_dbg(env); From 6a9c4ef452c98060e919aa49db49c09ed8c37745 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:36 +0100 Subject: [PATCH 08/16] ppc: Initialize AMOR in PAPR mode Make sure we give the guest full authorization Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/translate_init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 3a13ad7f90..5125743ae0 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8398,6 +8398,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) void cpu_ppc_set_papr(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; + ppc_spr_t *amor = &env->spr_cb[SPR_AMOR]; /* PAPR always has exception vectors in RAM not ROM. To ensure this, * MSR[IP] should never be set. @@ -8406,6 +8407,9 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu) */ env->msr_mask &= ~((1ull << MSR_EP) | MSR_HVB); + /* Set a full AMOR so guest can use the AMR as it sees fit */ + env->spr[SPR_AMOR] = amor->default_value = 0xffffffffffffffffull; + /* Tell KVM that we're in PAPR mode */ if (kvm_enabled()) { kvmppc_set_papr(cpu); From 97eaf30ec68eab6efdedf32ad210581d074bcb03 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:37 +0100 Subject: [PATCH 09/16] ppc: Fix writing to AMR/UAMOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The masks weren't chosen nor applied properly. The architecture specifies that writes to AMR are masked by UAMOR for PR=1, otherwise AMOR for HV=0. The writes to UAMOR are masked by AMOR for HV=0 Signed-off-by: Benjamin Herrenschmidt [clg: moved gen_spr_amr() prototype change to next patch ] Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/translate_init.c | 82 ++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 5125743ae0..a5b59e704f 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -1063,26 +1063,68 @@ static void gen_spr_7xx (CPUPPCState *env) #ifdef TARGET_PPC64 #ifndef CONFIG_USER_ONLY -static void spr_read_uamr (DisasContext *ctx, int gprn, int sprn) -{ - gen_load_spr(cpu_gpr[gprn], SPR_AMR); - spr_load_dump_spr(SPR_AMR); -} - -static void spr_write_uamr (DisasContext *ctx, int sprn, int gprn) -{ - gen_store_spr(SPR_AMR, cpu_gpr[gprn]); - spr_store_dump_spr(SPR_AMR); -} - -static void spr_write_uamr_pr (DisasContext *ctx, int sprn, int gprn) +static void spr_write_amr(DisasContext *ctx, int sprn, int gprn) { TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); - gen_load_spr(t0, SPR_UAMOR); - tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]); + /* Note, the HV=1 PR=0 case is handled earlier by simply using + * spr_write_generic for HV mode in the SPR table + */ + + /* Build insertion mask into t1 based on context */ + if (ctx->pr) { + gen_load_spr(t1, SPR_UAMOR); + } else { + gen_load_spr(t1, SPR_AMOR); + } + + /* Mask new bits into t2 */ + tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]); + + /* Load AMR and clear new bits in t0 */ + gen_load_spr(t0, SPR_AMR); + tcg_gen_andc_tl(t0, t0, t1); + + /* Or'in new bits and write it out */ + tcg_gen_or_tl(t0, t0, t2); gen_store_spr(SPR_AMR, t0); spr_store_dump_spr(SPR_AMR); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} + +static void spr_write_uamor(DisasContext *ctx, int sprn, int gprn) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + /* Note, the HV=1 case is handled earlier by simply using + * spr_write_generic for HV mode in the SPR table + */ + + /* Build insertion mask into t1 based on context */ + gen_load_spr(t1, SPR_AMOR); + + /* Mask new bits into t2 */ + tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]); + + /* Load AMR and clear new bits in t0 */ + gen_load_spr(t0, SPR_UAMOR); + tcg_gen_andc_tl(t0, t0, t1); + + /* Or'in new bits and write it out */ + tcg_gen_or_tl(t0, t0, t2); + gen_store_spr(SPR_UAMOR, t0); + spr_store_dump_spr(SPR_UAMOR); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); } #endif /* CONFIG_USER_ONLY */ @@ -1094,15 +1136,17 @@ static void gen_spr_amr (CPUPPCState *env) * userspace accessible, 29 is privileged. So we only need to set * the kvm ONE_REG id on one of them, we use 29 */ spr_register(env, SPR_UAMR, "UAMR", - &spr_read_uamr, &spr_write_uamr_pr, - &spr_read_uamr, &spr_write_uamr, + &spr_read_generic, &spr_write_amr, + &spr_read_generic, &spr_write_amr, 0); - spr_register_kvm(env, SPR_AMR, "AMR", + spr_register_kvm_hv(env, SPR_AMR, "AMR", SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_amr, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_AMR, 0); - spr_register_kvm(env, SPR_UAMOR, "UAMOR", + spr_register_kvm_hv(env, SPR_UAMOR, "UAMOR", SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_uamor, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_UAMOR, 0); spr_register_hv(env, SPR_AMOR, "AMOR", From a6eabb9e5908ddf521ea372651b8321ffa804bd8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:38 +0100 Subject: [PATCH 10/16] ppc: Add POWER8 IAMR register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With appropriate AMR-like masks. Not actually used by the translation logic at that point Signed-off-by: Benjamin Herrenschmidt [clg: changed spr_register_hv(SPR_IAMR) to spr_register_kvm_hv(SPR_IAMR) changed gen_spr_amr() prototype ] Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/cpu.h | 1 + target-ppc/translate_init.c | 41 +++++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 167c73f863..a3c4fb112a 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1360,6 +1360,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_BOOKE_CSRR0 (0x03A) #define SPR_BOOKE_CSRR1 (0x03B) #define SPR_BOOKE_DEAR (0x03D) +#define SPR_IAMR (0x03D) #define SPR_BOOKE_ESR (0x03E) #define SPR_BOOKE_IVPR (0x03F) #define SPR_MPC_EIE (0x050) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index a5b59e704f..bd62e3be02 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -1126,9 +1126,39 @@ static void spr_write_uamor(DisasContext *ctx, int sprn, int gprn) tcg_temp_free(t1); tcg_temp_free(t2); } + +static void spr_write_iamr(DisasContext *ctx, int sprn, int gprn) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + + /* Note, the HV=1 case is handled earlier by simply using + * spr_write_generic for HV mode in the SPR table + */ + + /* Build insertion mask into t1 based on context */ + gen_load_spr(t1, SPR_AMOR); + + /* Mask new bits into t2 */ + tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]); + + /* Load AMR and clear new bits in t0 */ + gen_load_spr(t0, SPR_IAMR); + tcg_gen_andc_tl(t0, t0, t1); + + /* Or'in new bits and write it out */ + tcg_gen_or_tl(t0, t0, t2); + gen_store_spr(SPR_IAMR, t0); + spr_store_dump_spr(SPR_IAMR); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} #endif /* CONFIG_USER_ONLY */ -static void gen_spr_amr (CPUPPCState *env) +static void gen_spr_amr(CPUPPCState *env, bool has_iamr) { #ifndef CONFIG_USER_ONLY /* Virtual Page Class Key protection */ @@ -1154,6 +1184,13 @@ static void gen_spr_amr (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0); + if (has_iamr) { + spr_register_kvm_hv(env, SPR_IAMR, "IAMR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_iamr, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_IAMR, 0); + } #endif /* !CONFIG_USER_ONLY */ } #endif /* TARGET_PPC64 */ @@ -8000,7 +8037,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) case BOOK3S_CPU_POWER7: case BOOK3S_CPU_POWER8: gen_spr_book3s_ids(env); - gen_spr_amr(env); + gen_spr_amr(env, version >= BOOK3S_CPU_POWER8); gen_spr_book3s_purr(env); env->ci_large_pages = true; break; From eb5ceb4d389b1b34b210baf9fa49b48bacb2538b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:39 +0100 Subject: [PATCH 11/16] ppc: Add dummy CIABR SPR We should implement HW breakpoint/watchpoint, qemu supports them... Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/cpu.h | 1 + target-ppc/translate_init.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index a3c4fb112a..29c48600d9 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1393,6 +1393,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_PSPB (0x09F) #define SPR_DAWR (0x0B4) #define SPR_RPR (0x0BA) +#define SPR_CIABR (0x0BB) #define SPR_DAWRX (0x0BC) #define SPR_HFSCR (0x0BE) #define SPR_VRSAVE (0x100) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index bd62e3be02..37c4fb5302 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7589,6 +7589,11 @@ static void gen_spr_book3s_207_dbg(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_DAWRX, 0x00000000); + spr_register_kvm_hv(env, SPR_CIABR, "CIABR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_CIABR, 0x00000000); } static void gen_spr_970_dbg(CPUPPCState *env) From 9c1cf38d28fd0c09efc08b6f9e86dff293248e44 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Mar 2016 13:52:40 +0100 Subject: [PATCH 12/16] ppc: A couple more dummy POWER8 Book4 regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Benjamin Herrenschmidt [clg: squashed in patch 'ppc: Add dummy ACOP SPR' ] Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/cpu.h | 3 +++ target-ppc/translate_init.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 29c48600d9..676081e69d 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1355,7 +1355,9 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_SRR1 (0x01B) #define SPR_CFAR (0x01C) #define SPR_AMR (0x01D) +#define SPR_ACOP (0x01F) #define SPR_BOOKE_PID (0x030) +#define SPR_BOOKS_PID (0x030) #define SPR_BOOKE_DECAR (0x036) #define SPR_BOOKE_CSRR0 (0x03A) #define SPR_BOOKE_CSRR1 (0x03B) @@ -1706,6 +1708,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) #define SPR_POWER_SPMC1 (0x37C) #define SPR_POWER_SPMC2 (0x37D) #define SPR_POWER_MMCRS (0x37E) +#define SPR_WORT (0x37F) #define SPR_PPR (0x380) #define SPR_750_GQR0 (0x390) #define SPR_440_DNV0 (0x390) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 37c4fb5302..5cec1df8bd 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8018,6 +8018,18 @@ static void gen_spr_power8_ic(CPUPPCState *env) &spr_read_generic, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0); + spr_register_kvm(env, SPR_ACOP, "ACOP", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_ACOP, 0); + spr_register_kvm(env, SPR_BOOKS_PID, "PID", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_PID, 0); + spr_register_kvm(env, SPR_WORT, "WORT", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_WORT, 0); #endif } From d6f39fdfcd30b3f65f9aad396e7caa49285a5a0a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 21 Mar 2016 17:25:22 +0100 Subject: [PATCH 13/16] hw/net/spapr_llan: Extract rx buffer code into separate functions Refactor the code a little bit by extracting the code that reads and writes the receive buffer list page into separate functions. There should be no functional change in this patch, this is just a preparation for the upcoming extensions that introduce receive buffer pools. Signed-off-by: Thomas Huth Reviewed-by: David Gibson Reviewed-by: Laurent Vivier Signed-off-by: David Gibson --- hw/net/spapr_llan.c | 106 +++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 5237b4d863..39a1dd1d13 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -103,6 +103,42 @@ static int spapr_vlan_can_receive(NetClientState *nc) return (dev->isopen && dev->rx_bufs > 0); } +/** + * Get buffer descriptor from the receive buffer list page that has been + * supplied by the guest with the H_REGISTER_LOGICAL_LAN call + */ +static vlan_bd_t spapr_vlan_get_rx_bd_from_page(VIOsPAPRVLANDevice *dev, + size_t size) +{ + int buf_ptr = dev->use_buf_ptr; + vlan_bd_t bd; + + do { + buf_ptr += 8; + if (buf_ptr >= VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF) { + buf_ptr = VLAN_RX_BDS_OFF; + } + + bd = vio_ldq(&dev->sdev, dev->buf_list + buf_ptr); + DPRINTF("use_buf_ptr=%d bd=0x%016llx\n", + buf_ptr, (unsigned long long)bd); + } while ((!(bd & VLAN_BD_VALID) || VLAN_BD_LEN(bd) < size + 8) + && buf_ptr != dev->use_buf_ptr); + + if (!(bd & VLAN_BD_VALID) || VLAN_BD_LEN(bd) < size + 8) { + /* Failed to find a suitable buffer */ + return 0; + } + + /* Remove the buffer from the pool */ + dev->use_buf_ptr = buf_ptr; + vio_stq(&dev->sdev, dev->buf_list + dev->use_buf_ptr, 0); + + DPRINTF("Found buffer: ptr=%d rxbufs=%d\n", dev->use_buf_ptr, dev->rx_bufs); + + return bd; +} + static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, size_t size) { @@ -110,7 +146,6 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, VIOsPAPRDevice *sdev = VIO_SPAPR_DEVICE(dev); vlan_bd_t rxq_bd = vio_ldq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF); vlan_bd_t bd; - int buf_ptr = dev->use_buf_ptr; uint64_t handle; uint8_t control; @@ -125,29 +160,12 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, return -1; } - do { - buf_ptr += 8; - if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { - buf_ptr = VLAN_RX_BDS_OFF; - } - - bd = vio_ldq(sdev, dev->buf_list + buf_ptr); - DPRINTF("use_buf_ptr=%d bd=0x%016llx\n", - buf_ptr, (unsigned long long)bd); - } while ((!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8))) - && (buf_ptr != dev->use_buf_ptr)); - - if (!(bd & VLAN_BD_VALID) || (VLAN_BD_LEN(bd) < (size + 8))) { - /* Failed to find a suitable buffer */ + bd = spapr_vlan_get_rx_bd_from_page(dev, size); + if (!bd) { return -1; } - /* Remove the buffer from the pool */ dev->rx_bufs--; - dev->use_buf_ptr = buf_ptr; - vio_stq(sdev, dev->buf_list + dev->use_buf_ptr, 0); - - DPRINTF("Found buffer: ptr=%d num=%d\n", dev->use_buf_ptr, dev->rx_bufs); /* Transfer the packet data */ if (spapr_vio_dma_write(sdev, VLAN_BD_ADDR(bd) + 8, buf, size) < 0) { @@ -372,6 +390,32 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, return H_SUCCESS; } +static target_long spapr_vlan_add_rxbuf_to_page(VIOsPAPRVLANDevice *dev, + target_ulong buf) +{ + vlan_bd_t bd; + + if (dev->rx_bufs >= VLAN_MAX_BUFS) { + return H_RESOURCE; + } + + do { + dev->add_buf_ptr += 8; + if (dev->add_buf_ptr >= VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF) { + dev->add_buf_ptr = VLAN_RX_BDS_OFF; + } + + bd = vio_ldq(&dev->sdev, dev->buf_list + dev->add_buf_ptr); + } while (bd & VLAN_BD_VALID); + + vio_stq(&dev->sdev, dev->buf_list + dev->add_buf_ptr, buf); + + DPRINTF("h_add_llan_buf(): Added buf ptr=%d rx_bufs=%d bd=0x%016llx\n", + dev->add_buf_ptr, dev->rx_bufs, (unsigned long long)buf); + + return 0; +} + static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, @@ -381,7 +425,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, target_ulong buf = args[1]; VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); - vlan_bd_t bd; + target_long ret; DPRINTF("H_ADD_LOGICAL_LAN_BUFFER(0x" TARGET_FMT_lx ", 0x" TARGET_FMT_lx ")\n", reg, buf); @@ -397,29 +441,19 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, return H_PARAMETER; } - if (!dev->isopen || dev->rx_bufs >= VLAN_MAX_BUFS) { + if (!dev->isopen) { return H_RESOURCE; } - do { - dev->add_buf_ptr += 8; - if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { - dev->add_buf_ptr = VLAN_RX_BDS_OFF; - } - - bd = vio_ldq(sdev, dev->buf_list + dev->add_buf_ptr); - } while (bd & VLAN_BD_VALID); - - vio_stq(sdev, dev->buf_list + dev->add_buf_ptr, buf); + ret = spapr_vlan_add_rxbuf_to_page(dev, buf); + if (ret) { + return ret; + } dev->rx_bufs++; qemu_flush_queued_packets(qemu_get_queue(dev->nic)); - DPRINTF("h_add_logical_lan_buffer(): Added buf ptr=%d rx_bufs=%d" - " bd=0x%016llx\n", dev->add_buf_ptr, dev->rx_bufs, - (unsigned long long)buf); - return H_SUCCESS; } From 831e8822530bb511a24329494f9121ad1f8b94ab Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 21 Mar 2016 17:25:23 +0100 Subject: [PATCH 14/16] hw/net/spapr_llan: Fix receive buffer handling for better performance tl;dr: This patch introduces an alternate way of handling the receive buffers of the spapr-vlan device, resulting in much better receive performance for the guest. Full story: One of our testers recently discovered that the performance of the spapr-vlan device is very poor compared to other NICs, and that a simple "ping -i 0.2 -s 65507 someip" in the guest can result in more than 50% lost ping packets (especially with older guest kernels < 3.17). After doing some analysis, it was clear that there is a problem with the way we handle the receive buffers in spapr_llan.c: The ibmveth driver of the guest Linux kernel tries to add a lot of buffers into several buffer pools (with 512, 2048 and 65536 byte sizes by default, but it can be changed via the entries in the /sys/devices/vio/1000/pool* directories of the guest). However, the spapr-vlan device of QEMU only tries to squeeze all receive buffer descriptors into one single page which has been supplied by the guest during the H_REGISTER_LOGICAL_LAN call, without taking care of different buffer sizes. This has two bad effects: First, only a very limited number of buffer descriptors is accepted at all. Second, we also hand 64k buffers to the guest even if the 2k buffers would fit better - and this results in dropped packets in the IP layer of the guest since too much skbuf memory is used. Though it seems at a first glance like PAPR says that we should store the receive buffer descriptors in the page that is supplied during the H_REGISTER_LOGICAL_LAN call, chapter 16.4.1.2 in the LoPAPR spec declares that "the contents of these descriptors are architecturally opaque, none of these descriptors are manipulated by code above the architected interfaces". That means we don't have to store the RX buffer descriptors in this page, but can also manage the receive buffers at the hypervisor level only. This is now what we are doing here: Introducing proper RX buffer pools which are also sorted by size of the buffers, so we can hand out a buffer with the best fitting size when a packet has been received. To avoid problems with migration from/to older version of QEMU, the old behavior is also retained and enabled by default. The new buffer management has to be enabled via a new "use-rx-buffer-pools" property. Now with the new buffer pool management enabled, the problem with "ping -s 65507" is fixed for me, and the throughput of a simple test with wget increases from creeping 3MB/s up to 20MB/s! Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- hw/net/spapr_llan.c | 218 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 216 insertions(+), 2 deletions(-) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 39a1dd1d13..c7ce87017b 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -45,6 +45,10 @@ #define DPRINTF(fmt...) #endif +/* Compatibility flags for migration */ +#define SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT 0 +#define SPAPRVLAN_FLAG_RX_BUF_POOLS (1 << SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT) + /* * Virtual LAN device */ @@ -86,6 +90,15 @@ typedef uint64_t vlan_bd_t; #define VIO_SPAPR_VLAN_DEVICE(obj) \ OBJECT_CHECK(VIOsPAPRVLANDevice, (obj), TYPE_VIO_SPAPR_VLAN_DEVICE) +#define RX_POOL_MAX_BDS 4096 +#define RX_MAX_POOLS 5 + +typedef struct { + int32_t bufsize; + int32_t count; + vlan_bd_t bds[RX_POOL_MAX_BDS]; +} RxBufPool; + typedef struct VIOsPAPRVLANDevice { VIOsPAPRDevice sdev; NICConf nicconf; @@ -94,6 +107,8 @@ typedef struct VIOsPAPRVLANDevice { target_ulong buf_list; uint32_t add_buf_ptr, use_buf_ptr, rx_bufs; target_ulong rxq_ptr; + uint32_t compat_flags; /* Compatability flags for migration */ + RxBufPool *rx_pool[RX_MAX_POOLS]; /* Receive buffer descriptor pools */ } VIOsPAPRVLANDevice; static int spapr_vlan_can_receive(NetClientState *nc) @@ -103,6 +118,37 @@ static int spapr_vlan_can_receive(NetClientState *nc) return (dev->isopen && dev->rx_bufs > 0); } +/** + * Get buffer descriptor from one of our receive buffer pools + */ +static vlan_bd_t spapr_vlan_get_rx_bd_from_pool(VIOsPAPRVLANDevice *dev, + size_t size) +{ + vlan_bd_t bd; + int pool; + + for (pool = 0; pool < RX_MAX_POOLS; pool++) { + if (dev->rx_pool[pool]->count > 0 && + dev->rx_pool[pool]->bufsize >= size + 8) { + break; + } + } + if (pool == RX_MAX_POOLS) { + /* Failed to find a suitable buffer */ + return 0; + } + + DPRINTF("Found buffer: pool=%d count=%d rxbufs=%d\n", pool, + dev->rx_pool[pool]->count, dev->rx_bufs); + + /* Remove the buffer from the pool */ + dev->rx_pool[pool]->count--; + bd = dev->rx_pool[pool]->bds[dev->rx_pool[pool]->count]; + dev->rx_pool[pool]->bds[dev->rx_pool[pool]->count] = 0; + + return bd; +} + /** * Get buffer descriptor from the receive buffer list page that has been * supplied by the guest with the H_REGISTER_LOGICAL_LAN call @@ -160,7 +206,11 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, return -1; } - bd = spapr_vlan_get_rx_bd_from_page(dev, size); + if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { + bd = spapr_vlan_get_rx_bd_from_pool(dev, size); + } else { + bd = spapr_vlan_get_rx_bd_from_page(dev, size); + } if (!bd) { return -1; } @@ -213,13 +263,31 @@ static NetClientInfo net_spapr_vlan_info = { .receive = spapr_vlan_receive, }; +static void spapr_vlan_reset_rx_pool(RxBufPool *rxp) +{ + /* + * Use INT_MAX as bufsize so that unused buffers are moved to the end + * of the list during the qsort in spapr_vlan_add_rxbuf_to_pool() later. + */ + rxp->bufsize = INT_MAX; + rxp->count = 0; + memset(rxp->bds, 0, sizeof(rxp->bds)); +} + static void spapr_vlan_reset(VIOsPAPRDevice *sdev) { VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + int i; dev->buf_list = 0; dev->rx_bufs = 0; dev->isopen = 0; + + if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { + for (i = 0; i < RX_MAX_POOLS; i++) { + spapr_vlan_reset_rx_pool(dev->rx_pool[i]); + } + } } static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) @@ -236,10 +304,31 @@ static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) static void spapr_vlan_instance_init(Object *obj) { VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(obj); + int i; device_add_bootindex_property(obj, &dev->nicconf.bootindex, "bootindex", "", DEVICE(dev), NULL); + + if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { + for (i = 0; i < RX_MAX_POOLS; i++) { + dev->rx_pool[i] = g_new(RxBufPool, 1); + spapr_vlan_reset_rx_pool(dev->rx_pool[i]); + } + } +} + +static void spapr_vlan_instance_finalize(Object *obj) +{ + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(obj); + int i; + + if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { + for (i = 0; i < RX_MAX_POOLS; i++) { + g_free(dev->rx_pool[i]); + dev->rx_pool[i] = NULL; + } + } } void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd) @@ -390,6 +479,87 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, return H_SUCCESS; } +/** + * Used for qsort, this function compares two RxBufPools by size. + */ +static int rx_pool_size_compare(const void *p1, const void *p2) +{ + const RxBufPool *pool1 = *(RxBufPool **)p1; + const RxBufPool *pool2 = *(RxBufPool **)p2; + + if (pool1->bufsize < pool2->bufsize) { + return -1; + } + return pool1->bufsize > pool2->bufsize; +} + +/** + * Search for a matching buffer pool with exact matching size, + * or return -1 if no matching pool has been found. + */ +static int spapr_vlan_get_rx_pool_id(VIOsPAPRVLANDevice *dev, int size) +{ + int pool; + + for (pool = 0; pool < RX_MAX_POOLS; pool++) { + if (dev->rx_pool[pool]->bufsize == size) { + return pool; + } + } + + return -1; +} + +/** + * Enqueuing receive buffer by adding it to one of our receive buffer pools + */ +static target_long spapr_vlan_add_rxbuf_to_pool(VIOsPAPRVLANDevice *dev, + target_ulong buf) +{ + int size = VLAN_BD_LEN(buf); + int pool; + + pool = spapr_vlan_get_rx_pool_id(dev, size); + if (pool < 0) { + /* + * No matching pool found? Try to use a new one. If the guest used all + * pools before, but changed the size of one pool inbetween, we might + * need to recycle that pool here (if it's empty already). Thus scan + * all buffer pools now, starting with the last (likely empty) one. + */ + for (pool = RX_MAX_POOLS - 1; pool >= 0 ; pool--) { + if (dev->rx_pool[pool]->count == 0) { + dev->rx_pool[pool]->bufsize = size; + /* + * Sort pools by size so that spapr_vlan_receive() + * can later find the smallest buffer pool easily. + */ + qsort(dev->rx_pool, RX_MAX_POOLS, sizeof(dev->rx_pool[0]), + rx_pool_size_compare); + pool = spapr_vlan_get_rx_pool_id(dev, size); + DPRINTF("created RX pool %d for size %lld\n", pool, + VLAN_BD_LEN(buf)); + break; + } + } + } + /* Still no usable pool? Give up */ + if (pool < 0 || dev->rx_pool[pool]->count >= RX_POOL_MAX_BDS) { + return H_RESOURCE; + } + + DPRINTF("h_add_llan_buf(): Add buf using pool %i (size %lli, count=%i)\n", + pool, VLAN_BD_LEN(buf), dev->rx_pool[pool]->count); + + dev->rx_pool[pool]->bds[dev->rx_pool[pool]->count++] = buf; + + return 0; +} + +/** + * This is the old way of enqueuing receive buffers: Add it to the rx queue + * page that has been supplied by the guest (which is quite limited in size). + */ static target_long spapr_vlan_add_rxbuf_to_page(VIOsPAPRVLANDevice *dev, target_ulong buf) { @@ -445,7 +615,11 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, return H_RESOURCE; } - ret = spapr_vlan_add_rxbuf_to_page(dev, buf); + if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { + ret = spapr_vlan_add_rxbuf_to_pool(dev, buf); + } else { + ret = spapr_vlan_add_rxbuf_to_page(dev, buf); + } if (ret) { return ret; } @@ -543,9 +717,44 @@ static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, static Property spapr_vlan_properties[] = { DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev), DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf), + DEFINE_PROP_BIT("use-rx-buffer-pools", VIOsPAPRVLANDevice, + compat_flags, SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT, false), DEFINE_PROP_END_OF_LIST(), }; +static bool spapr_vlan_rx_buffer_pools_needed(void *opaque) +{ + VIOsPAPRVLANDevice *dev = opaque; + + return (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) != 0; +} + +static const VMStateDescription vmstate_rx_buffer_pool = { + .name = "spapr_llan/rx_buffer_pool", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_vlan_rx_buffer_pools_needed, + .fields = (VMStateField[]) { + VMSTATE_INT32(bufsize, RxBufPool), + VMSTATE_INT32(count, RxBufPool), + VMSTATE_UINT64_ARRAY(bds, RxBufPool, RX_POOL_MAX_BDS), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_rx_pools = { + .name = "spapr_llan/rx_pools", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_vlan_rx_buffer_pools_needed, + .fields = (VMStateField[]) { + VMSTATE_ARRAY_OF_POINTER_TO_STRUCT(rx_pool, VIOsPAPRVLANDevice, + RX_MAX_POOLS, 1, + vmstate_rx_buffer_pool, RxBufPool), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_spapr_llan = { .name = "spapr_llan", .version_id = 1, @@ -562,6 +771,10 @@ static const VMStateDescription vmstate_spapr_llan = { VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_rx_pools, + NULL + } }; static void spapr_vlan_class_init(ObjectClass *klass, void *data) @@ -588,6 +801,7 @@ static const TypeInfo spapr_vlan_info = { .instance_size = sizeof(VIOsPAPRVLANDevice), .class_init = spapr_vlan_class_init, .instance_init = spapr_vlan_instance_init, + .instance_finalize = spapr_vlan_instance_finalize, }; static void spapr_vlan_register_types(void) From 57c522f47bf85de20c5bef3706b6fca6a70d7ac5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 21 Mar 2016 17:25:24 +0100 Subject: [PATCH 15/16] hw/net/spapr_llan: Enable the RX buffer pools by default for new machines RX buffer pools are now enabled by default for new machine types. For older machine types, they are still disabled to avoid breaking migration. Signed-off-by: Thomas Huth Reviewed-by: David Gibson Reviewed-by: Laurent Vivier Signed-off-by: David Gibson --- hw/net/spapr_llan.c | 2 +- hw/ppc/spapr.c | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index c7ce87017b..efc31cbfba 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -718,7 +718,7 @@ static Property spapr_vlan_properties[] = { DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev), DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf), DEFINE_PROP_BIT("use-rx-buffer-pools", VIOsPAPRVLANDevice, - compat_flags, SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT, false), + compat_flags, SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ebbc6fe093..65abccb2f5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2356,7 +2356,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true); * pseries-2.5 */ #define SPAPR_COMPAT_2_5 \ - HW_COMPAT_2_5 + HW_COMPAT_2_5 \ + { \ + .driver = "spapr-vlan", \ + .property = "use-rx-buffer-pools", \ + .value = "off", \ + }, static void spapr_machine_2_5_instance_options(MachineState *machine) { From 9d0e5c8ceb70e738101bfcb31fa1c0f7375d844b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 22 Mar 2016 15:23:13 +0100 Subject: [PATCH 16/16] ppc: move POWER8 Book4 regs in their own routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fce55481360d "ppc: A couple more dummy POWER8 Book4 regs" squashed in to rapidly a set of POWER8 Book4 regs in the wrong routine. This patch introduces the missing gen_spr_power8_book4() routine to fix their location. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- target-ppc/translate_init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 5cec1df8bd..0a33597f6b 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8018,6 +8018,13 @@ static void gen_spr_power8_ic(CPUPPCState *env) &spr_read_generic, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0); +#endif +} + +static void gen_spr_power8_book4(CPUPPCState *env) +{ + /* Add a number of P8 book4 registers */ +#if !defined(CONFIG_USER_ONLY) spr_register_kvm(env, SPR_ACOP, "ACOP", SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, @@ -8086,6 +8093,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) gen_spr_power8_pspb(env); gen_spr_vtb(env); gen_spr_power8_ic(env); + gen_spr_power8_book4(env); } if (version < BOOK3S_CPU_POWER8) { gen_spr_book3s_dbg(env);