From e4c93e44ab103f6c67abd85d620343f61aafa004 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 21 Oct 2022 17:01:31 +0100 Subject: [PATCH 01/31] target/arm: Implement FEAT_E0PD FEAT_E0PD adds new bits E0PD0 and E0PD1 to TCR_EL1, which allow the OS to forbid EL0 access to half of the address space. Since this is an EL0-specific variation on the existing TCR_ELx.{EPD0,EPD1}, we can implement it entirely in aa64_va_parameters(). This requires moving the existing regime_is_user() to internals.h so that the code in helper.c can get at it. Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Message-id: 20221021160131.3531787-1-peter.maydell@linaro.org --- docs/system/arm/emulation.rst | 1 + target/arm/cpu.h | 5 +++++ target/arm/cpu64.c | 1 + target/arm/helper.c | 9 +++++++++ target/arm/internals.h | 19 +++++++++++++++++++ target/arm/ptw.c | 19 ------------------- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index cfb4b0768b..fd61360a08 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -24,6 +24,7 @@ the following architecture extensions: - FEAT_Debugv8p4 (Debug changes for v8.4) - FEAT_DotProd (Advanced SIMD dot product instructions) - FEAT_DoubleFault (Double Fault Extension) +- FEAT_E0PD (Preventing EL0 access to halves of address maps) - FEAT_ETS (Enhanced Translation Synchronization) - FEAT_FCMA (Floating-point complex number instructions) - FEAT_FHM (Floating-point half-precision multiplication instructions) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index db9ec6a038..09564d0393 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -4147,6 +4147,11 @@ static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; } +static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; +} + static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 85e0d1daf1..da95eabab5 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -1185,6 +1185,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */ t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */ t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ cpu->isar.id_aa64mmfr2 = t; t = cpu->isar.id_aa64zfr0; diff --git a/target/arm/helper.c b/target/arm/helper.c index c672903f43..252651a8d1 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -10491,6 +10491,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ps = extract32(tcr, 16, 3); ds = extract64(tcr, 32, 1); } else { + bool e0pd; + /* * Bit 55 is always between the two regions, and is canonical for * determining if address tagging is enabled. @@ -10502,15 +10504,22 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, epd = extract32(tcr, 7, 1); sh = extract32(tcr, 12, 2); hpd = extract64(tcr, 41, 1); + e0pd = extract64(tcr, 55, 1); } else { tsz = extract32(tcr, 16, 6); gran = tg1_to_gran_size(extract32(tcr, 30, 2)); epd = extract32(tcr, 23, 1); sh = extract32(tcr, 28, 2); hpd = extract64(tcr, 42, 1); + e0pd = extract64(tcr, 56, 1); } ps = extract64(tcr, 32, 3); ds = extract64(tcr, 59, 1); + + if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && + regime_is_user(env, mmu_idx)) { + epd = true; + } } gran = sanitize_gran_size(cpu, gran, stage2); diff --git a/target/arm/internals.h b/target/arm/internals.h index b26c9ca17b..1926f3679c 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -707,6 +707,25 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) } } +static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSUser: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MSUserNegPri: + return true; + default: + return false; + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + g_assert_not_reached(); + } +} + /* Return the SCTLR value which controls this address translation regime */ static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) { diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 6c5ed56a10..aed6f92d6f 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -104,25 +104,6 @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; } -static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSUser: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MSUserNegPri: - return true; - default: - return false; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - g_assert_not_reached(); - } -} - /* Return the TTBR associated with this translation regime */ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) { From 7cd5d384bb298ce3c595a3774213b5b478881ac8 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 27 Sep 2022 11:03:48 +0100 Subject: [PATCH 02/31] hw/arm/virt: Fix devicetree warnings about the virtio-iommu node The "PCI Bus Binding to: IEEE Std 1275-1994" defines the compatible string for a PCIe bus or endpoint as "pci," or similar. Since the initial binding for PCI virtio-iommu didn't follow this rule, it was modified to accept both strings and ensure backward compatibility. Also, the unit-name for the node should be "device,function". Fix corresponding dt-validate and dtc warnings: pcie@10000000: virtio_iommu@16:compatible: ['virtio,pci-iommu'] does not contain items matching the given schema pcie@10000000: Unevaluated properties are not allowed (... 'virtio_iommu@16' were unexpected) From schema: linux/Documentation/devicetree/bindings/pci/host-generic-pci.yaml virtio_iommu@16: compatible: 'oneOf' conditional failed, one must be fixed: ['virtio,pci-iommu'] is too short 'pci1af4,1057' was expected From schema: dtschema/schemas/pci/pci-bus.yaml Warning (pci_device_reg): /pcie@10000000/virtio_iommu@16: PCI unit address format error, expected "2,0" Signed-off-by: Jean-Philippe Brucker Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/virt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index cda9defe8f..b871350856 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1371,14 +1371,15 @@ static void create_smmu(const VirtMachineState *vms, static void create_virtio_iommu_dt_bindings(VirtMachineState *vms) { - const char compat[] = "virtio,pci-iommu"; + const char compat[] = "virtio,pci-iommu\0pci1af4,1057"; uint16_t bdf = vms->virtio_iommu_bdf; MachineState *ms = MACHINE(vms); char *node; vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt); - node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf); + node = g_strdup_printf("%s/virtio_iommu@%x,%x", vms->pciehb_nodename, + PCI_SLOT(bdf), PCI_FUNC(bdf)); qemu_fdt_add_subnode(ms->fdt, node); qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat)); qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", From c939a7c7b93ee44a4963fabe81454e1f956ecd4b Mon Sep 17 00:00:00 2001 From: Ake Koomsin Date: Mon, 17 Oct 2022 18:24:32 +0900 Subject: [PATCH 03/31] target/arm: honor HCR_E2H and HCR_TGE in arm_excp_unmasked() An exception targeting EL2 from lower EL is actually maskable when HCR_E2H and HCR_TGE are both set. This applies to both secure and non-secure Security state. We can remove the conditions that try to suppress masking of interrupts when we are Secure and the exception targets EL2 and Secure EL2 is disabled. This is OK because in that situation arm_phys_excp_target_el() will never return 2 as the target EL. The 'not if secure' check in this function was originally written before arm_hcr_el2_eff(), and back then the target EL returned by arm_phys_excp_target_el() could be 2 even if we were in Secure EL0/EL1; but it is no longer needed. Signed-off-by: Ake Koomsin Message-id: 20221017092432.546881-1-ake@igel.co.jp [PMM: Add commit message paragraph explaining why it's OK to remove the checks on secure and SCR_EEL2] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 0a7bfbf999..a021df9e9e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -587,14 +587,24 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, if ((target_el > cur_el) && (target_el != 1)) { /* Exceptions targeting a higher EL may not be maskable */ if (arm_feature(env, ARM_FEATURE_AARCH64)) { - /* - * 64-bit masking rules are simple: exceptions to EL3 - * can't be masked, and exceptions to EL2 can only be - * masked from Secure state. The HCR and SCR settings - * don't affect the masking logic, only the interrupt routing. - */ - if (target_el == 3 || !secure || (env->cp15.scr_el3 & SCR_EEL2)) { + switch (target_el) { + case 2: + /* + * According to ARM DDI 0487H.a, an interrupt can be masked + * when HCR_E2H and HCR_TGE are both set regardless of the + * current Security state. Note that we need to revisit this + * part again once we need to support NMI. + */ + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + unmasked = true; + } + break; + case 3: + /* Interrupt cannot be masked when the target EL is 3 */ unmasked = true; + break; + default: + g_assert_not_reached(); } } else { /* From 310616d3677aad709d3eaf8f5f08683f2853f227 Mon Sep 17 00:00:00 2001 From: Damien Hedde Date: Thu, 20 Oct 2022 15:27:49 +0100 Subject: [PATCH 04/31] hw/core/resettable: fix reset level counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code for handling the reset level count in the Resettable code has two issues: The reset count is only decremented for the 1->0 case. This means that if there's ever a nested reset that takes the count to 2 then it will never again be decremented. Eventually the count will exceed the '50' limit in resettable_phase_enter() and QEMU will trip over the assertion failure. The repro case in issue 1266 is an example of this that happens now the SCSI subsystem uses three-phase reset. Secondly, the count is decremented only after the exit phase handler is called. Moving the reset count decrement from "just after" to "just before" calling the exit phase handler allows resettable_is_in_reset() to return false during the handler execution. This simplifies reset handling in resettable devices. Typically, a function that updates the device state will just need to read the current reset state and not anymore treat the "in a reset-exit transition" as a special case. Note that the semantics change to the *_is_in_reset() functions will have no effect on the current codebase, because only two devices (hw/char/cadence_uart.c and hw/misc/zynq_sclr.c) currently call those functions, and in neither case do they do it from the device's exit phase methed. Fixes: 4a5fc890 ("scsi: Use device_cold_reset() and bus_cold_reset()") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1266 Signed-off-by: Damien Hedde Signed-off-by: Peter Maydell Reported-by: Michael Peter Reviewed-by: Philippe Mathieu-Daudé Message-id: 20221020142749.3357951-1-peter.maydell@linaro.org Buglink: https://bugs.launchpad.net/qemu/+bug/1905297 Reported-by: Michael Peter [PMM: adjust the docs paragraph changed to get the name of the 'enter' phase right and to clarify exactly when the count is adjusted; rewrite the commit message] Signed-off-by: Peter Maydell --- docs/devel/reset.rst | 8 +++++--- hw/core/resettable.c | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst index abea1102dc..7cc6a6b314 100644 --- a/docs/devel/reset.rst +++ b/docs/devel/reset.rst @@ -210,9 +210,11 @@ Polling the reset state Resettable interface provides the ``resettable_is_in_reset()`` function. This function returns true if the object parameter is currently under reset. -An object is under reset from the beginning of the *init* phase to the end of -the *exit* phase. During all three phases, the function will return that the -object is in reset. +An object is under reset from the beginning of the *enter* phase (before +either its children or its own enter method is called) to the *exit* +phase. During *enter* and *hold* phase only, the function will return that the +object is in reset. The state is changed after the *exit* is propagated to +its children and just before calling the object's own *exit* method. This function may be used if the object behavior has to be adapted while in reset state. For example if a device has an irq input, diff --git a/hw/core/resettable.c b/hw/core/resettable.c index 96a99ce39e..c3df75c6ba 100644 --- a/hw/core/resettable.c +++ b/hw/core/resettable.c @@ -201,12 +201,11 @@ static void resettable_phase_exit(Object *obj, void *opaque, ResetType type) resettable_child_foreach(rc, obj, resettable_phase_exit, NULL, type); assert(s->count > 0); - if (s->count == 1) { + if (--s->count == 0) { trace_resettable_phase_exit_exec(obj, obj_typename, !!rc->phases.exit); if (rc->phases.exit && !resettable_get_tr_func(rc, obj)) { rc->phases.exit(obj); } - s->count = 0; } s->exit_phase_in_progress = false; trace_resettable_phase_exit_end(obj, obj_typename, s->count); From 7764963b9239dc966122617cc8b61d4530d6ce2a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 13 Oct 2022 18:18:17 +0100 Subject: [PATCH 05/31] hw/hyperv/hyperv.c: Use device_cold_reset() instead of device_legacy_reset() The semantic difference between the deprecated device_legacy_reset() function and the newer device_cold_reset() function is that the new function resets both the device itself and any qbuses it owns, whereas the legacy function resets just the device itself and nothing else. In hyperv_synic_reset() we reset a SynICState, which has no qbuses, so for this purpose the two functions behave identically and we can stop using the deprecated one. Signed-off-by: Peter Maydell Reviewed-by: Maciej S. Szmigiero Message-id: 20221013171817.1447562-1-peter.maydell@linaro.org --- hw/hyperv/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 4a1b59cb9d..57b402b956 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -157,7 +157,7 @@ void hyperv_synic_reset(CPUState *cs) SynICState *synic = get_synic(cs); if (synic) { - device_legacy_reset(DEVICE(synic)); + device_cold_reset(DEVICE(synic)); } } From 7719419deb07a431455dfb0178480ef4be2d3e2c Mon Sep 17 00:00:00 2001 From: Axel Heider Date: Wed, 19 Oct 2022 15:09:50 +0200 Subject: [PATCH 06/31] target/imx: reload cmp timer outside of the reload ptimer transaction When running seL4 tests (https://docs.sel4.systems/projects/sel4test) on the sabrelight platform, the timer tests fail. The arm/imx6 EPIT timer interrupt does not fire properly, instead of a e.g. second in can take up to a minute to finally see the interrupt. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1263 Signed-off-by: Axel Heider Message-id: 166663118138.13362.1229967229046092876-0@git.sr.ht Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/timer/imx_epit.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c index 2bf8c754b2..ec0fa440d7 100644 --- a/hw/timer/imx_epit.c +++ b/hw/timer/imx_epit.c @@ -275,10 +275,15 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value, /* If IOVW bit is set then set the timer value */ ptimer_set_count(s->timer_reload, s->lr); } - + /* + * Commit the change to s->timer_reload, so it can propagate. Otherwise + * the timer interrupt may not fire properly. The commit must happen + * before calling imx_epit_reload_compare_timer(), which reads + * s->timer_reload internally again. + */ + ptimer_transaction_commit(s->timer_reload); imx_epit_reload_compare_timer(s); ptimer_transaction_commit(s->timer_cmp); - ptimer_transaction_commit(s->timer_reload); break; case 3: /* CMP */ From edc05dd43ade287e735f1d8c972562dbdd12a378 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:38 +1000 Subject: [PATCH 07/31] target/arm: Introduce regime_is_stage2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce the amount of typing required for this check. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 14 +++++--------- target/arm/internals.h | 5 +++++ target/arm/ptw.c | 14 ++++++-------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 252651a8d1..47afaec6b4 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -10352,7 +10352,7 @@ int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) { if (regime_has_2_ranges(mmu_idx)) { return extract64(tcr, 37, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + } else if (regime_is_stage2(mmu_idx)) { return 0; /* VTCR_EL2 */ } else { /* Replicate the single TBI bit so we always have 2 bits. */ @@ -10364,7 +10364,7 @@ int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) { if (regime_has_2_ranges(mmu_idx)) { return extract64(tcr, 51, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + } else if (regime_is_stage2(mmu_idx)) { return 0; /* VTCR_EL2 */ } else { /* Replicate the single TBID bit so we always have 2 bits. */ @@ -10474,7 +10474,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, int select, tsz, tbi, max_tsz, min_tsz, ps, sh; ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); - bool stage2 = mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; + bool stage2 = regime_is_stage2(mmu_idx); if (!regime_has_2_ranges(mmu_idx)) { select = 0; @@ -10541,22 +10541,18 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, } ds = false; } else if (ds) { - switch (mmu_idx) { - case ARMMMUIdx_Stage2: - case ARMMMUIdx_Stage2_S: + if (regime_is_stage2(mmu_idx)) { if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu); } - break; - default: + } else { if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu); } - break; } if (ds) { min_tsz = 12; diff --git a/target/arm/internals.h b/target/arm/internals.h index 1926f3679c..2c4768dd05 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -673,6 +673,11 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +static inline bool regime_is_stage2(ARMMMUIdx mmu_idx) +{ + return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; +} + /* Return the exception level which controls this address translation regime */ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) { diff --git a/target/arm/ptw.c b/target/arm/ptw.c index aed6f92d6f..32d6412586 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -823,8 +823,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, bool have_wxn; int wxn = 0; - assert(mmu_idx != ARMMMUIdx_Stage2); - assert(mmu_idx != ARMMMUIdx_Stage2_S); + assert(!regime_is_stage2(mmu_idx)); user_rw = simple_ap_to_rw_prot_is_user(ap, true); if (is_user) { @@ -1152,7 +1151,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, goto do_fault; } - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { + if (!regime_is_stage2(mmu_idx)) { /* * The starting level depends on the virtual address size (which can * be up to 48 bits) and the translation granule size. It indicates @@ -1323,7 +1322,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, attrs = extract64(descriptor, 2, 10) | (extract64(descriptor, 52, 12) << 10); - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + if (regime_is_stage2(mmu_idx)) { /* Stage 2 table descriptors do not include any attribute fields */ break; } @@ -1355,7 +1354,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ap = extract32(attrs, 4, 2); - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + if (regime_is_stage2(mmu_idx)) { ns = mmu_idx == ARMMMUIdx_Stage2; xn = extract32(attrs, 11, 2); result->f.prot = get_S2prot(env, ap, xn, s1_is_el0); @@ -1385,7 +1384,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.guarded = guarded; } - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + if (regime_is_stage2(mmu_idx)) { result->cacheattrs.is_s2_format = true; result->cacheattrs.attrs = extract32(attrs, 0, 4); } else { @@ -1416,8 +1415,7 @@ do_fault: fi->type = fault_type; fi->level = level; /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ - fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 || - mmu_idx == ARMMMUIdx_Stage2_S); + fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx); fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; return true; } From 48da29e485af5c92e284fbb6ef279b2a98eea7c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:39 +1000 Subject: [PATCH 08/31] target/arm: Add ptw_idx to S1Translate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hoist the computation of the mmu_idx for the ptw up to get_phys_addr_with_struct and get_phys_addr_twostage. This removes the duplicate check for stage2 disabled from the middle of the walk, performing it only once. Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Tested-by: Alex Bennée Message-id: 20221024051851.3074715-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 71 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 32d6412586..3c153f6831 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -17,6 +17,7 @@ typedef struct S1Translate { ARMMMUIdx in_mmu_idx; + ARMMMUIdx in_ptw_idx; bool in_secure; bool in_debug; bool out_secure; @@ -214,33 +215,24 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, { bool is_secure = ptw->in_secure; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - ARMMMUIdx s2_mmu_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; - bool s2_phys = false; + ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx; uint8_t pte_attrs; bool pte_secure; - if (!arm_mmu_idx_is_stage1_of_2(mmu_idx) - || regime_translation_disabled(env, s2_mmu_idx, is_secure)) { - s2_mmu_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; - s2_phys = true; - } - if (unlikely(ptw->in_debug)) { /* * From gdbstub, do not use softmmu so that we don't modify the * state of the cpu at all, including softmmu tlb contents. */ - if (s2_phys) { - ptw->out_phys = addr; - pte_attrs = 0; - pte_secure = is_secure; - } else { + if (regime_is_stage2(s2_mmu_idx)) { S1Translate s2ptw = { .in_mmu_idx = s2_mmu_idx, + .in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS, .in_secure = is_secure, .in_debug = true, }; GetPhysAddrResult s2 = { }; + if (!get_phys_addr_lpae(env, &s2ptw, addr, MMU_DATA_LOAD, false, &s2, fi)) { goto fail; @@ -248,6 +240,11 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, ptw->out_phys = s2.f.phys_addr; pte_attrs = s2.cacheattrs.attrs; pte_secure = s2.f.attrs.secure; + } else { + /* Regime is physical. */ + ptw->out_phys = addr; + pte_attrs = 0; + pte_secure = is_secure; } ptw->out_host = NULL; } else { @@ -268,7 +265,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, pte_secure = full->attrs.secure; } - if (!s2_phys) { + if (regime_is_stage2(s2_mmu_idx)) { uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure); if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { @@ -1263,7 +1260,18 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr |= (address >> (stride * (4 - level))) & indexmask; descaddr &= ~7ULL; nstable = extract32(tableattrs, 4, 1); - ptw->in_secure = !nstable; + if (!nstable) { + /* + * Stage2_S -> Stage2 or Phys_S -> Phys_NS + * Assert that the non-secure idx are even, and relative order. + */ + QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0); + QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S); + ptw->in_ptw_idx &= ~1; + ptw->in_secure = false; + } descriptor = arm_ldq_ptw(env, ptw, descaddr, fi); if (fi->type != ARMFault_None) { goto do_fault; @@ -2449,6 +2457,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; ptw->in_mmu_idx = s2walk_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + ptw->in_ptw_idx = s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; ptw->in_secure = s2walk_secure; /* @@ -2508,10 +2517,32 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); bool is_secure = ptw->in_secure; + ARMMMUIdx s1_mmu_idx; - if (mmu_idx != s1_mmu_idx) { + switch (mmu_idx) { + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + /* Checking Phys early avoids special casing later vs regime_el. */ + return get_phys_addr_disabled(env, address, access_type, mmu_idx, + is_secure, result, fi); + + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* First stage lookup uses second stage for ptw. */ + ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + break; + + case ARMMMUIdx_E10_0: + s1_mmu_idx = ARMMMUIdx_Stage1_E0; + goto do_twostage; + case ARMMMUIdx_E10_1: + s1_mmu_idx = ARMMMUIdx_Stage1_E1; + goto do_twostage; + case ARMMMUIdx_E10_1_PAN: + s1_mmu_idx = ARMMMUIdx_Stage1_E1_PAN; + do_twostage: /* * Call ourselves recursively to do the stage 1 and then stage 2 * translations if mmu_idx is a two-stage regime, and EL2 present. @@ -2522,6 +2553,12 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw, return get_phys_addr_twostage(env, ptw, address, access_type, result, fi); } + /* fall through */ + + default: + /* Single stage and second stage uses physical for ptw. */ + ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; + break; } /* From 980a68925c8f19ae181c226af0776c0e3ddd0264 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:40 +1000 Subject: [PATCH 09/31] target/arm: Add isar predicates for FEAT_HAFDBS The MMFR1 field may indicate support for hardware update of access flag alone, or access flag and dirty bit. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 09564d0393..9aeed3c848 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -4152,6 +4152,16 @@ static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; } +static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; +} + +static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; +} + static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; From 8973922783483dee02aaf254a9d6a8fdd3e200f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:41 +1000 Subject: [PATCH 10/31] target/arm: Extract HA and HD in aa64_va_parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20221024051851.3074715-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.c | 8 +++++++- target/arm/internals.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index 47afaec6b4..b070a20f1a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -10470,7 +10470,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { uint64_t tcr = regime_tcr(env, mmu_idx); - bool epd, hpd, tsz_oob, ds; + bool epd, hpd, tsz_oob, ds, ha, hd; int select, tsz, tbi, max_tsz, min_tsz, ps, sh; ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); @@ -10489,6 +10489,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, epd = false; sh = extract32(tcr, 12, 2); ps = extract32(tcr, 16, 3); + ha = extract32(tcr, 21, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract32(tcr, 22, 1) && cpu_isar_feature(aa64_hdbs, cpu); ds = extract64(tcr, 32, 1); } else { bool e0pd; @@ -10514,6 +10516,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, e0pd = extract64(tcr, 56, 1); } ps = extract64(tcr, 32, 3); + ha = extract64(tcr, 39, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract64(tcr, 40, 1) && cpu_isar_feature(aa64_hdbs, cpu); ds = extract64(tcr, 59, 1); if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && @@ -10586,6 +10590,8 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, .hpd = hpd, .tsz_oob = tsz_oob, .ds = ds, + .ha = ha, + .hd = ha && hd, .gran = gran, }; } diff --git a/target/arm/internals.h b/target/arm/internals.h index 2c4768dd05..201ae370d5 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1065,6 +1065,8 @@ typedef struct ARMVAParameters { bool hpd : 1; bool tsz_oob : 1; /* tsz has been clamped to legal range */ bool ds : 1; + bool ha : 1; + bool hd : 1; ARMGranuleSize gran : 2; } ARMVAParameters; From 93e5b3a6f9c7e481e781bde69a5f87cd93cc1a34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:42 +1000 Subject: [PATCH 11/31] target/arm: Move S1_ptw_translate outside arm_ld[lq]_ptw Separate S1 translation from the actual lookup. Will enable lpae hardware updates. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-6-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 3c153f6831..44341a9dbc 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -300,18 +300,12 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, } /* All loads done in the course of a page table walk go through here. */ -static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, +static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); uint32_t data; - if (!S1_ptw_translate(env, ptw, addr, fi)) { - /* Failure. */ - assert(fi->s1ptw); - return 0; - } - if (likely(ptw->out_host)) { /* Page tables are in RAM, and we have the host address. */ if (ptw->out_be) { @@ -339,18 +333,12 @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, return data; } -static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, hwaddr addr, +static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); uint64_t data; - if (!S1_ptw_translate(env, ptw, addr, fi)) { - /* Failure. */ - assert(fi->s1ptw); - return 0; - } - if (likely(ptw->out_host)) { /* Page tables are in RAM, and we have the host address. */ if (ptw->out_be) { @@ -507,7 +495,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, fi->type = ARMFault_Translation; goto do_fault; } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -545,7 +536,10 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, /* Fine pagetable. */ table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -630,7 +624,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, fi->type = ARMFault_Translation; goto do_fault; } - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -683,7 +680,10 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, ns = extract32(desc, 3, 1); /* Lookup l2 entry. */ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - desc = arm_ldl_ptw(env, ptw, table, fi); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } @@ -1272,7 +1272,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ptw->in_ptw_idx &= ~1; ptw->in_secure = false; } - descriptor = arm_ldq_ptw(env, ptw, descaddr, fi); + if (!S1_ptw_translate(env, ptw, descaddr, fi)) { + goto do_fault; + } + descriptor = arm_ldq_ptw(env, ptw, fi); if (fi->type != ARMFault_None) { goto do_fault; } From f0a398a2490656fac87b7ee4ba1fb01a42840875 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:43 +1000 Subject: [PATCH 12/31] target/arm: Add ARMFault_UnsuppAtomicUpdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fault type is to be used with FEAT_HAFDBS when the guest enables hw updates, but places the tables in memory where atomic updates are unsupported. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20221024051851.3074715-7-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/internals.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/arm/internals.h b/target/arm/internals.h index 201ae370d5..d9121d9ff8 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -338,6 +338,7 @@ typedef enum ARMFaultType { ARMFault_AsyncExternal, ARMFault_Debug, ARMFault_TLBConflict, + ARMFault_UnsuppAtomicUpdate, ARMFault_Lockdown, ARMFault_Exclusive, ARMFault_ICacheMaint, @@ -524,6 +525,9 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi) case ARMFault_TLBConflict: fsc = 0x30; break; + case ARMFault_UnsuppAtomicUpdate: + fsc = 0x31; + break; case ARMFault_Lockdown: fsc = 0x34; break; From fe4ddc151b72a06eaeaa2359c51b66e698dd9c44 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:44 +1000 Subject: [PATCH 13/31] target/arm: Remove loop from get_phys_addr_lpae The unconditional loop was used both to iterate over levels and to control parsing of attributes. Use an explicit goto in both cases. While this appears less clean for iterating over levels, we will need to jump back into the middle of this loop for atomic updates, which is even uglier. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-8-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 192 +++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 44341a9dbc..2a5f018835 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1061,6 +1061,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); bool guarded = false; + uint64_t descriptor; + bool nstable; /* TODO: This code does not support shareability levels. */ if (aarch64) { @@ -1253,106 +1255,104 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * bits at each step. */ tableattrs = is_secure ? 0 : (1 << 4); - for (;;) { - uint64_t descriptor; - bool nstable; - - descaddr |= (address >> (stride * (4 - level))) & indexmask; - descaddr &= ~7ULL; - nstable = extract32(tableattrs, 4, 1); - if (!nstable) { - /* - * Stage2_S -> Stage2 or Phys_S -> Phys_NS - * Assert that the non-secure idx are even, and relative order. - */ - QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0); - QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0); - QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S); - QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S); - ptw->in_ptw_idx &= ~1; - ptw->in_secure = false; - } - if (!S1_ptw_translate(env, ptw, descaddr, fi)) { - goto do_fault; - } - descriptor = arm_ldq_ptw(env, ptw, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - - if (!(descriptor & 1) || - (!(descriptor & 2) && (level == 3))) { - /* Invalid, or the Reserved level 3 encoding */ - goto do_fault; - } - - descaddr = descriptor & descaddrmask; + next_level: + descaddr |= (address >> (stride * (4 - level))) & indexmask; + descaddr &= ~7ULL; + nstable = extract32(tableattrs, 4, 1); + if (!nstable) { /* - * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] - * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of - * descaddr are in [9:8]. Otherwise, if descaddr is out of range, - * raise AddressSizeFault. + * Stage2_S -> Stage2 or Phys_S -> Phys_NS + * Assert that the non-secure idx are even, and relative order. */ - if (outputsize > 48) { - if (param.ds) { - descaddr |= extract64(descriptor, 8, 2) << 50; - } else { - descaddr |= extract64(descriptor, 12, 4) << 48; - } - } else if (descaddr >> outputsize) { - fault_type = ARMFault_AddressSize; - goto do_fault; - } - - if ((descriptor & 2) && (level < 3)) { - /* - * Table entry. The top five bits are attributes which may - * propagate down through lower levels of the table (and - * which are all arranged so that 0 means "no effect", so - * we can gather them up by ORing in the bits at each level). - */ - tableattrs |= extract64(descriptor, 59, 5); - level++; - indexmask = indexmask_grainsize; - continue; - } - /* - * Block entry at level 1 or 2, or page entry at level 3. - * These are basically the same thing, although the number - * of bits we pull in from the vaddr varies. Note that although - * descaddrmask masks enough of the low bits of the descriptor - * to give a correct page or table address, the address field - * in a block descriptor is smaller; so we need to explicitly - * clear the lower bits here before ORing in the low vaddr bits. - */ - page_size = (1ULL << ((stride * (4 - level)) + 3)); - descaddr &= ~(hwaddr)(page_size - 1); - descaddr |= (address & (page_size - 1)); - /* Extract attributes from the descriptor */ - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 52, 12) << 10); - - if (regime_is_stage2(mmu_idx)) { - /* Stage 2 table descriptors do not include any attribute fields */ - break; - } - /* Merge in attributes from table descriptors */ - attrs |= nstable << 3; /* NS */ - guarded = extract64(descriptor, 50, 1); /* GP */ - if (param.hpd) { - /* HPD disables all the table attributes except NSTable. */ - break; - } - attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ - /* - * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. - */ - attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ - break; + QEMU_BUILD_BUG_ON((ARMMMUIdx_Phys_NS & 1) != 0); + QEMU_BUILD_BUG_ON((ARMMMUIdx_Stage2 & 1) != 0); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS + 1 != ARMMMUIdx_Phys_S); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2 + 1 != ARMMMUIdx_Stage2_S); + ptw->in_ptw_idx &= ~1; + ptw->in_secure = false; } + if (!S1_ptw_translate(env, ptw, descaddr, fi)) { + goto do_fault; + } + descriptor = arm_ldq_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + + if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { + /* Invalid, or the Reserved level 3 encoding */ + goto do_fault; + } + + descaddr = descriptor & descaddrmask; + + /* + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] + * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of + * descaddr are in [9:8]. Otherwise, if descaddr is out of range, + * raise AddressSizeFault. + */ + if (outputsize > 48) { + if (param.ds) { + descaddr |= extract64(descriptor, 8, 2) << 50; + } else { + descaddr |= extract64(descriptor, 12, 4) << 48; + } + } else if (descaddr >> outputsize) { + fault_type = ARMFault_AddressSize; + goto do_fault; + } + + if ((descriptor & 2) && (level < 3)) { + /* + * Table entry. The top five bits are attributes which may + * propagate down through lower levels of the table (and + * which are all arranged so that 0 means "no effect", so + * we can gather them up by ORing in the bits at each level). + */ + tableattrs |= extract64(descriptor, 59, 5); + level++; + indexmask = indexmask_grainsize; + goto next_level; + } + + /* + * Block entry at level 1 or 2, or page entry at level 3. + * These are basically the same thing, although the number + * of bits we pull in from the vaddr varies. Note that although + * descaddrmask masks enough of the low bits of the descriptor + * to give a correct page or table address, the address field + * in a block descriptor is smaller; so we need to explicitly + * clear the lower bits here before ORing in the low vaddr bits. + */ + page_size = (1ULL << ((stride * (4 - level)) + 3)); + descaddr &= ~(hwaddr)(page_size - 1); + descaddr |= (address & (page_size - 1)); + /* Extract attributes from the descriptor */ + attrs = extract64(descriptor, 2, 10) + | (extract64(descriptor, 52, 12) << 10); + + if (regime_is_stage2(mmu_idx)) { + /* Stage 2 table descriptors do not include any attribute fields */ + goto skip_attrs; + } + /* Merge in attributes from table descriptors */ + attrs |= nstable << 3; /* NS */ + guarded = extract64(descriptor, 50, 1); /* GP */ + if (param.hpd) { + /* HPD disables all the table attributes except NSTable. */ + goto skip_attrs; + } + attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ + skip_attrs: + /* * Here descaddr is the final physical address, and attributes * are all in attrs. From 27c1b81d6195ca5f39d656c5cca497ed78943339 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:45 +1000 Subject: [PATCH 14/31] target/arm: Fix fault reporting in get_phys_addr_lpae MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always overriding fi->type was incorrect, as we would not properly propagate the fault type from S1_ptw_translate, or arm_ldq_ptw. Simplify things by providing a new label for a translation fault. For other faults, store into fi directly. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20221024051851.3074715-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 2a5f018835..3302376e42 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1044,8 +1044,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = ptw->in_mmu_idx; bool is_secure = ptw->in_secure; - /* Read an LPAE long-descriptor translation table. */ - ARMFaultType fault_type = ARMFault_Translation; uint32_t level; ARMVAParameters param; uint64_t ttbr; @@ -1082,8 +1080,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * so our choice is to always raise the fault. */ if (param.tsz_oob) { - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } addrsize = 64 - 8 * param.tbi; @@ -1120,8 +1117,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, addrsize - inputsize); if (-top_bits != param.select) { /* The gap between the two regions is a Translation fault */ - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } } @@ -1147,7 +1143,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * Translation table walk disabled => Translation fault on TLB miss * Note: This is always 0 on 64-bit EL2 and EL3. */ - goto do_fault; + goto do_translation_fault; } if (!regime_is_stage2(mmu_idx)) { @@ -1178,8 +1174,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (param.ds && stride == 9 && sl2) { if (sl0 != 0) { level = 0; - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } startlevel = -1; } else if (!aarch64 || stride == 9) { @@ -1198,8 +1193,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, ok = check_s2_mmu_setup(cpu, aarch64, startlevel, inputsize, stride, outputsize); if (!ok) { - fault_type = ARMFault_Translation; - goto do_fault; + goto do_translation_fault; } level = startlevel; } @@ -1221,7 +1215,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr |= extract64(ttbr, 2, 4) << 48; } else if (descaddr >> outputsize) { level = 0; - fault_type = ARMFault_AddressSize; + fi->type = ARMFault_AddressSize; goto do_fault; } @@ -1282,7 +1276,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { /* Invalid, or the Reserved level 3 encoding */ - goto do_fault; + goto do_translation_fault; } descaddr = descriptor & descaddrmask; @@ -1300,7 +1294,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr |= extract64(descriptor, 12, 4) << 48; } } else if (descaddr >> outputsize) { - fault_type = ARMFault_AddressSize; + fi->type = ARMFault_AddressSize; goto do_fault; } @@ -1357,9 +1351,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * Here descaddr is the final physical address, and attributes * are all in attrs. */ - fault_type = ARMFault_AccessFlag; if ((attrs & (1 << 8)) == 0) { /* Access flag */ + fi->type = ARMFault_AccessFlag; goto do_fault; } @@ -1376,8 +1370,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } - fault_type = ARMFault_Permission; if (!(result->f.prot & (1 << access_type))) { + fi->type = ARMFault_Permission; goto do_fault; } @@ -1422,8 +1416,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.lg_page_size = ctz64(page_size); return false; -do_fault: - fi->type = fault_type; + do_translation_fault: + fi->type = ARMFault_Translation; + do_fault: fi->level = level; /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ fi->stage2 = fi->s1ptw || regime_is_stage2(mmu_idx); From 4566609176f82a8033d422bc6c04fc9c354bed24 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:46 +1000 Subject: [PATCH 15/31] target/arm: Don't shift attrs in get_phys_addr_lpae MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leave the upper and lower attributes in the place they originate from in the descriptor. Shifting them around is confusing, since one cannot read the bit numbers out of the manual. Also, new attributes have been added which would alter the shifts. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-id: 20221024051851.3074715-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 3302376e42..691110f70c 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1050,7 +1050,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, hwaddr descaddr, indexmask, indexmask_grainsize; uint32_t tableattrs; target_ulong page_size; - uint32_t attrs; + uint64_t attrs; int32_t stride; int addrsize, inputsize, outputsize; uint64_t tcr = regime_tcr(env, mmu_idx); @@ -1324,49 +1324,48 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); /* Extract attributes from the descriptor */ - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 52, 12) << 10); + attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(52, 12)); if (regime_is_stage2(mmu_idx)) { /* Stage 2 table descriptors do not include any attribute fields */ goto skip_attrs; } /* Merge in attributes from table descriptors */ - attrs |= nstable << 3; /* NS */ + attrs |= nstable << 5; /* NS */ guarded = extract64(descriptor, 50, 1); /* GP */ if (param.hpd) { /* HPD disables all the table attributes except NSTable. */ goto skip_attrs; } - attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ /* * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 * means "force PL1 access only", which means forcing AP[1] to 0. */ - attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ skip_attrs: /* * Here descaddr is the final physical address, and attributes * are all in attrs. */ - if ((attrs & (1 << 8)) == 0) { + if ((attrs & (1 << 10)) == 0) { /* Access flag */ fi->type = ARMFault_AccessFlag; goto do_fault; } - ap = extract32(attrs, 4, 2); + ap = extract32(attrs, 6, 2); if (regime_is_stage2(mmu_idx)) { ns = mmu_idx == ARMMMUIdx_Stage2; - xn = extract32(attrs, 11, 2); + xn = extract64(attrs, 53, 2); result->f.prot = get_S2prot(env, ap, xn, s1_is_el0); } else { - ns = extract32(attrs, 3, 1); - xn = extract32(attrs, 12, 1); - pxn = extract32(attrs, 11, 1); + ns = extract32(attrs, 5, 1); + xn = extract64(attrs, 54, 1); + pxn = extract64(attrs, 53, 1); result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } @@ -1391,10 +1390,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (regime_is_stage2(mmu_idx)) { result->cacheattrs.is_s2_format = true; - result->cacheattrs.attrs = extract32(attrs, 0, 4); + result->cacheattrs.attrs = extract32(attrs, 2, 4); } else { /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); + uint8_t attrindx = extract32(attrs, 2, 3); uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; assert(attrindx <= 7); result->cacheattrs.is_s2_format = false; @@ -1409,7 +1408,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (param.ds) { result->cacheattrs.shareability = param.sh; } else { - result->cacheattrs.shareability = extract32(attrs, 6, 2); + result->cacheattrs.shareability = extract32(attrs, 8, 2); } result->f.phys_addr = descaddr; From 0e8df0fe2425eae7baaf18da869da9aa3c44dc03 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:47 +1000 Subject: [PATCH 16/31] target/arm: Consider GP an attribute in get_phys_addr_lpae MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both GP and DBM are in the upper attribute block. Extend the computation of attrs to include them, then simplify the setting of guarded. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20221024051851.3074715-11-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 691110f70c..79a0ef45c7 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1058,7 +1058,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint32_t el = regime_el(env, mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); - bool guarded = false; uint64_t descriptor; bool nstable; @@ -1324,7 +1323,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); /* Extract attributes from the descriptor */ - attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(52, 12)); + attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); if (regime_is_stage2(mmu_idx)) { /* Stage 2 table descriptors do not include any attribute fields */ @@ -1332,7 +1331,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, } /* Merge in attributes from table descriptors */ attrs |= nstable << 5; /* NS */ - guarded = extract64(descriptor, 50, 1); /* GP */ if (param.hpd) { /* HPD disables all the table attributes except NSTable. */ goto skip_attrs; @@ -1385,7 +1383,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */ if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) { - result->f.guarded = guarded; + result->f.guarded = extract64(attrs, 50, 1); /* GP */ } if (regime_is_stage2(mmu_idx)) { From 34a57faeab62b27c01f008ef08654ca225e46673 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:48 +1000 Subject: [PATCH 17/31] target/arm: Tidy merging of attributes from descriptor and table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace some gotos with some nested if statements. Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20221024051851.3074715-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 79a0ef45c7..73b3c37b23 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1322,27 +1322,25 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, page_size = (1ULL << ((stride * (4 - level)) + 3)); descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); - /* Extract attributes from the descriptor */ - attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); - if (regime_is_stage2(mmu_idx)) { - /* Stage 2 table descriptors do not include any attribute fields */ - goto skip_attrs; - } - /* Merge in attributes from table descriptors */ - attrs |= nstable << 5; /* NS */ - if (param.hpd) { - /* HPD disables all the table attributes except NSTable. */ - goto skip_attrs; - } - attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ /* - * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. + * Extract attributes from the descriptor, and apply table descriptors. + * Stage 2 table descriptors do not include any attribute fields. + * HPD disables all the table attributes except NSTable. */ - attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ - skip_attrs: + attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); + if (!regime_is_stage2(mmu_idx)) { + attrs |= nstable << 5; /* NS */ + if (!param.hpd) { + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ + } + } /* * Here descaddr is the final physical address, and attributes From 71943a1e9084dde71cabba5895920f3a0c54de9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:49 +1000 Subject: [PATCH 18/31] target/arm: Implement FEAT_HAFDBS, access flag portion Perform the atomic update for hardware management of the access flag. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-13-richard.henderson@linaro.org [PMM: Fix accidental PROT_WRITE to PAGE_WRITE; add missing main-loop.h include] Signed-off-by: Peter Maydell --- docs/system/arm/emulation.rst | 1 + target/arm/cpu64.c | 1 + target/arm/ptw.c | 177 +++++++++++++++++++++++++++++----- 3 files changed, 157 insertions(+), 22 deletions(-) diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index fd61360a08..e3af79bb8c 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -33,6 +33,7 @@ the following architecture extensions: - FEAT_FlagM (Flag manipulation instructions v2) - FEAT_FlagM2 (Enhancements to flag manipulation instructions) - FEAT_GTG (Guest translation granule size) +- FEAT_HAFDBS (Hardware management of the access flag and dirty bit state) - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_I8MM (AArch64 Int8 matrix multiplication instructions) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index da95eabab5..f2c3e41f5a 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -1165,6 +1165,7 @@ static void aarch64_max_initfn(Object *obj) cpu->isar.id_aa64mmfr0 = t; t = cpu->isar.id_aa64mmfr1; + t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 1); /* FEAT_HAFDBS, AF only */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* FEAT_HPDS */ diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 73b3c37b23..9941fa0ef9 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/range.h" +#include "qemu/main-loop.h" #include "exec/exec-all.h" #include "cpu.h" #include "internals.h" @@ -21,7 +22,9 @@ typedef struct S1Translate { bool in_secure; bool in_debug; bool out_secure; + bool out_rw; bool out_be; + hwaddr out_virt; hwaddr out_phys; void *out_host; } S1Translate; @@ -219,6 +222,8 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, uint8_t pte_attrs; bool pte_secure; + ptw->out_virt = addr; + if (unlikely(ptw->in_debug)) { /* * From gdbstub, do not use softmmu so that we don't modify the @@ -247,6 +252,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, pte_secure = is_secure; } ptw->out_host = NULL; + ptw->out_rw = false; } else { CPUTLBEntryFull *full; int flags; @@ -261,6 +267,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, goto fail; } ptw->out_phys = full->phys_addr; + ptw->out_rw = full->prot & PAGE_WRITE; pte_attrs = full->pte_attrs; pte_secure = full->attrs.secure; } @@ -304,14 +311,16 @@ static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); + void *host = ptw->out_host; uint32_t data; - if (likely(ptw->out_host)) { + if (likely(host)) { /* Page tables are in RAM, and we have the host address. */ + data = qatomic_read((uint32_t *)host); if (ptw->out_be) { - data = ldl_be_p(ptw->out_host); + data = be32_to_cpu(data); } else { - data = ldl_le_p(ptw->out_host); + data = le32_to_cpu(data); } } else { /* Page tables are in MMIO. */ @@ -337,15 +346,25 @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { CPUState *cs = env_cpu(env); + void *host = ptw->out_host; uint64_t data; - if (likely(ptw->out_host)) { + if (likely(host)) { /* Page tables are in RAM, and we have the host address. */ +#ifdef CONFIG_ATOMIC64 + data = qatomic_read__nocheck((uint64_t *)host); if (ptw->out_be) { - data = ldq_be_p(ptw->out_host); + data = be64_to_cpu(data); } else { - data = ldq_le_p(ptw->out_host); + data = le64_to_cpu(data); } +#else + if (ptw->out_be) { + data = ldq_be_p(host); + } else { + data = ldq_le_p(host); + } +#endif } else { /* Page tables are in MMIO. */ MemTxAttrs attrs = { .secure = ptw->out_secure }; @@ -366,6 +385,91 @@ static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, return data; } +static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, + uint64_t new_val, S1Translate *ptw, + ARMMMUFaultInfo *fi) +{ + uint64_t cur_val; + void *host = ptw->out_host; + + if (unlikely(!host)) { + fi->type = ARMFault_UnsuppAtomicUpdate; + fi->s1ptw = true; + return 0; + } + + /* + * Raising a stage2 Protection fault for an atomic update to a read-only + * page is delayed until it is certain that there is a change to make. + */ + if (unlikely(!ptw->out_rw)) { + int flags; + void *discard; + + env->tlb_fi = fi; + flags = probe_access_flags(env, ptw->out_virt, MMU_DATA_STORE, + arm_to_core_mmu_idx(ptw->in_ptw_idx), + true, &discard, 0); + env->tlb_fi = NULL; + + if (unlikely(flags & TLB_INVALID_MASK)) { + assert(fi->type != ARMFault_None); + fi->s2addr = ptw->out_virt; + fi->stage2 = true; + fi->s1ptw = true; + fi->s1ns = !ptw->in_secure; + return 0; + } + + /* In case CAS mismatches and we loop, remember writability. */ + ptw->out_rw = true; + } + +#ifdef CONFIG_ATOMIC64 + if (ptw->out_be) { + old_val = cpu_to_be64(old_val); + new_val = cpu_to_be64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = be64_to_cpu(cur_val); + } else { + old_val = cpu_to_le64(old_val); + new_val = cpu_to_le64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = le64_to_cpu(cur_val); + } +#else + /* + * We can't support the full 64-bit atomic cmpxchg on the host. + * Because this is only used for FEAT_HAFDBS, which is only for AA64, + * we know that TCG_OVERSIZED_GUEST is set, which means that we are + * running in round-robin mode and could only race with dma i/o. + */ +#ifndef TCG_OVERSIZED_GUEST +# error "Unexpected configuration" +#endif + bool locked = qemu_mutex_iothread_locked(); + if (!locked) { + qemu_mutex_lock_iothread(); + } + if (ptw->out_be) { + cur_val = ldq_be_p(host); + if (cur_val == old_val) { + stq_be_p(host, new_val); + } + } else { + cur_val = ldq_le_p(host); + if (cur_val == old_val) { + stq_le_p(host, new_val); + } + } + if (!locked) { + qemu_mutex_unlock_iothread(); + } +#endif + + return cur_val; +} + static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t *table, uint32_t address) { @@ -1058,7 +1162,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, uint32_t el = regime_el(env, mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); - uint64_t descriptor; + uint64_t descriptor, new_descriptor; bool nstable; /* TODO: This code does not support shareability levels. */ @@ -1272,7 +1376,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, if (fi->type != ARMFault_None) { goto do_fault; } + new_descriptor = descriptor; + restart_atomic_update: if (!(descriptor & 1) || (!(descriptor & 2) && (level == 3))) { /* Invalid, or the Reserved level 3 encoding */ goto do_translation_fault; @@ -1318,17 +1424,36 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * to give a correct page or table address, the address field * in a block descriptor is smaller; so we need to explicitly * clear the lower bits here before ORing in the low vaddr bits. + * + * Afterward, descaddr is the final physical address. */ page_size = (1ULL << ((stride * (4 - level)) + 3)); descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); + if (likely(!ptw->in_debug)) { + /* + * Access flag. + * If HA is enabled, prepare to update the descriptor below. + * Otherwise, pass the access fault on to software. + */ + if (!(descriptor & (1 << 10))) { + if (param.ha) { + new_descriptor |= 1 << 10; /* AF */ + } else { + fi->type = ARMFault_AccessFlag; + goto do_fault; + } + } + } + /* - * Extract attributes from the descriptor, and apply table descriptors. - * Stage 2 table descriptors do not include any attribute fields. - * HPD disables all the table attributes except NSTable. + * Extract attributes from the (modified) descriptor, and apply + * table descriptors. Stage 2 table descriptors do not include + * any attribute fields. HPD disables all the table attributes + * except NSTable. */ - attrs = descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); + attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); if (!regime_is_stage2(mmu_idx)) { attrs |= nstable << 5; /* NS */ if (!param.hpd) { @@ -1342,18 +1467,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, } } - /* - * Here descaddr is the final physical address, and attributes - * are all in attrs. - */ - if ((attrs & (1 << 10)) == 0) { - /* Access flag */ - fi->type = ARMFault_AccessFlag; - goto do_fault; - } - ap = extract32(attrs, 6, 2); - if (regime_is_stage2(mmu_idx)) { ns = mmu_idx == ARMMMUIdx_Stage2; xn = extract64(attrs, 53, 2); @@ -1370,6 +1484,25 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, goto do_fault; } + /* If FEAT_HAFDBS has made changes, update the PTE. */ + if (new_descriptor != descriptor) { + new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + /* + * I_YZSVV says that if the in-memory descriptor has changed, + * then we must use the information in that new value + * (which might include a different output address, different + * attributes, or generate a fault). + * Restart the handling of the descriptor value from scratch. + */ + if (new_descriptor != descriptor) { + descriptor = new_descriptor; + goto restart_atomic_update; + } + } + if (ns) { /* * The NS bit will (as required by the architecture) have no effect if From 65c123fdf577413a7d910e6b07c10e79d118041f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:50 +1000 Subject: [PATCH 19/31] target/arm: Implement FEAT_HAFDBS, dirty bit portion Perform the atomic update for hardware management of the dirty bit. Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-14-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu64.c | 2 +- target/arm/ptw.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index f2c3e41f5a..3d74f134f5 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -1165,7 +1165,7 @@ static void aarch64_max_initfn(Object *obj) cpu->isar.id_aa64mmfr0 = t; t = cpu->isar.id_aa64mmfr1; - t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 1); /* FEAT_HAFDBS, AF only */ + t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* FEAT_HPDS */ diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 9941fa0ef9..a1f4ae654f 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1445,6 +1445,22 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, goto do_fault; } } + + /* + * Dirty Bit. + * If HD is enabled, pre-emptively set/clear the appropriate AP/S2AP + * bit for writeback. The actual write protection test may still be + * overridden by tableattrs, to be merged below. + */ + if (param.hd + && extract64(descriptor, 51, 1) /* DBM */ + && access_type == MMU_DATA_STORE) { + if (regime_is_stage2(mmu_idx)) { + new_descriptor |= 1ull << 7; /* set S2AP[1] */ + } else { + new_descriptor &= ~(1ull << 7); /* clear AP[2] */ + } + } } /* From c8d6c286ab4bff1c5ff511f3e834fb2a713d65d2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 15:18:51 +1000 Subject: [PATCH 20/31] target/arm: Use the max page size in a 2-stage ptw We had only been reporting the stage2 page size. This causes problems if stage1 is using a larger page size (16k, 2M, etc), but stage2 is using a smaller page size, because cputlb does not set large_page_{addr,mask} properly. Fix by using the max of the two page sizes. Reported-by: Marc Zyngier Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20221024051851.3074715-15-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/ptw.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index a1f4ae654f..58a7bbda50 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -2571,7 +2571,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, ARMMMUFaultInfo *fi) { hwaddr ipa; - int s1_prot; + int s1_prot, s1_lgpgsz; bool is_secure = ptw->in_secure; bool ret, ipa_secure, s2walk_secure; ARMCacheAttrs cacheattrs1; @@ -2607,6 +2607,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, * Save the stage1 results so that we may merge prot and cacheattrs later. */ s1_prot = result->f.prot; + s1_lgpgsz = result->f.lg_page_size; cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); @@ -2621,6 +2622,14 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, return ret; } + /* + * Use the maximum of the S1 & S2 page size, so that invalidation + * of pages > TARGET_PAGE_SIZE works correctly. + */ + if (result->f.lg_page_size < s1_lgpgsz) { + result->f.lg_page_size = s1_lgpgsz; + } + /* Combine the S1 and S2 cache attributes. */ hcr = arm_hcr_el2_eff_secstate(env, is_secure); if (hcr & HCR_DC) { From 7966d70f6f6b188475e67c2c363f19eec3d28c96 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:17 +0200 Subject: [PATCH 21/31] reset: allow registering handlers that aren't called by snapshot loading Snapshot loading only expects to call deterministic handlers, not non-deterministic ones. So introduce a way of registering handlers that won't be called when reseting for snapshots. Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-2-Jason@zx2c4.com [PMM: updated json doc comment with Markus' text; fixed checkpatch style nit] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/aspeed.c | 4 ++-- hw/arm/mps2-tz.c | 4 ++-- hw/core/reset.c | 17 ++++++++++++++++- hw/hppa/machine.c | 4 ++-- hw/i386/microvm.c | 4 ++-- hw/i386/pc.c | 6 +++--- hw/ppc/pegasos2.c | 4 ++-- hw/ppc/pnv.c | 4 ++-- hw/ppc/spapr.c | 4 ++-- hw/s390x/s390-virtio-ccw.c | 4 ++-- include/hw/boards.h | 2 +- include/sysemu/reset.h | 5 ++++- migration/savevm.c | 2 +- qapi/run-state.json | 6 +++++- softmmu/runstate.c | 11 ++++++++--- 15 files changed, 54 insertions(+), 27 deletions(-) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index f8bc6d4a14..55f114ef72 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -1356,12 +1356,12 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); } -static void fby35_reset(MachineState *state) +static void fby35_reset(MachineState *state, ShutdownCause reason) { AspeedMachineState *bmc = ASPEED_MACHINE(state); AspeedGPIOState *gpio = &bmc->soc.gpio; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Board ID: 7 (Class-1, 4 slots) */ object_property_set_bool(OBJECT(gpio), "gpioV4", true, &error_fatal); diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index 394192b9b2..284c09c91d 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -1239,7 +1239,7 @@ static void mps2_set_remap(Object *obj, const char *value, Error **errp) } } -static void mps2_machine_reset(MachineState *machine) +static void mps2_machine_reset(MachineState *machine, ShutdownCause reason) { MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine); @@ -1249,7 +1249,7 @@ static void mps2_machine_reset(MachineState *machine) * reset see the correct mapping. */ remap_memory(mms, mms->remap); - qemu_devices_reset(); + qemu_devices_reset(reason); } static void mps2tz_class_init(ObjectClass *oc, void *data) diff --git a/hw/core/reset.c b/hw/core/reset.c index 36be82c491..d3263b613e 100644 --- a/hw/core/reset.c +++ b/hw/core/reset.c @@ -33,6 +33,7 @@ typedef struct QEMUResetEntry { QTAILQ_ENTRY(QEMUResetEntry) entry; QEMUResetHandler *func; void *opaque; + bool skip_on_snapshot_load; } QEMUResetEntry; static QTAILQ_HEAD(, QEMUResetEntry) reset_handlers = @@ -47,6 +48,16 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque) QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); } +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque) +{ + QEMUResetEntry *re = g_new0(QEMUResetEntry, 1); + + re->func = func; + re->opaque = opaque; + re->skip_on_snapshot_load = true; + QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); +} + void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) { QEMUResetEntry *re; @@ -60,12 +71,16 @@ void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) } } -void qemu_devices_reset(void) +void qemu_devices_reset(ShutdownCause reason) { QEMUResetEntry *re, *nre; /* reset all devices */ QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) { + if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD && + re->skip_on_snapshot_load) { + continue; + } re->func(re->opaque); } } diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index e53d5f0fa7..19ea7c2c66 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -411,12 +411,12 @@ static void machine_hppa_init(MachineState *machine) cpu[0]->env.gr[19] = FW_CFG_IO_BASE; } -static void hppa_machine_reset(MachineState *ms) +static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) { unsigned int smp_cpus = ms->smp.cpus; int i; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Start all CPUs at the firmware entry point. * Monarch CPU will initialize firmware, secondary CPUs diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 52f9aa9d8c..ffd1884100 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -467,7 +467,7 @@ static void microvm_machine_state_init(MachineState *machine) microvm_devices_init(mms); } -static void microvm_machine_reset(MachineState *machine) +static void microvm_machine_reset(MachineState *machine, ShutdownCause reason) { MicrovmMachineState *mms = MICROVM_MACHINE(machine); CPUState *cs; @@ -480,7 +480,7 @@ static void microvm_machine_reset(MachineState *machine) mms->kernel_cmdline_fixed = true; } - qemu_devices_reset(); + qemu_devices_reset(reason); CPU_FOREACH(cs) { cpu = X86_CPU(cs); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 768982ae9a..3e86083db3 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1847,12 +1847,12 @@ static void pc_machine_initfn(Object *obj) cxl_machine_init(obj, &pcms->cxl_devices_state); } -static void pc_machine_reset(MachineState *machine) +static void pc_machine_reset(MachineState *machine, ShutdownCause reason) { CPUState *cs; X86CPU *cpu; - qemu_devices_reset(); + qemu_devices_reset(reason); /* Reset APIC after devices have been reset to cancel * any changes that qemu_devices_reset() might have done. @@ -1867,7 +1867,7 @@ static void pc_machine_reset(MachineState *machine) static void pc_machine_wakeup(MachineState *machine) { cpu_synchronize_all_states(); - pc_machine_reset(machine); + pc_machine_reset(machine, SHUTDOWN_CAUSE_NONE); cpu_synchronize_all_post_reset(); } diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index ecf682b148..bb4d008ba9 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -248,14 +248,14 @@ static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); } -static void pegasos2_machine_reset(MachineState *machine) +static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); void *fdt; uint64_t d[2]; int sz; - qemu_devices_reset(); + qemu_devices_reset(reason); if (!pm->vof) { return; /* Firmware should set up machine so nothing to do */ } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 40bb573d1a..3d01e26f84 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -643,13 +643,13 @@ static void pnv_powerdown_notify(Notifier *n, void *opaque) } } -static void pnv_reset(MachineState *machine) +static void pnv_reset(MachineState *machine, ShutdownCause reason) { PnvMachineState *pnv = PNV_MACHINE(machine); IPMIBmc *bmc; void *fdt; - qemu_devices_reset(); + qemu_devices_reset(reason); /* * The machine should provide by default an internal BMC simulator. diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f79ac85ca1..66b414d2e9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1623,7 +1623,7 @@ void spapr_check_mmu_mode(bool guest_radix) } } -static void spapr_machine_reset(MachineState *machine) +static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; @@ -1649,7 +1649,7 @@ static void spapr_machine_reset(MachineState *machine) spapr_setup_hpt(spapr); } - qemu_devices_reset(); + qemu_devices_reset(reason); spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 1cc20d8717..806de32034 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -411,7 +411,7 @@ static void s390_pv_prepare_reset(S390CcwMachineState *ms) s390_pv_prep_reset(); } -static void s390_machine_reset(MachineState *machine) +static void s390_machine_reset(MachineState *machine, ShutdownCause reason) { S390CcwMachineState *ms = S390_CCW_MACHINE(machine); enum s390_reset reset_type; @@ -433,7 +433,7 @@ static void s390_machine_reset(MachineState *machine) s390_machine_unprotect(ms); } - qemu_devices_reset(); + qemu_devices_reset(reason); s390_crypto_reset(); /* configure and start the ipl CPU only */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 311ed17e18..90f1dd3aeb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -231,7 +231,7 @@ struct MachineClass { const char *deprecation_reason; void (*init)(MachineState *state); - void (*reset)(MachineState *state); + void (*reset)(MachineState *state, ShutdownCause reason); void (*wakeup)(MachineState *state); int (*kvm_type)(MachineState *machine, const char *arg); diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h index 0b0d6d7598..609e4d50c2 100644 --- a/include/sysemu/reset.h +++ b/include/sysemu/reset.h @@ -1,10 +1,13 @@ #ifndef QEMU_SYSEMU_RESET_H #define QEMU_SYSEMU_RESET_H +#include "qapi/qapi-events-run-state.h" + typedef void QEMUResetHandler(void *opaque); void qemu_register_reset(QEMUResetHandler *func, void *opaque); +void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque); void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); -void qemu_devices_reset(void); +void qemu_devices_reset(ShutdownCause reason); #endif diff --git a/migration/savevm.c b/migration/savevm.c index 48e85c052c..a0cdb714f7 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -3058,7 +3058,7 @@ bool load_snapshot(const char *name, const char *vmstate, goto err_drain; } - qemu_system_reset(SHUTDOWN_CAUSE_NONE); + qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD); mis->from_src_file = f; if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { diff --git a/qapi/run-state.json b/qapi/run-state.json index 49989d30e6..419c188dd1 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -86,12 +86,16 @@ # ignores --no-reboot. This is useful for sanitizing # hypercalls on s390 that are used during kexec/kdump/boot # +# @snapshot-load: A snapshot is being loaded by the record & replay +# subsystem. This value is used only within QEMU. It +# doesn't occur in QMP. (since 7.2) +# ## { 'enum': 'ShutdownCause', # Beware, shutdown_caused_by_guest() depends on enumeration order 'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset', 'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset', - 'guest-panic', 'subsystem-reset'] } + 'guest-panic', 'subsystem-reset', 'snapshot-load'] } ## # @StatusInfo: diff --git a/softmmu/runstate.c b/softmmu/runstate.c index 1e68680b9d..3dd83d5e5d 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -441,11 +441,16 @@ void qemu_system_reset(ShutdownCause reason) cpu_synchronize_all_states(); if (mc && mc->reset) { - mc->reset(current_machine); + mc->reset(current_machine, reason); } else { - qemu_devices_reset(); + qemu_devices_reset(reason); } - if (reason && reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { + switch (reason) { + case SHUTDOWN_CAUSE_NONE: + case SHUTDOWN_CAUSE_SUBSYSTEM_RESET: + case SHUTDOWN_CAUSE_SNAPSHOT_LOAD: + break; + default: qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } cpu_synchronize_all_post_reset(); From e1e618b9a0f9c23e9caa5933fa30ee3d33e00bfe Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:18 +0200 Subject: [PATCH 22/31] device-tree: add re-randomization helper function When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Several architectures require this functionality, so export a function for injecting a new seed into the given FDT. Cc: Alistair Francis Cc: David Gibson Signed-off-by: Jason A. Donenfeld Reviewed-by: Alistair Francis Message-id: 20221025004327.568476-3-Jason@zx2c4.com Signed-off-by: Peter Maydell --- include/sysemu/device_tree.h | 9 +++++++++ softmmu/device_tree.c | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index e7c5441f56..ca5339beae 100644 --- a/include/sysemu/device_tree.h +++ b/include/sysemu/device_tree.h @@ -197,6 +197,15 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt, qdt_tmp); \ }) + +/** + * qemu_fdt_randomize_seeds: + * @fdt: device tree blob + * + * Re-randomize all "rng-seed" properties with new seeds. + */ +void qemu_fdt_randomize_seeds(void *fdt); + #define FDT_PCI_RANGE_RELOCATABLE 0x80000000 #define FDT_PCI_RANGE_PREFETCHABLE 0x40000000 #define FDT_PCI_RANGE_ALIASED 0x20000000 diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c index ce74f3d48d..30aa3aea9f 100644 --- a/softmmu/device_tree.c +++ b/softmmu/device_tree.c @@ -22,6 +22,7 @@ #include "qemu/option.h" #include "qemu/bswap.h" #include "qemu/cutils.h" +#include "qemu/guest-random.h" #include "sysemu/device_tree.h" #include "hw/loader.h" #include "hw/boards.h" @@ -680,3 +681,23 @@ void hmp_dumpdtb(Monitor *mon, const QDict *qdict) info_report("dtb dumped to %s", filename); } + +void qemu_fdt_randomize_seeds(void *fdt) +{ + int noffset, poffset, len; + const char *name; + uint8_t *data; + + for (noffset = fdt_next_node(fdt, 0, NULL); + noffset >= 0; + noffset = fdt_next_node(fdt, noffset, NULL)) { + for (poffset = fdt_first_property_offset(fdt, noffset); + poffset >= 0; + poffset = fdt_next_property_offset(fdt, poffset)) { + data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len); + if (!data || strcmp(name, "rng-seed")) + continue; + qemu_guest_getrandom_nofail(data, len); + } + } +} From 14b29fea742034186403914b4d013d0e83f19e78 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:19 +0200 Subject: [PATCH 23/31] x86: do not re-randomize RNG seed on snapshot load Snapshot loading is supposed to be deterministic, so we shouldn't re-randomize the various seeds used. Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-4-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/i386/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 1148f70c03..bd50a064a3 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1111,7 +1111,7 @@ void x86_load_linux(X86MachineState *x86ms, setup_data->type = cpu_to_le32(SETUP_RNG_SEED); setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - qemu_register_reset(reset_rng_seed, setup_data); + qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data); fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL, setup_data, kernel, kernel_size, true); } else { From 98aa4c839d2cd24a2f6d3bbaa68fcb5d4aa502cd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:20 +0200 Subject: [PATCH 24/31] arm: re-randomize rng-seed on reboot When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Since the FDT is in the ROM region at this point, we add a hook right after the ROM has been added, so that we have a pointer to that copy of the FDT. Cc: Peter Maydell Cc: qemu-arm@nongnu.org Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-5-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/boot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index b0b92af188..b106f31468 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -683,6 +683,8 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, * the DTB is copied again upon reset, even if addr points into RAM. */ rom_add_blob_fixed_as("dtb", fdt, size, addr, as); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(as, addr, size)); g_free(fdt); From 64c75db3c5ab6f8c75c8132b200cf1c64186f04b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:21 +0200 Subject: [PATCH 25/31] riscv: re-randomize rng-seed on reboot When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Since the FDT is in the ROM region at this point, we add a hook right after the ROM has been added, so that we have a pointer to that copy of the FDT. Cc: Palmer Dabbelt Cc: Alistair Francis Cc: Bin Meng Cc: qemu-riscv@nongnu.org Signed-off-by: Jason A. Donenfeld Reviewed-by: Alistair Francis Message-id: 20221025004327.568476-6-Jason@zx2c4.com Signed-off-by: Peter Maydell --- hw/riscv/boot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index e82bf27338..ebd351c840 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -30,6 +30,7 @@ #include "sysemu/device_tree.h" #include "sysemu/qtest.h" #include "sysemu/kvm.h" +#include "sysemu/reset.h" #include @@ -241,6 +242,8 @@ uint64_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr, &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); return fdt_addr; } From 1ffd007c9c5862d50235cfb507a1722fe1c213b5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:22 +0200 Subject: [PATCH 26/31] m68k/virt: do not re-randomize RNG seed on snapshot load Snapshot loading is supposed to be deterministic, so we shouldn't re-randomize the various seeds used. Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-7-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/m68k/virt.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 89c4108eb5..da5eafd275 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -89,7 +89,6 @@ typedef struct { M68kCPU *cpu; hwaddr initial_pc; hwaddr initial_stack; - struct bi_record *rng_seed; } ResetInfo; static void main_cpu_reset(void *opaque) @@ -98,16 +97,18 @@ static void main_cpu_reset(void *opaque) M68kCPU *cpu = reset_info->cpu; CPUState *cs = CPU(cpu); - if (reset_info->rng_seed) { - qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, - be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); - } - cpu_reset(cs); cpu->env.aregs[7] = reset_info->initial_stack; cpu->env.pc = reset_info->initial_pc; } +static void rerandomize_rng_seed(void *opaque) +{ + struct bi_record *rng_seed = opaque; + qemu_guest_getrandom_nofail((void *)rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)rng_seed->data)); +} + static void virt_init(MachineState *machine) { M68kCPU *cpu = NULL; @@ -289,9 +290,10 @@ static void virt_init(MachineState *machine) BOOTINFO0(param_ptr, BI_LAST); rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, parameters_base, cs->as); - reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, - param_ptr - param_blob) + - (param_rng_seed - param_blob); + qemu_register_reset_nosnapshotload(rerandomize_rng_seed, + rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob)); g_free(param_blob); } } From fbbbe7eb23e4120a64238d41367a3fabccb59781 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:23 +0200 Subject: [PATCH 27/31] m68k/q800: do not re-randomize RNG seed on snapshot load Snapshot loading is supposed to be deterministic, so we shouldn't re-randomize the various seeds used. Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-8-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/m68k/q800.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c index e09e244ddc..9d52ca6613 100644 --- a/hw/m68k/q800.c +++ b/hw/m68k/q800.c @@ -321,27 +321,23 @@ static const TypeInfo glue_info = { }, }; -typedef struct { - M68kCPU *cpu; - struct bi_record *rng_seed; -} ResetInfo; - static void main_cpu_reset(void *opaque) { - ResetInfo *reset_info = opaque; - M68kCPU *cpu = reset_info->cpu; + M68kCPU *cpu = opaque; CPUState *cs = CPU(cpu); - if (reset_info->rng_seed) { - qemu_guest_getrandom_nofail((void *)reset_info->rng_seed->data + 2, - be16_to_cpu(*(uint16_t *)reset_info->rng_seed->data)); - } - cpu_reset(cs); cpu->env.aregs[7] = ldl_phys(cs->as, 0); cpu->env.pc = ldl_phys(cs->as, 4); } +static void rerandomize_rng_seed(void *opaque) +{ + struct bi_record *rng_seed = opaque; + qemu_guest_getrandom_nofail((void *)rng_seed->data + 2, + be16_to_cpu(*(uint16_t *)rng_seed->data)); +} + static uint8_t fake_mac_rom[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -397,7 +393,6 @@ static void q800_init(MachineState *machine) NubusBus *nubus; DeviceState *glue; DriveInfo *dinfo; - ResetInfo *reset_info; uint8_t rng_seed[32]; linux_boot = (kernel_filename != NULL); @@ -408,12 +403,9 @@ static void q800_init(MachineState *machine) exit(1); } - reset_info = g_new0(ResetInfo, 1); - /* init CPUs */ cpu = M68K_CPU(cpu_create(machine->cpu_type)); - reset_info->cpu = cpu; - qemu_register_reset(main_cpu_reset, reset_info); + qemu_register_reset(main_cpu_reset, cpu); /* RAM */ memory_region_add_subregion(get_system_memory(), 0, machine->ram); @@ -687,9 +679,10 @@ static void q800_init(MachineState *machine) BOOTINFO0(param_ptr, BI_LAST); rom_add_blob_fixed_as("bootinfo", param_blob, param_ptr - param_blob, parameters_base, cs->as); - reset_info->rng_seed = rom_ptr_for_as(cs->as, parameters_base, - param_ptr - param_blob) + - (param_rng_seed - param_blob); + qemu_register_reset_nosnapshotload(rerandomize_rng_seed, + rom_ptr_for_as(cs->as, parameters_base, + param_ptr - param_blob) + + (param_rng_seed - param_blob)); g_free(param_blob); } else { uint8_t *ptr; From 4fbae24450a67c3dfea6d2617e6d1f7a4dd206dc Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:24 +0200 Subject: [PATCH 28/31] mips/boston: re-randomize rng-seed on reboot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Since the FDT is in the ROM region at this point, we add a hook right after the ROM has been added, so that we have a pointer to that copy of the FDT. Cc: Aleksandar Rikalo Cc: Paul Burton Cc: Philippe Mathieu-Daudé Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-9-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/mips/boston.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/mips/boston.c b/hw/mips/boston.c index d2ab9da1a0..cab63f43bf 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -41,6 +41,7 @@ #include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "sysemu/runstate.h" +#include "sysemu/reset.h" #include #include "qom/object.h" @@ -810,6 +811,8 @@ static void boston_mach_init(MachineState *machine) /* Calculate real fdt size after filter */ dt_size = fdt_totalsize(dtb_load_data); rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr(dtb_paddr, dt_size)); } else { /* Try to load file as FIT */ fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); From 2db07d0506d365884b140fd5640fd71db15eacbc Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:26 +0200 Subject: [PATCH 29/31] openrisc: re-randomize rng-seed on reboot When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Since the FDT is in the ROM region at this point, we add a hook right after the ROM has been added, so that we have a pointer to that copy of the FDT. Cc: Stafford Horne Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-11-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/openrisc/boot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/openrisc/boot.c b/hw/openrisc/boot.c index 128ccbcba2..007e80cd5a 100644 --- a/hw/openrisc/boot.c +++ b/hw/openrisc/boot.c @@ -14,6 +14,7 @@ #include "hw/openrisc/boot.h" #include "sysemu/device_tree.h" #include "sysemu/qtest.h" +#include "sysemu/reset.h" #include @@ -111,6 +112,8 @@ uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start, rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr, &address_space_memory); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); return fdt_addr; } From a76b911c0de3d72e245060a57d02f816fb1334e2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 02:43:27 +0200 Subject: [PATCH 30/31] rx: re-randomize rng-seed on reboot When the system reboots, the rng-seed that the FDT has should be re-randomized, so that the new boot gets a new seed. Since the FDT is in the ROM region at this point, we add a hook right after the ROM has been added, so that we have a pointer to that copy of the FDT. Cc: Yoshinori Sato Signed-off-by: Jason A. Donenfeld Message-id: 20221025004327.568476-12-Jason@zx2c4.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/rx/rx-gdbsim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c index 8ffe1b8035..47c17026c7 100644 --- a/hw/rx/rx-gdbsim.c +++ b/hw/rx/rx-gdbsim.c @@ -25,6 +25,7 @@ #include "hw/rx/rx62n.h" #include "sysemu/qtest.h" #include "sysemu/device_tree.h" +#include "sysemu/reset.h" #include "hw/boards.h" #include "qom/object.h" @@ -148,6 +149,8 @@ static void rx_gdbsim_init(MachineState *machine) dtb_offset = ROUND_DOWN(machine->ram_size - dtb_size, 16); rom_add_blob_fixed("dtb", dtb, dtb_size, SDRAM_BASE + dtb_offset); + qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, + rom_ptr(SDRAM_BASE + dtb_offset, dtb_size)); /* Set dtb address to R1 */ RX_CPU(first_cpu)->env.regs[1] = SDRAM_BASE + dtb_offset; } From 6233a138599bea89ad683b883dca38388f12fd2d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 19:28:43 +0200 Subject: [PATCH 31/31] mips/malta: pass RNG seed via env var and re-randomize on reboot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of the kernel commit linked below, Linux ingests an RNG seed passed as part of the environment block by the bootloader or firmware. This mechanism works across all different environment block types, generically, which pass some block via the second firmware argument. On malta, this has been tested to work when passed as an argument from U-Boot's linux_env_set. As is the case on most other architectures (such as boston), when booting with `-kernel`, QEMU, acting as the bootloader, should pass the RNG seed, so that the machine has good entropy for Linux to consume. So this commit implements that quite simply by using the guest random API, which is what is used on nearly all other archs too. It also reinitializes the seed on reboot, so that it is always fresh. Link: https://git.kernel.org/torvalds/c/056a68cea01 Cc: Aleksandar Rikalo Cc: Paul Burton Cc: Philippe Mathieu-Daudé Signed-off-by: Jason A. Donenfeld Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/mips/malta.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/hw/mips/malta.c b/hw/mips/malta.c index 0e932988e0..7c3ad0974b 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -26,6 +26,7 @@ #include "qemu/units.h" #include "qemu/bitops.h" #include "qemu/datadir.h" +#include "qemu/guest-random.h" #include "hw/clock.h" #include "hw/southbridge/piix.h" #include "hw/isa/superio.h" @@ -1017,6 +1018,17 @@ static void G_GNUC_PRINTF(3, 4) prom_set(uint32_t *prom_buf, int index, va_end(ap); } +static void reinitialize_rng_seed(void *opaque) +{ + char *rng_seed_hex = opaque; + uint8_t rng_seed[32]; + + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + for (size_t i = 0; i < sizeof(rng_seed); ++i) { + sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]); + } +} + /* Kernel */ static uint64_t load_kernel(void) { @@ -1028,6 +1040,9 @@ static uint64_t load_kernel(void) long prom_size; int prom_index = 0; uint64_t (*xlate_to_kseg0) (void *opaque, uint64_t addr); + uint8_t rng_seed[32]; + char rng_seed_hex[sizeof(rng_seed) * 2 + 1]; + size_t rng_seed_prom_offset; #if TARGET_BIG_ENDIAN big_endian = 1; @@ -1115,9 +1130,21 @@ static uint64_t load_kernel(void) prom_set(prom_buf, prom_index++, "modetty0"); prom_set(prom_buf, prom_index++, "38400n8r"); + + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + for (size_t i = 0; i < sizeof(rng_seed); ++i) { + sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]); + } + prom_set(prom_buf, prom_index++, "rngseed"); + rng_seed_prom_offset = prom_index * ENVP_ENTRY_SIZE + + sizeof(uint32_t) * ENVP_NB_ENTRIES; + prom_set(prom_buf, prom_index++, "%s", rng_seed_hex); + prom_set(prom_buf, prom_index++, NULL); rom_add_blob_fixed("prom", prom_buf, prom_size, ENVP_PADDR); + qemu_register_reset_nosnapshotload(reinitialize_rng_seed, + rom_ptr(ENVP_PADDR, prom_size) + rng_seed_prom_offset); g_free(prom_buf); return kernel_entry;