target-arm queue:

* Refactor FPCR/FPSR handling in preparation for FEAT_AFP * More decodetree conversions * target/arm: Use cpu_env in cpu_untagged_addr * target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt() * hw/char/pl011: Avoid division-by-zero in pl011_get_baudrate() * hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops * accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory * STM32L4x5: Handle USART interrupts correctly -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmaP24MZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3luAEACF4Uhrcrh7E7RwoDEeQAMQ IG3+LwUbhnBXIUl7DL0qQTjnmwbbTQH2Ukoq3biqAdSs22JwrT6O6MDQ7fA3X8DI 3Ew+72BzAAtQHVHJaFRw2f9UVQop8Poa9I7Di6frH4Gxk5AKQY/IwjrD6jYPqhM7 9KCksksO3w9DRmpFZ1y5I/dGumTe12btEwdazWxrsyZIBNDoUJSU8xpcMk+9oErF 23hcsSaXOGDeWwPuEk1q2mMYnRQQtMhVndxV50sF98MfJ3nnMKEttuFuW0znXMCr Xat8Y4QbigXGmuJNgjXccIzN1Hje+h5zzfUIfVNWBYNzqULvvi/vjwNfJaUiIjm5 DxeOGUu8iZYQbgvJXvn9NwWbptxvhyWsCLpB46icElcN0jr1MU12wk2IH0CZa7KU h4kbu0p17dph5Lantd888b1Vu3pOFr4UiRC3qJB9ddBVLyGl/3Km1wb99x038mPo Mt8Y7Vjnr5OWd+mTNzXFRnYFYIRKu1lI85VuTjd5Uua0lDtFDo/sVnVF9uas84OC /PrQYGso0UE320li+jYHzE18rKPEi2u/3xTgHWAgh3ra7McWVjWDr2yIsAisKKNH 2F72gyZNy2n7FJhTYPQAJnozi68maP5f9tHHHXQdfsCE4+2h0fr/wljCeq1+5waq 4edm31uEbArfW/jLgPHHAA== =Xkmk -----END PGP SIGNATURE----- Merge tag 'pull-target-arm-20240711' of https://git.linaro.org/people/pmaydell/qemu-arm into staging target-arm queue: * Refactor FPCR/FPSR handling in preparation for FEAT_AFP * More decodetree conversions * target/arm: Use cpu_env in cpu_untagged_addr * target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt() * hw/char/pl011: Avoid division-by-zero in pl011_get_baudrate() * hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops * accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory * STM32L4x5: Handle USART interrupts correctly # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmaP24MZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3luAEACF4Uhrcrh7E7RwoDEeQAMQ # IG3+LwUbhnBXIUl7DL0qQTjnmwbbTQH2Ukoq3biqAdSs22JwrT6O6MDQ7fA3X8DI # 3Ew+72BzAAtQHVHJaFRw2f9UVQop8Poa9I7Di6frH4Gxk5AKQY/IwjrD6jYPqhM7 # 9KCksksO3w9DRmpFZ1y5I/dGumTe12btEwdazWxrsyZIBNDoUJSU8xpcMk+9oErF # 23hcsSaXOGDeWwPuEk1q2mMYnRQQtMhVndxV50sF98MfJ3nnMKEttuFuW0znXMCr # Xat8Y4QbigXGmuJNgjXccIzN1Hje+h5zzfUIfVNWBYNzqULvvi/vjwNfJaUiIjm5 # DxeOGUu8iZYQbgvJXvn9NwWbptxvhyWsCLpB46icElcN0jr1MU12wk2IH0CZa7KU # h4kbu0p17dph5Lantd888b1Vu3pOFr4UiRC3qJB9ddBVLyGl/3Km1wb99x038mPo # Mt8Y7Vjnr5OWd+mTNzXFRnYFYIRKu1lI85VuTjd5Uua0lDtFDo/sVnVF9uas84OC # /PrQYGso0UE320li+jYHzE18rKPEi2u/3xTgHWAgh3ra7McWVjWDr2yIsAisKKNH # 2F72gyZNy2n7FJhTYPQAJnozi68maP5f9tHHHXQdfsCE4+2h0fr/wljCeq1+5waq # 4edm31uEbArfW/jLgPHHAA== # =Xkmk # -----END PGP SIGNATURE----- # gpg: Signature made Thu 11 Jul 2024 06:17:55 AM PDT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown] * tag 'pull-target-arm-20240711' of https://git.linaro.org/people/pmaydell/qemu-arm: (24 commits) target/arm: Convert PMULL to decodetree target/arm: Convert ADDHN, SUBHN, RADDHN, RSUBHN to decodetree target/arm: Convert SADDW, SSUBW, UADDW, USUBW to decodetree target/arm: Convert SQDMULL, SQDMLAL, SQDMLSL to decodetree target/arm: Convert SADDL, SSUBL, SABDL, SABAL, and unsigned to decodetree target/arm: Convert SMULL, UMULL, SMLAL, UMLAL, SMLSL, UMLSL to decodetree hw/arm: In STM32L4x5 SOC, connect USART devices to EXTI hw/misc: In STM32L4x5 EXTI, handle direct interrupts hw/misc: In STM32L4x5 EXTI, consolidate 2 constants accel/tcg: Make TCGCPUOps::cpu_exec_halt mandatory target: Set TCGCPUOps::cpu_exec_halt to target's has_work implementation target/arm: Set arm_v7m_tcg_ops cpu_exec_halt to arm_cpu_exec_halt() target/arm: Use cpu_env in cpu_untagged_addr hw/misc/bcm2835_thermal: Fix access size handling in bcm2835_thermal_ops hw/char/pl011: Avoid division-by-zero in pl011_get_baudrate() target/arm: Allow FPCR bits that aren't in FPSCR target/arm: Rename FPSR_MASK and FPCR_MASK and define them symbolically target/arm: Rename FPCR_ QC, NZCV macros to FPSR_ target/arm: Store FPSR and FPCR in separate CPU state fields target/arm: Implement store_cpu_field_low32() macro ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2024-07-11 12:00:00 -07:00 · 2024-07-11 12:00:00 -07:00 · 23901b2b72
parent 39a032cea2 7f49089158
commit 23901b2b72
39 changed files with 893 additions and 929 deletions
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@ -682,13 +682,8 @@ static inline bool cpu_handle_halt(CPUState *cpu)
 #ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
        const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
-        bool leave_halt;
+        bool leave_halt = tcg_ops->cpu_exec_halt(cpu);

-        if (tcg_ops->cpu_exec_halt) {
-            leave_halt = tcg_ops->cpu_exec_halt(cpu);
-        } else {
-            leave_halt = cpu_has_work(cpu);
-        }
        if (!leave_halt) {
            return true;
        }
@ -1082,6 +1077,10 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
    static bool tcg_target_initialized;

    if (!tcg_target_initialized) {
+        /* Check mandatory TCGCPUOps handlers */
+#ifndef CONFIG_USER_ONLY
+        assert(cpu->cc->tcg_ops->cpu_exec_halt);
+#endif /* !CONFIG_USER_ONLY */
        cpu->cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }
--- a/hw/arm/stm32l4x5_soc.c
+++ b/hw/arm/stm32l4x5_soc.c
@ -81,6 +81,10 @@ static const int exti_irq[NUM_EXTI_IRQ] = {
 #define RCC_BASE_ADDRESS 0x40021000
 #define RCC_IRQ 5

+#define EXTI_USART1_IRQ 26
+#define EXTI_UART4_IRQ 29
+#define EXTI_LPUART1_IRQ 31
+
 static const int exti_or_gates_out[NUM_EXTI_OR_GATES] = {
    23, 40, 63, 1,
 };
@ -129,10 +133,6 @@ static const hwaddr uart_addr[] = {

 #define LPUART_BASE_ADDRESS 0x40008000

-static const int usart_irq[] = { 37, 38, 39 };
-static const int uart_irq[] = { 52, 53 };
-#define LPUART_IRQ 70
-
 static void stm32l4x5_soc_initfn(Object *obj)
 {
    Stm32l4x5SocState *s = STM32L4X5_SOC(obj);
@ -297,6 +297,7 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
        }
    }

+    /* Connect SYSCFG to EXTI */
    for (unsigned i = 0; i < GPIO_NUM_PINS; i++) {
        qdev_connect_gpio_out(DEVICE(&s->syscfg), i,
                              qdev_get_gpio_in(DEVICE(&s->exti), i));
@ -322,15 +323,10 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
            return;
        }
        sysbus_mmio_map(busdev, 0, usart_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+                                                       EXTI_USART1_IRQ + i));
    }

-    /*
-     * TODO: Connect the USARTs, UARTs and LPUART to the EXTI once the EXTI
-     * can handle other gpio-in than the gpios. (e.g. Direct Lines for the
-     * usarts)
-     */
-
    /* UART devices */
    for (int i = 0; i < STM_NUM_UARTS; i++) {
        g_autofree char *name = g_strdup_printf("uart%d-out", STM_NUM_USARTS + i + 1);
@ -343,7 +339,8 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
            return;
        }
        sysbus_mmio_map(busdev, 0, uart_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, uart_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+                                                       EXTI_UART4_IRQ + i));
    }

    /* LPUART device*/
@ -356,7 +353,8 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
        return;
    }
    sysbus_mmio_map(busdev, 0, LPUART_BASE_ADDRESS);
-    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, LPUART_IRQ));
+    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(DEVICE(&s->exti),
+                                                   EXTI_LPUART1_IRQ));

    /* APB1 BUS */
    create_unimplemented_device("TIM2",      0x40000000, 0x400);
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@ -87,6 +87,12 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
 #define CR_DTR      (1 << 10)
 #define CR_LBE      (1 << 7)

+/* Integer Baud Rate Divider, UARTIBRD */
+#define IBRD_MASK 0x3f
+
+/* Fractional Baud Rate Divider, UARTFBRD */
+#define FBRD_MASK 0xffff
+
 static const unsigned char pl011_id_arm[8] =
  { 0x11, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
 static const unsigned char pl011_id_luminary[8] =
@ -374,11 +380,11 @@ static void pl011_write(void *opaque, hwaddr offset,
        s->ilpr = value;
        break;
    case 9: /* UARTIBRD */
-        s->ibrd = value;
+        s->ibrd = value & IBRD_MASK;
        pl011_trace_baudrate_change(s);
        break;
    case 10: /* UARTFBRD */
-        s->fbrd = value;
+        s->fbrd = value & FBRD_MASK;
        pl011_trace_baudrate_change(s);
        break;
    case 11: /* UARTLCR_H */
@ -531,6 +537,9 @@ static int pl011_post_load(void *opaque, int version_id)
        s->read_pos = 0;
    }

+    s->ibrd &= IBRD_MASK;
+    s->fbrd &= FBRD_MASK;
+
    return 0;
 }

--- a/hw/misc/bcm2835_thermal.c
+++ b/hw/misc/bcm2835_thermal.c
@ -80,8 +80,10 @@ static void bcm2835_thermal_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps bcm2835_thermal_ops = {
    .read = bcm2835_thermal_read,
    .write = bcm2835_thermal_write,
+    .impl.min_access_size = 4,
    .impl.max_access_size = 4,
    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
    .endianness = DEVICE_NATIVE_ENDIAN,
 };

--- a/hw/misc/stm32l4x5_exti.c
+++ b/hw/misc/stm32l4x5_exti.c
@ -42,7 +42,6 @@
 #define EXTI_SWIER2 0x30
 #define EXTI_PR2    0x34

-#define EXTI_NUM_GPIO_EVENT_IN_LINES 16
 #define EXTI_MAX_IRQ_PER_BANK 32
 #define EXTI_IRQS_BANK0  32
 #define EXTI_IRQS_BANK1  8
@ -114,6 +113,13 @@ static void stm32l4x5_exti_set_irq(void *opaque, int irq, int level)
        return;
    }

+    /* In case of a direct line interrupt */
+    if (extract32(exti_romask[bank], irq, 1)) {
+        qemu_set_irq(s->irq[oirq], level);
+        return;
+    }
+
+    /* In case of a configurable interrupt */
    if ((level && extract32(s->rtsr[bank], irq, 1)) ||
        (!level && extract32(s->ftsr[bank], irq, 1))) {

@ -238,7 +244,7 @@ static void stm32l4x5_exti_init(Object *obj)
 {
    Stm32l4x5ExtiState *s = STM32L4X5_EXTI(obj);

-    for (size_t i = 0; i < EXTI_NUM_INTERRUPT_OUT_LINES; i++) {
+    for (size_t i = 0; i < EXTI_NUM_LINES; i++) {
        sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq[i]);
    }

@ -246,8 +252,7 @@ static void stm32l4x5_exti_init(Object *obj)
                          TYPE_STM32L4X5_EXTI, 0x400);
    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);

-    qdev_init_gpio_in(DEVICE(obj), stm32l4x5_exti_set_irq,
-                      EXTI_NUM_GPIO_EVENT_IN_LINES);
+    qdev_init_gpio_in(DEVICE(obj), stm32l4x5_exti_set_irq, EXTI_NUM_LINES);
 }

 static const VMStateDescription vmstate_stm32l4x5_exti = {
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@ -122,10 +122,13 @@ struct TCGCPUOps {
     * to do when the CPU is in the halted state.
     *
     * Return true to indicate that the CPU should now leave halt, false
-     * if it should remain in the halted state.
+     * if it should remain in the halted state. (This should generally
+     * be the same value that cpu_has_work() would return.)
     *
-     * If this method is not provided, the default is to do nothing, and
-     * to leave halt if cpu_has_work() returns true.
+     * This method must be provided. If the target does not need to
+     * do anything special for halt, the same function used for its
+     * CPUClass::has_work method can be used here, as they have the
+     * same function signature.
     */
    bool (*cpu_exec_halt)(CPUState *cpu);
    /**
--- a/include/hw/misc/stm32l4x5_exti.h
+++ b/include/hw/misc/stm32l4x5_exti.h
@ -30,7 +30,7 @@
 #define TYPE_STM32L4X5_EXTI "stm32l4x5-exti"
 OBJECT_DECLARE_SIMPLE_TYPE(Stm32l4x5ExtiState, STM32L4X5_EXTI)

-#define EXTI_NUM_INTERRUPT_OUT_LINES 40
+#define EXTI_NUM_LINES 40
 #define EXTI_NUM_REGISTER 2

 struct Stm32l4x5ExtiState {
@ -47,7 +47,7 @@ struct Stm32l4x5ExtiState {

    /* used for edge detection */
    uint32_t irq_levels[EXTI_NUM_REGISTER];
-    qemu_irq irq[EXTI_NUM_INTERRUPT_OUT_LINES];
+    qemu_irq irq[EXTI_NUM_LINES];
 };

 #endif
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@ -219,6 +219,7 @@ static const TCGCPUOps alpha_tcg_ops = {
 #else
    .tlb_fill = alpha_cpu_tlb_fill,
    .cpu_exec_interrupt = alpha_cpu_exec_interrupt,
+    .cpu_exec_halt = alpha_cpu_has_work,
    .do_interrupt = alpha_cpu_do_interrupt,
    .do_transaction_failed = alpha_cpu_do_transaction_failed,
    .do_unaligned_access = alpha_cpu_do_unaligned_access,
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@ -1133,7 +1133,7 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs)
 }

 #ifdef CONFIG_TCG
-static bool arm_cpu_exec_halt(CPUState *cs)
+bool arm_cpu_exec_halt(CPUState *cs)
 {
    bool leave_halt = cpu_has_work(cs);

--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@ -619,6 +619,13 @@ typedef struct CPUArchState {
        int vec_len;
        int vec_stride;

+        /*
+         * Floating point status and control registers. Some bits are
+         * stored separately in other fields or in the float_status below.
+         */
+        uint64_t fpsr;
+        uint64_t fpcr;
+
        uint32_t xregs[16];

        /* Scratch space for aa32 neon expansion.  */
@ -1680,61 +1687,99 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 uint32_t vfp_get_fpscr(CPUARMState *env);
 void vfp_set_fpscr(CPUARMState *env, uint32_t val);

-/* FPCR, Floating Point Control Register
- * FPSR, Floating Poiht Status Register
+/*
+ * FPCR, Floating Point Control Register
+ * FPSR, Floating Point Status Register
 *
- * For A64 the FPSCR is split into two logically distinct registers,
- * FPCR and FPSR. However since they still use non-overlapping bits
- * we store the underlying state in fpscr and just mask on read/write.
+ * For A64 floating point control and status bits are stored in
+ * two logically distinct registers, FPCR and FPSR. We store these
+ * in QEMU in vfp.fpcr and vfp.fpsr.
+ * For A32 there was only one register, FPSCR. The bits are arranged
+ * such that FPSCR bits map to FPCR or FPSR bits in the same bit positions,
+ * so we can use appropriate masking to handle FPSCR reads and writes.
+ * Note that the FPCR has some bits which are not visible in the
+ * AArch32 view (for FEAT_AFP). Writing the FPSCR leaves these unchanged.
 */
-#define FPSR_MASK 0xf800009f
-#define FPCR_MASK 0x07ff9f00

+/* FPCR bits */
 #define FPCR_IOE    (1 << 8)    /* Invalid Operation exception trap enable */
 #define FPCR_DZE    (1 << 9)    /* Divide by Zero exception trap enable */
 #define FPCR_OFE    (1 << 10)   /* Overflow exception trap enable */
 #define FPCR_UFE    (1 << 11)   /* Underflow exception trap enable */
 #define FPCR_IXE    (1 << 12)   /* Inexact exception trap enable */
 #define FPCR_IDE    (1 << 15)   /* Input Denormal exception trap enable */
+#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */
 #define FPCR_FZ16   (1 << 19)   /* ARMv8.2+, FP16 flush-to-zero */
+#define FPCR_STRIDE_MASK (3 << 20) /* Stride */
 #define FPCR_RMODE_MASK (3 << 22) /* Rounding mode */
 #define FPCR_FZ     (1 << 24)   /* Flush-to-zero enable bit */
 #define FPCR_DN     (1 << 25)   /* Default NaN enable bit */
 #define FPCR_AHP    (1 << 26)   /* Alternative half-precision */
-#define FPCR_QC     (1 << 27)   /* Cumulative saturation bit */
-#define FPCR_V      (1 << 28)   /* FP overflow flag */
-#define FPCR_C      (1 << 29)   /* FP carry flag */
-#define FPCR_Z      (1 << 30)   /* FP zero flag */
-#define FPCR_N      (1 << 31)   /* FP negative flag */

 #define FPCR_LTPSIZE_SHIFT 16   /* LTPSIZE, M-profile only */
 #define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT)
 #define FPCR_LTPSIZE_LENGTH 3

-#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V)
-#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC)
+/* Cumulative exception trap enable bits */
+#define FPCR_EEXC_MASK (FPCR_IOE | FPCR_DZE | FPCR_OFE | FPCR_UFE | FPCR_IXE | FPCR_IDE)

-static inline uint32_t vfp_get_fpsr(CPUARMState *env)
-{
-    return vfp_get_fpscr(env) & FPSR_MASK;
-}
+/* FPSR bits */
+#define FPSR_IOC    (1 << 0)    /* Invalid Operation cumulative exception */
+#define FPSR_DZC    (1 << 1)    /* Divide by Zero cumulative exception */
+#define FPSR_OFC    (1 << 2)    /* Overflow cumulative exception */
+#define FPSR_UFC    (1 << 3)    /* Underflow cumulative exception */
+#define FPSR_IXC    (1 << 4)    /* Inexact cumulative exception */
+#define FPSR_IDC    (1 << 7)    /* Input Denormal cumulative exception */
+#define FPSR_QC     (1 << 27)   /* Cumulative saturation bit */
+#define FPSR_V      (1 << 28)   /* FP overflow flag */
+#define FPSR_C      (1 << 29)   /* FP carry flag */
+#define FPSR_Z      (1 << 30)   /* FP zero flag */
+#define FPSR_N      (1 << 31)   /* FP negative flag */

-static inline void vfp_set_fpsr(CPUARMState *env, uint32_t val)
-{
-    uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPSR_MASK) | (val & FPSR_MASK);
-    vfp_set_fpscr(env, new_fpscr);
-}
+/* Cumulative exception status bits */
+#define FPSR_CEXC_MASK (FPSR_IOC | FPSR_DZC | FPSR_OFC | FPSR_UFC | FPSR_IXC | FPSR_IDC)

-static inline uint32_t vfp_get_fpcr(CPUARMState *env)
-{
-    return vfp_get_fpscr(env) & FPCR_MASK;
-}
+#define FPSR_NZCV_MASK (FPSR_N | FPSR_Z | FPSR_C | FPSR_V)
+#define FPSR_NZCVQC_MASK (FPSR_NZCV_MASK | FPSR_QC)

-static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val)
-{
-    uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPCR_MASK) | (val & FPCR_MASK);
-    vfp_set_fpscr(env, new_fpscr);
-}
+/* A32 FPSCR bits which architecturally map to FPSR bits */
+#define FPSCR_FPSR_MASK (FPSR_NZCVQC_MASK | FPSR_CEXC_MASK)
+/* A32 FPSCR bits which architecturally map to FPCR bits */
+#define FPSCR_FPCR_MASK (FPCR_EEXC_MASK | FPCR_LEN_MASK | FPCR_FZ16 | \
+                         FPCR_STRIDE_MASK | FPCR_RMODE_MASK | \
+                         FPCR_FZ | FPCR_DN | FPCR_AHP)
+/* These masks don't overlap: each bit lives in only one place */
+QEMU_BUILD_BUG_ON(FPSCR_FPSR_MASK & FPSCR_FPCR_MASK);
+
+/**
+ * vfp_get_fpsr: read the AArch64 FPSR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPSR value
+ */
+uint32_t vfp_get_fpsr(CPUARMState *env);
+
+/**
+ * vfp_get_fpcr: read the AArch64 FPCR
+ * @env: CPU context
+ *
+ * Return the current AArch64 FPCR value
+ */
+uint32_t vfp_get_fpcr(CPUARMState *env);
+
+/**
+ * vfp_set_fpsr: write the AArch64 FPSR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpsr(CPUARMState *env, uint32_t value);
+
+/**
+ * vfp_set_fpcr: write the AArch64 FPCR
+ * @env: CPU context
+ * @value: new value
+ */
+void vfp_set_fpcr(CPUARMState *env, uint32_t value);

 enum arm_cpu_mode {
  ARM_CPU_MODE_USR = 0x10,
@ -3309,8 +3354,8 @@ extern const uint64_t pred_esz_masks[5];
 */
 static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
 {
-    ARMCPU *cpu = ARM_CPU(cs);
-    if (cpu->env.tagged_addr_enable) {
+    CPUARMState *env = cpu_env(cs);
+    if (env->tagged_addr_enable) {
        /*
         * TBI is enabled for userspace but not kernelspace addresses.
         * Only clear the tag if bit 55 is clear.
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@ -368,6 +368,9 @@ void arm_restore_state_to_opc(CPUState *cs,

 #ifdef CONFIG_TCG
 void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
+
+/* Our implementation of TCGCPUOps::cpu_exec_halt */
+bool arm_cpu_exec_halt(CPUState *cs);
 #endif /* CONFIG_TCG */

 typedef enum ARMFPRounding {
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@ -18,6 +18,35 @@ static bool vfp_needed(void *opaque)
            : cpu_isar_feature(aa32_vfp_simd, cpu));
 }

+static bool vfp_fpcr_fpsr_needed(void *opaque)
+{
+    /*
+     * If either the FPCR or the FPSR include set bits that are not
+     * visible in the AArch32 FPSCR view of floating point control/status
+     * then we must send the FPCR and FPSR as two separate fields in the
+     * cpu/vfp/fpcr_fpsr subsection, and we will send a 0 for the old
+     * FPSCR field in cpu/vfp.
+     *
+     * If all the set bits are representable in an AArch32 FPSCR then we
+     * send that value as the cpu/vfp FPSCR field, and don't send the
+     * cpu/vfp/fpcr_fpsr subsection.
+     *
+     * On incoming migration, if the cpu/vfp FPSCR field is non-zero we
+     * use it, and if the fpcr_fpsr subsection is present we use that.
+     * (The subsection will never be present with a non-zero FPSCR field,
+     * and if FPSCR is zero and the subsection is not present that means
+     * that FPSCR/FPSR/FPCR are zero.)
+     *
+     * This preserves migration compatibility with older QEMU versions,
+     * in both directions.
+     */
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+
+    return (vfp_get_fpcr(env) & ~FPSCR_FPCR_MASK) ||
+        (vfp_get_fpsr(env) & ~FPSCR_FPSR_MASK);
+}
+
 static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
                     const VMStateField *field)
 {
@ -25,7 +54,10 @@ static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
    CPUARMState *env = &cpu->env;
    uint32_t val = qemu_get_be32(f);

-    vfp_set_fpscr(env, val);
+    if (val) {
+        /* 0 means we might have the data in the fpcr_fpsr subsection */
+        vfp_set_fpscr(env, val);
+    }
    return 0;
 }

@ -34,8 +66,9 @@ static int put_fpscr(QEMUFile *f, void *opaque, size_t size,
 {
    ARMCPU *cpu = opaque;
    CPUARMState *env = &cpu->env;
+    uint32_t fpscr = vfp_fpcr_fpsr_needed(opaque) ? 0 : vfp_get_fpscr(env);

-    qemu_put_be32(f, vfp_get_fpscr(env));
+    qemu_put_be32(f, fpscr);
    return 0;
 }

@ -45,6 +78,86 @@ static const VMStateInfo vmstate_fpscr = {
    .put = put_fpscr,
 };

+static int get_fpcr(QEMUFile *f, void *opaque, size_t size,
+                     const VMStateField *field)
+{
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+    uint64_t val = qemu_get_be64(f);
+
+    vfp_set_fpcr(env, val);
+    return 0;
+}
+
+static int put_fpcr(QEMUFile *f, void *opaque, size_t size,
+                     const VMStateField *field, JSONWriter *vmdesc)
+{
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+
+    qemu_put_be64(f, vfp_get_fpcr(env));
+    return 0;
+}
+
+static const VMStateInfo vmstate_fpcr = {
+    .name = "fpcr",
+    .get = get_fpcr,
+    .put = put_fpcr,
+};
+
+static int get_fpsr(QEMUFile *f, void *opaque, size_t size,
+                     const VMStateField *field)
+{
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+    uint64_t val = qemu_get_be64(f);
+
+    vfp_set_fpsr(env, val);
+    return 0;
+}
+
+static int put_fpsr(QEMUFile *f, void *opaque, size_t size,
+                     const VMStateField *field, JSONWriter *vmdesc)
+{
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+
+    qemu_put_be64(f, vfp_get_fpsr(env));
+    return 0;
+}
+
+static const VMStateInfo vmstate_fpsr = {
+    .name = "fpsr",
+    .get = get_fpsr,
+    .put = put_fpsr,
+};
+
+static const VMStateDescription vmstate_vfp_fpcr_fpsr = {
+    .name = "cpu/vfp/fpcr_fpsr",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vfp_fpcr_fpsr_needed,
+    .fields = (const VMStateField[]) {
+        {
+            .name = "fpcr",
+            .version_id = 0,
+            .size = sizeof(uint64_t),
+            .info = &vmstate_fpcr,
+            .flags = VMS_SINGLE,
+            .offset = 0,
+        },
+        {
+            .name = "fpsr",
+            .version_id = 0,
+            .size = sizeof(uint64_t),
+            .info = &vmstate_fpsr,
+            .flags = VMS_SINGLE,
+            .offset = 0,
+        },
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_vfp = {
    .name = "cpu/vfp",
    .version_id = 3,
@ -100,6 +213,10 @@ static const VMStateDescription vmstate_vfp = {
            .offset = 0,
        },
        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * const []) {
+        &vmstate_vfp_fpcr_fpsr,
+        NULL
    }
 };

@ -784,6 +901,20 @@ static int cpu_pre_load(void *opaque)
    ARMCPU *cpu = opaque;
    CPUARMState *env = &cpu->env;

+    /*
+     * In an inbound migration where on the source FPSCR/FPSR/FPCR are 0,
+     * there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr()
+     * and vfp_set_fpsr() from get_fpcr() and get_fpsr(); also the get_fpscr()
+     * function will not call vfp_set_fpscr() because it will see a 0 in the
+     * inbound data. Ensure that in this case we have a correctly set up
+     * zero FPSCR/FPCR/FPSR.
+     *
+     * This is not strictly needed because FPSCR is zero out of reset, but
+     * it avoids the possibility of future confusing migration bugs if some
+     * future architecture change makes the reset value non-zero.
+     */
+    vfp_set_fpscr(env, 0);
+
    /*
     * Pre-initialize irq_line_state to a value that's never valid as
     * real data, so cpu_post_load() can tell whether we've seen the
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@ -785,6 +785,14 @@ SQRDMULH_s      0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
 SQRDMLAH_s      0111 1110 ..0 ..... 10000 1 ..... ..... @rrr_e
 SQRDMLSH_s      0111 1110 ..0 ..... 10001 1 ..... ..... @rrr_e

+# Decode scalar x scalar as scalar x indexed, with index 0.
+SQDMULL_si      0101 1110 011 rm:5  11010 0 rn:5  rd:5  &rrx_e idx=0 esz=1
+SQDMULL_si      0101 1110 101 rm:5  11010 0 rn:5  rd:5  &rrx_e idx=0 esz=2
+SQDMLAL_si      0101 1110 011 rm:5  10010 0 rn:5  rd:5  &rrx_e idx=0 esz=1
+SQDMLAL_si      0101 1110 101 rm:5  10010 0 rn:5  rd:5  &rrx_e idx=0 esz=2
+SQDMLSL_si      0101 1110 011 rm:5  10110 0 rn:5  rd:5  &rrx_e idx=0 esz=1
+SQDMLSL_si      0101 1110 101 rm:5  10110 0 rn:5  rd:5  &rrx_e idx=0 esz=2
+
 ### Advanced SIMD scalar pairwise

 FADDP_s         0101 1110 0011 0000 1101 10 ..... ..... @rr_h
@ -962,6 +970,42 @@ FCADD_270       0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e

 FCMLA_v         0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5

+SMULL_v         0.00 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+UMULL_v         0.10 1110 ..1 ..... 11000 0 ..... ..... @qrrr_e
+SMLAL_v         0.00 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+UMLAL_v         0.10 1110 ..1 ..... 10000 0 ..... ..... @qrrr_e
+SMLSL_v         0.00 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+UMLSL_v         0.10 1110 ..1 ..... 10100 0 ..... ..... @qrrr_e
+
+SADDL_v         0.00 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+UADDL_v         0.10 1110 ..1 ..... 00000 0 ..... ..... @qrrr_e
+SSUBL_v         0.00 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+USUBL_v         0.10 1110 ..1 ..... 00100 0 ..... ..... @qrrr_e
+SABAL_v         0.00 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+UABAL_v         0.10 1110 ..1 ..... 01010 0 ..... ..... @qrrr_e
+SABDL_v         0.00 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+UABDL_v         0.10 1110 ..1 ..... 01110 0 ..... ..... @qrrr_e
+
+SQDMULL_v       0.00 1110 011 ..... 11010 0 ..... ..... @qrrr_h
+SQDMULL_v       0.00 1110 101 ..... 11010 0 ..... ..... @qrrr_s
+SQDMLAL_v       0.00 1110 011 ..... 10010 0 ..... ..... @qrrr_h
+SQDMLAL_v       0.00 1110 101 ..... 10010 0 ..... ..... @qrrr_s
+SQDMLSL_v       0.00 1110 011 ..... 10110 0 ..... ..... @qrrr_h
+SQDMLSL_v       0.00 1110 101 ..... 10110 0 ..... ..... @qrrr_s
+
+SADDW           0.00 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+UADDW           0.10 1110 ..1 ..... 00010 0 ..... ..... @qrrr_e
+SSUBW           0.00 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+USUBW           0.10 1110 ..1 ..... 00110 0 ..... ..... @qrrr_e
+
+ADDHN           0.00 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+RADDHN          0.10 1110 ..1 ..... 01000 0 ..... ..... @qrrr_e
+SUBHN           0.00 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+RSUBHN          0.10 1110 ..1 ..... 01100 0 ..... ..... @qrrr_e
+
+PMULL_p8        0.00 1110 001 ..... 11100 0 ..... ..... @qrrr_b
+PMULL_p64       0.00 1110 111 ..... 11100 0 ..... ..... @qrrr_b
+
 ### Advanced SIMD scalar x indexed element

 FMUL_si         0101 1111 00 .. .... 1001 . 0 ..... .....   @rrx_h
@ -992,6 +1036,15 @@ SQRDMLAH_si     0111 1111 10 .. .... 1101 . 0 ..... .....   @rrx_s
 SQRDMLSH_si     0111 1111 01 .. .... 1111 . 0 ..... .....   @rrx_h
 SQRDMLSH_si     0111 1111 10 .. .... 1111 . 0 ..... .....   @rrx_s

+SQDMULL_si      0101 1111 01 .. .... 1011 . 0 ..... .....   @rrx_h
+SQDMULL_si      0101 1111 10 . ..... 1011 . 0 ..... .....   @rrx_s
+
+SQDMLAL_si      0101 1111 01 .. .... 0011 . 0 ..... .....   @rrx_h
+SQDMLAL_si      0101 1111 10 . ..... 0011 . 0 ..... .....   @rrx_s
+
+SQDMLSL_si      0101 1111 01 .. .... 0111 . 0 ..... .....   @rrx_h
+SQDMLSL_si      0101 1111 10 . ..... 0111 . 0 ..... .....   @rrx_s
+
 ### Advanced SIMD vector x indexed element

 FMUL_vi         0.00 1111 00 .. .... 1001 . 0 ..... .....   @qrrx_h
@ -1047,6 +1100,30 @@ FCMLA_vi        0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0
 FCMLA_vi        0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1
 FCMLA_vi        0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1

+SMULL_vi        0.00 1111 01 .. .... 1010 . 0 ..... .....   @qrrx_h
+SMULL_vi        0.00 1111 10 . ..... 1010 . 0 ..... .....   @qrrx_s
+UMULL_vi        0.10 1111 01 .. .... 1010 . 0 ..... .....   @qrrx_h
+UMULL_vi        0.10 1111 10 . ..... 1010 . 0 ..... .....   @qrrx_s
+
+SMLAL_vi        0.00 1111 01 .. .... 0010 . 0 ..... .....   @qrrx_h
+SMLAL_vi        0.00 1111 10 . ..... 0010 . 0 ..... .....   @qrrx_s
+UMLAL_vi        0.10 1111 01 .. .... 0010 . 0 ..... .....   @qrrx_h
+UMLAL_vi        0.10 1111 10 . ..... 0010 . 0 ..... .....   @qrrx_s
+
+SMLSL_vi        0.00 1111 01 .. .... 0110 . 0 ..... .....   @qrrx_h
+SMLSL_vi        0.00 1111 10 . ..... 0110 . 0 ..... .....   @qrrx_s
+UMLSL_vi        0.10 1111 01 .. .... 0110 . 0 ..... .....   @qrrx_h
+UMLSL_vi        0.10 1111 10 . ..... 0110 . 0 ..... .....   @qrrx_s
+
+SQDMULL_vi      0.00 1111 01 .. .... 1011 . 0 ..... .....   @qrrx_h
+SQDMULL_vi      0.00 1111 10 . ..... 1011 . 0 ..... .....   @qrrx_s
+
+SQDMLAL_vi      0.00 1111 01 .. .... 0011 . 0 ..... .....   @qrrx_h
+SQDMLAL_vi      0.00 1111 10 . ..... 0011 . 0 ..... .....   @qrrx_s
+
+SQDMLSL_vi      0.00 1111 01 .. .... 0111 . 0 ..... .....   @qrrx_h
+SQDMLSL_vi      0.00 1111 10 . ..... 0111 . 0 ..... .....   @qrrx_s
+
 # Floating-point conditional select

 FCSEL           0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5     esz=%esz_hsd
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@ -244,6 +244,7 @@ static const TCGCPUOps arm_v7m_tcg_ops = {
 #else
    .tlb_fill = arm_cpu_tlb_fill,
    .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
+    .cpu_exec_halt = arm_cpu_exec_halt,
    .do_interrupt = arm_v7m_cpu_do_interrupt,
    .do_transaction_failed = arm_cpu_do_transaction_failed,
    .do_unaligned_access = arm_cpu_do_unaligned_access,
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@ -1115,21 +1115,21 @@ static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m,

    if (update_flags) {
        /* Store C, clear NZV. */
-        env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
-        env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
+        env->vfp.fpsr &= ~FPSR_NZCV_MASK;
+        env->vfp.fpsr |= carry_in * FPSR_C;
    }
    mve_advance_vpt(env);
 }

 void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
 {
-    bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+    bool carry_in = env->vfp.fpsr & FPSR_C;
    do_vadc(env, vd, vn, vm, 0, carry_in, false);
 }

 void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
 {
-    bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+    bool carry_in = env->vfp.fpsr & FPSR_C;
    do_vadc(env, vd, vn, vm, -1, carry_in, false);
 }

@ -3343,7 +3343,7 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
    uint32_t *m = vm;
    uint16_t r;
    uint16_t mask = mve_element_mask(env);
-    bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+    bool ieee = !(env->vfp.fpcr & FPCR_AHP);
    unsigned e;
    float_status *fpst;
    float_status scratch_fpst;
@ -3373,7 +3373,7 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
    uint16_t *m = vm;
    uint32_t r;
    uint16_t mask = mve_element_mask(env);
-    bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+    bool ieee = !(env->vfp.fpcr & FPCR_AHP);
    unsigned e;
    float_status *fpst;
    float_status scratch_fpst;
--- a/target/arm/tcg/translate-a32.h
+++ b/target/arm/tcg/translate-a32.h
@ -83,6 +83,13 @@ void store_cpu_offset(TCGv_i32 var, int offset, int size);
                         sizeof_field(CPUARMState, name));              \
    })

+/* Store to the low half of a 64-bit field from a TCGv_i32 */
+#define store_cpu_field_low32(val, name)                                \
+    ({                                                                  \
+        QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8);        \
+        store_cpu_offset(val, offsetoflow32(CPUARMState, name), 4);     \
+    })
+
 #define store_cpu_field_constant(val, name) \
    store_cpu_field(tcg_constant_i32(val), name)

--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
--- a/target/arm/tcg/translate-m-nocp.c
+++ b/target/arm/tcg/translate-m-nocp.c
@ -332,7 +332,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
        if (dc_isar_feature(aa32_mve, s)) {
            /* QC is only present for MVE; otherwise RES0 */
            TCGv_i32 qc = tcg_temp_new_i32();
-            tcg_gen_andi_i32(qc, tmp, FPCR_QC);
+            tcg_gen_andi_i32(qc, tmp, FPSR_QC);
            /*
             * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
             * here writing the same value into all elements is simplest.
@ -340,11 +340,11 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
            tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
                                 16, 16, qc);
        }
-        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
-        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
-        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
+        tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
+        fpscr = load_cpu_field_low32(vfp.fpsr);
+        tcg_gen_andi_i32(fpscr, fpscr, ~FPSR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
-        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
+        store_cpu_field_low32(fpscr, vfp.fpsr);
        break;
    }
    case ARM_VFP_FPCXT_NS:
@ -390,7 +390,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
-        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+        tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
        gen_helper_vfp_set_fpscr(tcg_env, tmp);
        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        break;
@ -457,7 +457,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
    case ARM_VFP_FPSCR_NZCVQC:
        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, tcg_env);
-        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
+        tcg_gen_andi_i32(tmp, tmp, FPSR_NZCVQC_MASK);
        storefn(s, opaque, tmp, true);
        break;
    case QEMU_VFP_FPSCR_NZCV:
@ -465,8 +465,8 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
-        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
-        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+        tmp = load_cpu_field_low32(vfp.fpsr);
+        tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
        storefn(s, opaque, tmp, true);
        break;
    case ARM_VFP_FPCXT_S:
@ -476,7 +476,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, tcg_env);
-        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+        tcg_gen_andi_i32(tmp, tmp, ~FPSR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
@ -529,7 +529,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
        sfpa = tcg_temp_new_i32();
        fpscr = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(fpscr, tcg_env);
-        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
+        tcg_gen_andi_i32(tmp, fpscr, ~FPSR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@ -833,8 +833,8 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
-                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
-                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+                tmp = load_cpu_field_low32(vfp.fpsr);
+                tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, tcg_env);
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@ -351,8 +351,7 @@ static inline TCGv_i32 get_ahp_flag(void)
 {
    TCGv_i32 ret = tcg_temp_new_i32();

-    tcg_gen_ld_i32(ret, tcg_env,
-                   offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
+    tcg_gen_ld_i32(ret, tcg_env, offsetoflow32(CPUARMState, vfp.fpcr));
    tcg_gen_extract_i32(ret, ret, 26, 1);

    return ret;
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@ -85,7 +85,7 @@ static inline int vfp_exceptbits_to_host(int target_bits)
    return host_bits;
 }

-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
 {
    uint32_t i;

@ -99,14 +99,28 @@ static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
    return vfp_exceptbits_from_host(i);
 }

-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+static void vfp_set_fpsr_to_host(CPUARMState *env, uint32_t val)
 {
-    int i;
-    uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
+    /*
+     * The exception flags are ORed together when we read fpscr so we
+     * only need to preserve the current state in one of our
+     * float_status values.
+     */
+    int i = vfp_exceptbits_to_host(val);
+    set_float_exception_flags(i, &env->vfp.fp_status);
+    set_float_exception_flags(0, &env->vfp.fp_status_f16);
+    set_float_exception_flags(0, &env->vfp.standard_fp_status);
+    set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
+}
+
+static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+    uint64_t changed = env->vfp.fpcr;

    changed ^= val;
+    changed &= mask;
    if (changed & (3 << 22)) {
-        i = (val >> 22) & 3;
+        int i = (val >> 22) & 3;
        switch (i) {
        case FPROUNDING_TIEEVEN:
            i = float_round_nearest_even;
@ -141,52 +155,56 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
    }
-
-    /*
-     * The exception flags are ORed together when we read fpscr so we
-     * only need to preserve the current state in one of our
-     * float_status values.
-     */
-    i = vfp_exceptbits_to_host(val);
-    set_float_exception_flags(i, &env->vfp.fp_status);
-    set_float_exception_flags(0, &env->vfp.fp_status_f16);
-    set_float_exception_flags(0, &env->vfp.standard_fp_status);
-    set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
 }

 #else

-static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
 {
    return 0;
 }

-static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+static void vfp_set_fpsr_to_host(CPUARMState *env, uint32_t val)
+{
+}
+
+static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
 {
 }

 #endif

-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+uint32_t vfp_get_fpcr(CPUARMState *env)
 {
-    uint32_t i, fpscr;
-
-    fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
-            | (env->vfp.vec_len << 16)
-            | (env->vfp.vec_stride << 20);
+    uint32_t fpcr = env->vfp.fpcr
+        | (env->vfp.vec_len << 16)
+        | (env->vfp.vec_stride << 20);

    /*
-     * M-profile LTPSIZE overlaps A-profile Stride; whichever of the
-     * two is not applicable to this CPU will always be zero.
+     * M-profile LTPSIZE is the same bits [18:16] as A-profile Len; whichever
+     * of the two is not applicable to this CPU will always be zero.
     */
-    fpscr |= env->v7m.ltpsize << 16;
+    fpcr |= env->v7m.ltpsize << 16;

-    fpscr |= vfp_get_fpscr_from_host(env);
+    return fpcr;
+}
+
+uint32_t vfp_get_fpsr(CPUARMState *env)
+{
+    uint32_t fpsr = env->vfp.fpsr;
+    uint32_t i;
+
+    fpsr |= vfp_get_fpsr_from_host(env);

    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
-    fpscr |= i ? FPCR_QC : 0;
+    fpsr |= i ? FPSR_QC : 0;
+    return fpsr;
+}

-    return fpscr;
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+    return (vfp_get_fpcr(env) & FPSCR_FPCR_MASK) |
+        (vfp_get_fpsr(env) & FPSCR_FPSR_MASK);
 }

 uint32_t vfp_get_fpscr(CPUARMState *env)
@ -194,56 +212,91 @@ uint32_t vfp_get_fpscr(CPUARMState *env)
    return HELPER(vfp_get_fpscr)(env);
 }

-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+void vfp_set_fpsr(CPUARMState *env, uint32_t val)
 {
    ARMCPU *cpu = env_archcpu(env);

+    vfp_set_fpsr_to_host(env, val);
+
+    if (arm_feature(env, ARM_FEATURE_NEON) ||
+        cpu_isar_feature(aa32_mve, cpu)) {
+        /*
+         * The bit we set within vfp.qc[] is arbitrary; the array as a
+         * whole being zero/non-zero is what counts.
+         */
+        env->vfp.qc[0] = val & FPSR_QC;
+        env->vfp.qc[1] = 0;
+        env->vfp.qc[2] = 0;
+        env->vfp.qc[3] = 0;
+    }
+
+    /*
+     * The only FPSR bits we keep in vfp.fpsr are NZCV:
+     * the exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
+     * fp_status, and QC is in vfp.qc[]. Store the NZCV bits there,
+     * and zero any of the other FPSR bits.
+     */
+    val &= FPSR_NZCV_MASK;
+    env->vfp.fpsr = val;
+}
+
+static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+    /*
+     * We only set FPCR bits defined by mask, and leave the others alone.
+     * We assume the mask is sensible (e.g. doesn't try to set only
+     * part of a field)
+     */
+    ARMCPU *cpu = env_archcpu(env);
+
    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
    if (!cpu_isar_feature(any_fp16, cpu)) {
        val &= ~FPCR_FZ16;
    }

-    vfp_set_fpscr_to_host(env, val);
+    vfp_set_fpcr_to_host(env, val, mask);

-    if (!arm_feature(env, ARM_FEATURE_M)) {
-        /*
-         * Short-vector length and stride; on M-profile these bits
-         * are used for different purposes.
-         * We can't make this conditional be "if MVFR0.FPShVec != 0",
-         * because in v7A no-short-vector-support cores still had to
-         * allow Stride/Len to be written with the only effect that
-         * some insns are required to UNDEF if the guest sets them.
-         */
-        env->vfp.vec_len = extract32(val, 16, 3);
-        env->vfp.vec_stride = extract32(val, 20, 2);
-    } else if (cpu_isar_feature(aa32_mve, cpu)) {
-        env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
-                                     FPCR_LTPSIZE_LENGTH);
-    }
-
-    if (arm_feature(env, ARM_FEATURE_NEON) ||
-        cpu_isar_feature(aa32_mve, cpu)) {
-        /*
-         * The bit we set within fpscr_q is arbitrary; the register as a
-         * whole being zero/non-zero is what counts.
-         * TODO: M-profile MVE also has a QC bit.
-         */
-        env->vfp.qc[0] = val & FPCR_QC;
-        env->vfp.qc[1] = 0;
-        env->vfp.qc[2] = 0;
-        env->vfp.qc[3] = 0;
+    if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
+        if (!arm_feature(env, ARM_FEATURE_M)) {
+            /*
+             * Short-vector length and stride; on M-profile these bits
+             * are used for different purposes.
+             * We can't make this conditional be "if MVFR0.FPShVec != 0",
+             * because in v7A no-short-vector-support cores still had to
+             * allow Stride/Len to be written with the only effect that
+             * some insns are required to UNDEF if the guest sets them.
+             */
+            env->vfp.vec_len = extract32(val, 16, 3);
+            env->vfp.vec_stride = extract32(val, 20, 2);
+        } else if (cpu_isar_feature(aa32_mve, cpu)) {
+            env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+                                         FPCR_LTPSIZE_LENGTH);
+        }
    }

    /*
     * We don't implement trapped exception handling, so the
     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
     *
-     * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
-     * fp_status; QC, Len and Stride are stored separately earlier.
-     * Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
-     * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
+     * The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode
+     * and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
+     * there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
+     * bits.
     */
-    env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
+    val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16;
+    env->vfp.fpcr &= ~mask;
+    env->vfp.fpcr |= val;
+}
+
+void vfp_set_fpcr(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, MAKE_64BIT_MASK(0, 32));
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+    vfp_set_fpcr_masked(env, val, FPSCR_FPCR_MASK);
+    vfp_set_fpsr(env, val & FPSCR_FPSR_MASK);
 }

 void vfp_set_fpscr(CPUARMState *env, uint32_t val)
@ -315,8 +368,7 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
    default:
        g_assert_not_reached();
    }
-    env->vfp.xregs[ARM_VFP_FPSCR] =
-        deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
+    env->vfp.fpsr = deposit64(env->vfp.fpsr, 28, 4, flags); /* NZCV */
 }

 /* XXX: check quiet/signaling case */
@ -1119,8 +1171,7 @@ uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
    uint32_t z = (pair >> 32) == 0;

    /* Store Z, clear NCV, in FPSCR.NZCV.  */
-    env->vfp.xregs[ARM_VFP_FPSCR]
-        = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
+    env->vfp.fpsr = (env->vfp.fpsr & ~FPSR_NZCV_MASK) | (z * FPSR_Z);

    return result;
 }
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@ -210,6 +210,7 @@ static const TCGCPUOps avr_tcg_ops = {
    .synchronize_from_tb = avr_cpu_synchronize_from_tb,
    .restore_state_to_opc = avr_restore_state_to_opc,
    .cpu_exec_interrupt = avr_cpu_exec_interrupt,
+    .cpu_exec_halt = avr_cpu_has_work,
    .tlb_fill = avr_cpu_tlb_fill,
    .do_interrupt = avr_cpu_do_interrupt,
 };
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@ -186,6 +186,7 @@ static const TCGCPUOps crisv10_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = cris_cpu_tlb_fill,
    .cpu_exec_interrupt = cris_cpu_exec_interrupt,
+    .cpu_exec_halt = cris_cpu_has_work,
    .do_interrupt = crisv10_cpu_do_interrupt,
 #endif /* !CONFIG_USER_ONLY */
 };
@ -197,6 +198,7 @@ static const TCGCPUOps crisv32_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = cris_cpu_tlb_fill,
    .cpu_exec_interrupt = cris_cpu_exec_interrupt,
+    .cpu_exec_halt = cris_cpu_has_work,
    .do_interrupt = cris_cpu_do_interrupt,
 #endif /* !CONFIG_USER_ONLY */
 };
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@ -228,6 +228,7 @@ static const TCGCPUOps hppa_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = hppa_cpu_tlb_fill,
    .cpu_exec_interrupt = hppa_cpu_exec_interrupt,
+    .cpu_exec_halt = hppa_cpu_has_work,
    .do_interrupt = hppa_cpu_do_interrupt,
    .do_unaligned_access = hppa_cpu_do_unaligned_access,
    .do_transaction_failed = hppa_cpu_do_transaction_failed,
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@ -736,6 +736,7 @@ static const TCGCPUOps loongarch_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = loongarch_cpu_tlb_fill,
    .cpu_exec_interrupt = loongarch_cpu_exec_interrupt,
+    .cpu_exec_halt = loongarch_cpu_has_work,
    .do_interrupt = loongarch_cpu_do_interrupt,
    .do_transaction_failed = loongarch_cpu_do_transaction_failed,
 #endif
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@ -536,6 +536,7 @@ static const TCGCPUOps m68k_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = m68k_cpu_tlb_fill,
    .cpu_exec_interrupt = m68k_cpu_exec_interrupt,
+    .cpu_exec_halt = m68k_cpu_has_work,
    .do_interrupt = m68k_cpu_do_interrupt,
    .do_transaction_failed = m68k_cpu_transaction_failed,
 #endif /* !CONFIG_USER_ONLY */
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@ -413,6 +413,7 @@ static const TCGCPUOps mb_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = mb_cpu_tlb_fill,
    .cpu_exec_interrupt = mb_cpu_exec_interrupt,
+    .cpu_exec_halt = mb_cpu_has_work,
    .do_interrupt = mb_cpu_do_interrupt,
    .do_transaction_failed = mb_cpu_transaction_failed,
    .do_unaligned_access = mb_cpu_do_unaligned_access,
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@ -555,6 +555,7 @@ static const TCGCPUOps mips_tcg_ops = {
 #if !defined(CONFIG_USER_ONLY)
    .tlb_fill = mips_cpu_tlb_fill,
    .cpu_exec_interrupt = mips_cpu_exec_interrupt,
+    .cpu_exec_halt = mips_cpu_has_work,
    .do_interrupt = mips_cpu_do_interrupt,
    .do_transaction_failed = mips_cpu_do_transaction_failed,
    .do_unaligned_access = mips_cpu_do_unaligned_access,
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@ -233,6 +233,7 @@ static const TCGCPUOps openrisc_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = openrisc_cpu_tlb_fill,
    .cpu_exec_interrupt = openrisc_cpu_exec_interrupt,
+    .cpu_exec_halt = openrisc_cpu_has_work,
    .do_interrupt = openrisc_cpu_do_interrupt,
 #endif /* !CONFIG_USER_ONLY */
 };
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@ -1,3 +1,4 @@
+
 /*
 *  PowerPC CPU initialization for qemu.
 *
@ -7481,6 +7482,7 @@ static const TCGCPUOps ppc_tcg_ops = {
 #else
  .tlb_fill = ppc_cpu_tlb_fill,
  .cpu_exec_interrupt = ppc_cpu_exec_interrupt,
+  .cpu_exec_halt = ppc_cpu_has_work,
  .do_interrupt = ppc_cpu_do_interrupt,
  .cpu_exec_enter = ppc_cpu_exec_enter,
  .cpu_exec_exit = ppc_cpu_exec_exit,
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@ -903,7 +903,7 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
    return env->pc;
 }

-static bool riscv_cpu_has_work(CPUState *cs)
+bool riscv_cpu_has_work(CPUState *cs)
 {
 #ifndef CONFIG_USER_ONLY
    RISCVCPU *cpu = RISCV_CPU(cs);
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@ -136,4 +136,7 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
    }
 }

+/* Our implementation of CPUClass::has_work */
+bool riscv_cpu_has_work(CPUState *cs);
+
 #endif
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@ -21,6 +21,7 @@
 #include "exec/exec-all.h"
 #include "tcg-cpu.h"
 #include "cpu.h"
+#include "internals.h"
 #include "pmu.h"
 #include "time_helper.h"
 #include "qapi/error.h"
@ -138,6 +139,7 @@ static const TCGCPUOps riscv_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = riscv_cpu_tlb_fill,
    .cpu_exec_interrupt = riscv_cpu_exec_interrupt,
+    .cpu_exec_halt = riscv_cpu_has_work,
    .do_interrupt = riscv_cpu_do_interrupt,
    .do_transaction_failed = riscv_cpu_do_transaction_failed,
    .do_unaligned_access = riscv_cpu_do_unaligned_access,
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@ -192,6 +192,7 @@ static const TCGCPUOps rx_tcg_ops = {

 #ifndef CONFIG_USER_ONLY
    .cpu_exec_interrupt = rx_cpu_exec_interrupt,
+    .cpu_exec_halt = rx_cpu_has_work,
    .do_interrupt = rx_cpu_do_interrupt,
 #endif /* !CONFIG_USER_ONLY */
 };
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@ -370,6 +370,7 @@ static const TCGCPUOps s390_tcg_ops = {
 #else
    .tlb_fill = s390_cpu_tlb_fill,
    .cpu_exec_interrupt = s390_cpu_exec_interrupt,
+    .cpu_exec_halt = s390_cpu_has_work,
    .do_interrupt = s390_cpu_do_interrupt,
    .debug_excp_handler = s390x_cpu_debug_excp_handler,
    .do_unaligned_access = s390x_cpu_do_unaligned_access,
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@ -254,6 +254,7 @@ static const TCGCPUOps superh_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = superh_cpu_tlb_fill,
    .cpu_exec_interrupt = superh_cpu_exec_interrupt,
+    .cpu_exec_halt = superh_cpu_has_work,
    .do_interrupt = superh_cpu_do_interrupt,
    .do_unaligned_access = superh_cpu_do_unaligned_access,
    .io_recompile_replay_branch = superh_io_recompile_replay_branch,
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@ -926,6 +926,7 @@ static const TCGCPUOps sparc_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = sparc_cpu_tlb_fill,
    .cpu_exec_interrupt = sparc_cpu_exec_interrupt,
+    .cpu_exec_halt = sparc_cpu_has_work,
    .do_interrupt = sparc_cpu_do_interrupt,
    .do_transaction_failed = sparc_cpu_do_transaction_failed,
    .do_unaligned_access = sparc_cpu_do_unaligned_access,
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@ -169,6 +169,7 @@ static const TCGCPUOps tricore_tcg_ops = {
    .synchronize_from_tb = tricore_cpu_synchronize_from_tb,
    .restore_state_to_opc = tricore_restore_state_to_opc,
    .tlb_fill = tricore_cpu_tlb_fill,
+    .cpu_exec_halt = tricore_cpu_has_work,
 };

 static void tricore_cpu_class_init(ObjectClass *c, void *data)
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@ -234,6 +234,7 @@ static const TCGCPUOps xtensa_tcg_ops = {
 #ifndef CONFIG_USER_ONLY
    .tlb_fill = xtensa_cpu_tlb_fill,
    .cpu_exec_interrupt = xtensa_cpu_exec_interrupt,
+    .cpu_exec_halt = xtensa_cpu_has_work,
    .do_interrupt = xtensa_cpu_do_interrupt,
    .do_transaction_failed = xtensa_cpu_do_transaction_failed,
    .do_unaligned_access = xtensa_cpu_do_unaligned_access,