From 69cb498c56263a5ae484fd4fef920d3d3eea04c8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 17:46:17 +0200
Subject: [PATCH 01/42] target/i386: fix pushed value of EFLAGS.RF

When preparing an exception stack frame for a fault exception, the value
pushed for RF is 1.  Take that into account.  The same should be true
of interrupts for repeated string instructions, but the situation there
is complicated.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/seg_helper.c | 49 ++++++++++++++++++++++++++++++++----
 target/i386/tcg/translate.c  |  8 ++++++
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 0301459004..715db1f232 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -526,6 +526,24 @@ static inline unsigned int get_sp_mask(unsigned int e2)
     }
 }
 
+static int exception_is_fault(int intno)
+{
+    switch (intno) {
+        /*
+         * #DB can be both fault- and trap-like, but it never sets RF=1
+         * in the RFLAGS value pushed on the stack.
+         */
+    case EXCP01_DB:
+    case EXCP03_INT3:
+    case EXCP04_INTO:
+    case EXCP08_DBLE:
+    case EXCP12_MCHK:
+        return 0;
+    }
+    /* Everything else including reserved exception is a fault.  */
+    return 1;
+}
+
 int exception_has_error_code(int intno)
 {
     switch (intno) {
@@ -605,8 +623,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     int type, dpl, selector, ss_dpl, cpl;
     int has_error_code, new_stack, shift;
     uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
-    uint32_t old_eip, sp_mask;
+    uint32_t old_eip, sp_mask, eflags;
     int vm86 = env->eflags & VM_MASK;
+    bool set_rf;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -614,8 +633,10 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     }
     if (is_int) {
         old_eip = next_eip;
+        set_rf = false;
     } else {
         old_eip = env->eip;
+        set_rf = exception_is_fault(intno);
     }
 
     dt = &env->idt;
@@ -748,6 +769,15 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     }
     push_size <<= shift;
 #endif
+    eflags = cpu_compute_eflags(env);
+    /*
+     * AMD states that code breakpoint #DBs clear RF=0, Intel leaves it
+     * as is.  AMD behavior could be implemented in check_hw_breakpoints().
+     */
+    if (set_rf) {
+        eflags |= RF_MASK;
+    }
+
     if (shift == 1) {
         if (new_stack) {
             if (vm86) {
@@ -759,7 +789,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
             PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
             PUSHL(ssp, esp, sp_mask, env->regs[R_ESP]);
         }
-        PUSHL(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHL(ssp, esp, sp_mask, eflags);
         PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
         PUSHL(ssp, esp, sp_mask, old_eip);
         if (has_error_code) {
@@ -776,7 +806,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
             PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
             PUSHW(ssp, esp, sp_mask, env->regs[R_ESP]);
         }
-        PUSHW(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHW(ssp, esp, sp_mask, eflags);
         PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
         PUSHW(ssp, esp, sp_mask, old_eip);
         if (has_error_code) {
@@ -868,8 +898,9 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     target_ulong ptr;
     int type, dpl, selector, cpl, ist;
     int has_error_code, new_stack;
-    uint32_t e1, e2, e3, ss;
+    uint32_t e1, e2, e3, ss, eflags;
     target_ulong old_eip, esp, offset;
+    bool set_rf;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -877,8 +908,10 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     }
     if (is_int) {
         old_eip = next_eip;
+        set_rf = false;
     } else {
         old_eip = env->eip;
+        set_rf = exception_is_fault(intno);
     }
 
     dt = &env->idt;
@@ -950,9 +983,15 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     }
     esp &= ~0xfLL; /* align stack */
 
+    /* See do_interrupt_protected.  */
+    eflags = cpu_compute_eflags(env);
+    if (set_rf) {
+        eflags |= RF_MASK;
+    }
+
     PUSHQ(esp, env->segs[R_SS].selector);
     PUSHQ(esp, env->regs[R_ESP]);
-    PUSHQ(esp, cpu_compute_eflags(env));
+    PUSHQ(esp, eflags);
     PUSHQ(esp, env->segs[R_CS].selector);
     PUSHQ(esp, old_eip);
     if (has_error_code) {
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0486ab6911..d438f8f76f 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4630,6 +4630,14 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
      * If jmp_opt, we want to handle each string instruction individually.
      * For icount also disable repz optimization so that each iteration
      * is accounted separately.
+     *
+     * FIXME: this is messy; it makes REP string instructions a lot less
+     * efficient than they should be and it gets in the way of correct
+     * handling of RF (interrupts or traps arriving after any iteration
+     * of a repeated string instruction but the last should set RF to 1).
+     * Perhaps it would be more efficient if REP string instructions were
+     * always at the beginning of the TB, or even their own TB?  That
+     * would even allow accounting up to 64k iterations at once for icount.
      */
     dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
 

From 73fb7b3c4983e48f3081fca00013a996abf659c0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 13:17:27 +0200
Subject: [PATCH 02/42] target/i386: fix implementation of ICEBP

ICEBP generates a trap-like exception, while gen_exception() produces
a fault.  Resurrect gen_update_eip_next() to implement the desired
semantics.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/helper.h          |  1 +
 target/i386/tcg/emit.c.inc    |  5 ++++-
 target/i386/tcg/excp_helper.c | 20 ++++++++++++++++++++
 target/i386/tcg/helper-tcg.h  | 12 +++++++++++-
 target/i386/tcg/translate.c   | 13 +++++++++++++
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index a52a1bf0f2..8f291a5f66 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -56,6 +56,7 @@ DEF_HELPER_2(sysret, void, env, int)
 DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
 DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e990141454..36127d9994 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1858,7 +1858,10 @@ static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
 static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
-    gen_exception(s, EXCP01_DB);
+    gen_update_cc_op(s);
+    gen_update_eip_next(s);
+    gen_helper_icebp(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index 65e37ae2a0..72387aac24 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -140,6 +140,26 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index,
     raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
 }
 
+G_NORETURN void helper_icebp(CPUX86State *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    do_end_instruction(env);
+
+    /*
+     * INT1 aka ICEBP generates a trap-like #DB, but it is pretty special.
+     *
+     * "Although the ICEBP instruction dispatches through IDT vector 1,
+     * that event is not interceptable by means of the #DB exception
+     * intercept".  Instead there is a separate fault-like ICEBP intercept.
+     */
+    cs->exception_index = EXCP01_DB;
+    env->error_code = 0;
+    env->exception_is_int = 0;
+    env->exception_next_eip = env->eip;
+    cpu_loop_exit(cs);
+}
+
 G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
                                         MMUAccessType access_type,
                                         uintptr_t retaddr)
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 85957943bf..eb6a4926a4 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -111,7 +111,17 @@ int exception_has_error_code(int intno);
 /* smm_helper.c */
 void do_smm_enter(X86CPU *cpu);
 
-/* bpt_helper.c */
+/* sysemu/bpt_helper.c */
 bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
 
+/*
+ * Do the tasks usually performed by gen_eob().  Callers of this function
+ * should also handle TF as appropriate.
+ */
+static inline void do_end_instruction(CPUX86State *env)
+{
+    /* needed if sti is just before */
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    env->eflags &= ~HF_RF_MASK;
+}
 #endif /* I386_HELPER_TCG_H */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index d438f8f76f..77ed9c1db4 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -549,6 +549,19 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     }
 }
 
+static void gen_update_eip_next(DisasContext *s)
+{
+    assert(s->pc_save != -1);
+    if (tb_cflags(s->base.tb) & CF_PCREL) {
+        tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
+    } else if (CODE64(s)) {
+        tcg_gen_movi_tl(cpu_eip, s->pc);
+    } else {
+        tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base));
+    }
+    s->pc_save = s->pc;
+}
+
 static void gen_update_eip_cur(DisasContext *s)
 {
     assert(s->pc_save != -1);

From 536032566b1fc1f4b66450770dbb30b49e736b52 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 15:12:22 +0200
Subject: [PATCH 03/42] target/i386: cleanup HLT helpers

Use decode.c's support for intercepts, doing the check in TCG-generated
code rather than the helper.  This is cleaner because it allows removing
the eip_addend argument to helper_hlt().

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/helper.h                 |  2 +-
 target/i386/tcg/decode-new.c.inc     |  4 ++--
 target/i386/tcg/emit.c.inc           |  4 ++--
 target/i386/tcg/sysemu/misc_helper.c | 13 ++-----------
 4 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index 8f291a5f66..c244dbb481 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -90,7 +90,7 @@ DEF_HELPER_2(vmsave, void, env, int)
 DEF_HELPER_1(stgi, void, env)
 DEF_HELPER_1(clgi, void, env)
 DEF_HELPER_FLAGS_2(flush_page, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_2(hlt, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(hlt, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl)
 DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int)
 DEF_HELPER_1(rdmsr, void, env)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 0ff0866e8f..376d2bdabe 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1496,7 +1496,7 @@ static const X86OpEntry opcodes_root[256] = {
     [0xE7] = X86_OP_ENTRYrr(OUT,   0,v, I_unsigned,b), /* AX/EAX */
 
     [0xF1] = X86_OP_ENTRY0(INT1,   svm(ICEBP)),
-    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0)),
+    [0xF4] = X86_OP_ENTRY0(HLT,    chk(cpl0) svm(HLT)),
     [0xF5] = X86_OP_ENTRY0(CMC),
     [0xF6] = X86_OP_GROUP1(group3, E,b),
     [0xF7] = X86_OP_GROUP1(group3, E,v),
@@ -2539,7 +2539,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
 
     /*
      * Checks that result in #GP or VMEXIT come second.  Intercepts are
-     * generally checked after non-memory exceptions (i.e. before all
+     * generally checked after non-memory exceptions (i.e. after all
      * exceptions if there is no memory operand).  Exceptions are
      * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
      *
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 36127d9994..2e94e8ec56 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1638,8 +1638,8 @@ static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
 #ifdef CONFIG_SYSTEM_ONLY
     gen_update_cc_op(s);
-    gen_update_eip_cur(s);
-    gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
+    gen_update_eip_next(s);
+    gen_helper_hlt(tcg_env);
     s->base.is_jmp = DISAS_NORETURN;
 #endif
 }
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index edb7c3d894..e41c88346c 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -516,8 +516,7 @@ void helper_flush_page(CPUX86State *env, target_ulong addr)
     tlb_flush_page(env_cpu(env), addr);
 }
 
-static G_NORETURN
-void do_hlt(CPUX86State *env)
+G_NORETURN void helper_hlt(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
@@ -527,14 +526,6 @@ void do_hlt(CPUX86State *env)
     cpu_loop_exit(cs);
 }
 
-G_NORETURN void helper_hlt(CPUX86State *env, int next_eip_addend)
-{
-    cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC());
-    env->eip += next_eip_addend;
-
-    do_hlt(env);
-}
-
 void helper_monitor(CPUX86State *env, target_ulong ptr)
 {
     if ((uint32_t)env->regs[R_ECX] != 0) {
@@ -558,6 +549,6 @@ G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend)
     if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) {
         do_pause(env);
     } else {
-        do_hlt(env);
+        helper_hlt(env);
     }
 }

From 330e6adc1acd2a235a96b502b3dd15ba6e77c228 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 15:12:22 +0200
Subject: [PATCH 04/42] target/i386: cleanup PAUSE helpers

Use decode.c's support for intercepts, doing the check in TCG-generated
code rather than the helper.  This is cleaner because it allows removing
the eip_addend argument to helper_pause(), even though it adds a bit of
bloat for opcode 0x90's new decoding function.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/helper.h                 |  2 +-
 target/i386/tcg/decode-new.c.inc     | 15 ++++++++++++++-
 target/i386/tcg/emit.c.inc           | 20 ++++++++------------
 target/i386/tcg/helper-tcg.h         |  1 -
 target/i386/tcg/misc_helper.c        | 10 +---------
 target/i386/tcg/sysemu/misc_helper.c |  2 +-
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index c244dbb481..2f46cffabd 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -53,7 +53,7 @@ DEF_HELPER_1(sysenter, void, env)
 DEF_HELPER_2(sysexit, void, env, int)
 DEF_HELPER_2(syscall, void, env, int)
 DEF_HELPER_2(sysret, void, env, int)
-DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(pause, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
 DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 376d2bdabe..c2d8da8d14 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1359,6 +1359,19 @@ static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
     }
 }
 
+static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
+    static X86OpEntry nop = X86_OP_ENTRY0(NOP);
+    static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
+
+    if (REX_B(s)) {
+        *entry = xchg_ax;
+    } else {
+        *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
+    }
+}
+
 static const X86OpEntry opcodes_root[256] = {
     [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
     [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
@@ -1441,7 +1454,7 @@ static const X86OpEntry opcodes_root[256] = {
     [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
     [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
 
-    [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x90] = X86_OP_GROUP0(90),
     [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
     [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
     [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 2e94e8ec56..f90f3d3c58 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -2350,6 +2350,14 @@ static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
                       decode->op[1].offset, vec_len, vec_len);
 }
 
+static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_next(s);
+    gen_helper_pause(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
+}
+
 static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -4014,18 +4022,6 @@ static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
 static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
-    if (decode->b == 0x90 && !REX_B(s)) {
-        if (s->prefix & PREFIX_REPZ) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            gen_helper_pause(tcg_env, cur_insn_len_i32(s));
-            s->base.is_jmp = DISAS_NORETURN;
-        }
-        /* No writeback.  */
-        decode->op[0].unit = X86_OP_SKIP;
-        return;
-    }
-
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
                                s->mem_index, decode->op[0].ot | MO_LE);
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index eb6a4926a4..15d6c6f8b4 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -91,7 +91,6 @@ extern const uint8_t parity_table[256];
 
 /* misc_helper.c */
 void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-G_NORETURN void do_pause(CPUX86State *env);
 
 /* sysemu/svm_helper.c */
 #ifndef CONFIG_USER_ONLY
diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c
index b0f0f7b893..8316d42ffc 100644
--- a/target/i386/tcg/misc_helper.c
+++ b/target/i386/tcg/misc_helper.c
@@ -88,7 +88,7 @@ G_NORETURN void helper_rdpmc(CPUX86State *env)
     raise_exception_err(env, EXCP06_ILLOP, 0);
 }
 
-G_NORETURN void do_pause(CPUX86State *env)
+G_NORETURN void helper_pause(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
@@ -97,14 +97,6 @@ G_NORETURN void do_pause(CPUX86State *env)
     cpu_loop_exit(cs);
 }
 
-G_NORETURN void helper_pause(CPUX86State *env, int next_eip_addend)
-{
-    cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC());
-    env->eip += next_eip_addend;
-
-    do_pause(env);
-}
-
 uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx)
 {
     if ((env->cr[4] & CR4_PKE_MASK) == 0) {
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index e41c88346c..093cc2d0f9 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -547,7 +547,7 @@ G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend)
 
     /* XXX: not complete but not completely erroneous */
     if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) {
-        do_pause(env);
+        helper_pause(env);
     } else {
         helper_hlt(env);
     }

From 57f8dbdbe94a502301f51809e8b282b02df43370 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 13:18:56 +0200
Subject: [PATCH 05/42] target/i386: implement DR7.GD

DR7.GD triggers a #DB exception on any access to debug registers.
The GD bit is cleared so that the #DB handler itself can access
the debug registers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/sysemu/bpt_helper.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c
index 4d96a48a3c..c1d5fce250 100644
--- a/target/i386/tcg/sysemu/bpt_helper.c
+++ b/target/i386/tcg/sysemu/bpt_helper.c
@@ -238,6 +238,12 @@ target_ulong helper_get_dr(CPUX86State *env, int reg)
         }
     }
 
+    if (env->dr[7] & DR7_GD) {
+        env->dr[7] &= ~DR7_GD;
+        env->dr[6] |= DR6_BD;
+        raise_exception_ra(env, EXCP01_DB, GETPC());
+    }
+
     return env->dr[reg];
 }
 
@@ -251,6 +257,12 @@ void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
         }
     }
 
+    if (env->dr[7] & DR7_GD) {
+        env->dr[7] &= ~DR7_GD;
+        env->dr[6] |= DR6_BD;
+        raise_exception_ra(env, EXCP01_DB, GETPC());
+    }
+
     if (reg < 4) {
         if (hw_breakpoint_enabled(env->dr[7], reg)
             && hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) {

From 8aa76496dfaac0d7b0dd34793359680c90d9aea0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 15:41:56 +0200
Subject: [PATCH 06/42] target/i386: disable/enable breakpoints on
 vmentry/vmexit

If the required DR7 (either from the VMCB or from the host save
area) disables a breakpoint that was enabled prior to vmentry
or vmexit, it is left enabled and will trigger EXCP_DEBUG.
This causes a spurious #DB on the next crossing of the breakpoint.

To disable it, vmentry/vmexit must use cpu_x86_update_dr7
to load DR7.

Because cpu_x86_update_dr7 takes a 32-bit argument, check
reserved bits prior to calling cpu_x86_update_dr7, and do the
same for DR6 as well for consistency.

This scenario is tested by the "host_rflags" test in kvm-unit-tests.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/sysemu/svm_helper.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 5d6de2294f..922d8964f8 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -163,6 +163,8 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
     uint64_t new_cr0;
     uint64_t new_cr3;
     uint64_t new_cr4;
+    uint64_t new_dr6;
+    uint64_t new_dr7;
 
     if (aflag == 2) {
         addr = env->regs[R_EAX];
@@ -361,20 +363,22 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                 env->vm_vmcb + offsetof(struct vmcb, save.rsp));
     env->regs[R_EAX] = x86_ldq_phys(cs,
                                 env->vm_vmcb + offsetof(struct vmcb, save.rax));
-    env->dr[7] = x86_ldq_phys(cs,
-                          env->vm_vmcb + offsetof(struct vmcb, save.dr7));
-    env->dr[6] = x86_ldq_phys(cs,
-                          env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+
+    new_dr7 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+    new_dr6 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6));
 
 #ifdef TARGET_X86_64
-    if (env->dr[6] & DR_RESERVED_MASK) {
+    if (new_dr7 & DR_RESERVED_MASK) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
-    if (env->dr[7] & DR_RESERVED_MASK) {
+    if (new_dr6 & DR_RESERVED_MASK) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
 #endif
 
+    cpu_x86_update_dr7(env, new_dr7);
+    env->dr[6] = new_dr6;
+
     if (is_efer_invalid_state(env)) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
@@ -864,8 +868,11 @@ void do_vmexit(CPUX86State *env)
 
     env->dr[6] = x86_ldq_phys(cs,
                           env->vm_hsave + offsetof(struct vmcb, save.dr6));
-    env->dr[7] = x86_ldq_phys(cs,
-                          env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+    /* Disables all breakpoints in the host DR7 register. */
+    cpu_x86_update_dr7(env,
+             x86_ldq_phys(cs,
+                          env->vm_hsave + offsetof(struct vmcb, save.dr7)) & ~0xff);
 
     /* other setups */
     x86_stl_phys(cs,
@@ -891,8 +898,6 @@ void do_vmexit(CPUX86State *env)
        from the page table indicated the host's CR3. If the PDPEs contain
        illegal state, the processor causes a shutdown. */
 
-    /* Disables all breakpoints in the host DR7 register. */
-
     /* Checks the reloaded host state for consistency. */
 
     /* If the host's rIP reloaded by #VMEXIT is outside the limit of the

From 1a150d331d9bbd882c8b93146ff7fbc6259f0961 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 25 May 2024 10:30:50 +0200
Subject: [PATCH 07/42] target/i386: fix INHIBIT_IRQ/TF/RF handling for VMRUN

From vm entry to exit, VMRUN is handled as a single instruction.  It
uses DISAS_NORETURN in order to avoid processing TF or RF before
the first instruction executes in the guest.  However, the corresponding
handling is missing in vmexit.  Add it, and at the same time reorganize
the comments with quotes from the manual about the tasks performed
by a #VMEXIT.

Another gen_eob() task that is missing in VMRUN is preparing the
HF_INHIBIT_IRQ flag for the next instruction, in this case by loading
it from the VMCB control state.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/sysemu/svm_helper.c | 46 +++++++++++++++++++++--------
 target/i386/tcg/translate.c         |  5 ++++
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 922d8964f8..9db8ad62a0 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -254,6 +254,13 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                                   control.intercept_exceptions
                                                   ));
 
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    if (x86_ldl_phys(cs, env->vm_vmcb +
+                offsetof(struct vmcb, control.int_state)) &
+                 SVM_INTERRUPT_SHADOW_MASK) {
+        env->hflags |= HF_INHIBIT_IRQ_MASK;
+    }
+
     nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
                                                           control.nested_ctl));
     asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
@@ -815,8 +822,12 @@ void do_vmexit(CPUX86State *env)
     env->hflags &= ~HF_GUEST_MASK;
     env->intercept = 0;
     env->intercept_exceptions = 0;
+
+    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
     cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
     env->int_ctl = 0;
+
+    /* Clears the TSC_OFFSET inside the processor. */
     env->tsc_offset = 0;
 
     env->gdt.base  = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb,
@@ -836,6 +847,15 @@ void do_vmexit(CPUX86State *env)
     cpu_x86_update_cr4(env, x86_ldq_phys(cs,
                                      env->vm_hsave + offsetof(struct vmcb,
                                                               save.cr4)));
+
+    /*
+     * Resets the current ASID register to zero (host ASID; TLB flush).
+     *
+     * If the host is in PAE mode, the processor reloads the host's PDPEs
+     * from the page table indicated the host's CR3. FIXME: If the PDPEs
+     * contain illegal state, the processor causes a shutdown (QEMU does
+     * not implement PDPTRs).
+     */
     cpu_x86_update_cr3(env, x86_ldq_phys(cs,
                                      env->vm_hsave + offsetof(struct vmcb,
                                                               save.cr3)));
@@ -843,12 +863,14 @@ void do_vmexit(CPUX86State *env)
        set properly */
     cpu_load_efer(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb,
                                                          save.efer)));
+
+    /* Completion of the VMRUN instruction clears the host EFLAGS.RF bit.  */
     env->eflags = 0;
     cpu_load_eflags(env, x86_ldq_phys(cs,
                                   env->vm_hsave + offsetof(struct vmcb,
                                                            save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
-                      VM_MASK));
+                      RF_MASK | VM_MASK));
 
     svm_load_seg_cache(env, MMU_PHYS_IDX,
                        env->vm_hsave + offsetof(struct vmcb, save.es), R_ES);
@@ -888,19 +910,17 @@ void do_vmexit(CPUX86State *env)
 
     env->hflags2 &= ~HF2_GIF_MASK;
     env->hflags2 &= ~HF2_VGIF_MASK;
-    /* FIXME: Resets the current ASID register to zero (host ASID). */
 
-    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
 
-    /* Clears the TSC_OFFSET inside the processor. */
+    /* FIXME: Checks the reloaded host state for consistency. */
 
-    /* If the host is in PAE mode, the processor reloads the host's PDPEs
-       from the page table indicated the host's CR3. If the PDPEs contain
-       illegal state, the processor causes a shutdown. */
-
-    /* Checks the reloaded host state for consistency. */
-
-    /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
-       host's code segment or non-canonical (in the case of long mode), a
-       #GP fault is delivered inside the host. */
+    /*
+     * EFLAGS.TF causes a #DB trap after the VMRUN completes on the host
+     * side (i.e., after the #VMEXIT from the guest). Since we're running
+     * in the main loop, call do_interrupt_all directly.
+     */
+    if ((env->eflags & TF_MASK) != 0) {
+        env->dr[6] |= DR6_BS;
+        do_interrupt_all(X86_CPU(cs), EXCP01_DB, 0, 0, env->eip, 0);
+    }
 }
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 77ed9c1db4..a9c6424c7d 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3745,6 +3745,11 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
             }
             gen_update_cc_op(s);
             gen_update_eip_cur(s);
+            /*
+             * Reloads INHIBIT_IRQ mask as well as TF and RF with guest state.
+             * The usual gen_eob() handling is performed on vmexit after
+             * host state is reloaded.
+             */
             gen_helper_vmrun(tcg_env, tcg_constant_i32(s->aflag - 1),
                              cur_insn_len_i32(s));
             tcg_gen_exit_tb(NULL, 0);

From 3718523d011e898d414f09a4ed43cf13d76de0b4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 25 May 2024 10:47:31 +0200
Subject: [PATCH 08/42] target/i386: fix INHIBIT_IRQ/TF/RF handling for PAUSE

PAUSE uses DISAS_NORETURN because the corresponding helper
calls cpu_loop_exit().  However, while HLT clear HF_INHIBIT_IRQ_MASK
to correctly handle "STI; HLT", the same is missing from PAUSE.
And also gen_eob() clears HF_RF_MASK and synthesizes a #DB exception
if single-step is active; none of this is done by HLT and PAUSE.
Start fixing PAUSE, HLT will follow.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/misc_helper.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c
index 8316d42ffc..ed4cda8001 100644
--- a/target/i386/tcg/misc_helper.c
+++ b/target/i386/tcg/misc_helper.c
@@ -92,6 +92,10 @@ G_NORETURN void helper_pause(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
+    /* Do gen_eob() tasks before going back to the main loop.  */
+    do_end_instruction(env);
+    helper_rechecking_single_step(env);
+
     /* Just let another CPU run.  */
     cs->exception_index = EXCP_INTERRUPT;
     cpu_loop_exit(cs);

From 6dd7d8c6490b73dcc33dfb1fe76c081e7e2eb820 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 25 May 2024 10:47:31 +0200
Subject: [PATCH 09/42] target/i386: fix TF/RF handling for HLT

HLT uses DISAS_NORETURN because the corresponding helper calls
cpu_loop_exit().  However, while gen_eob() clears HF_RF_MASK and
synthesizes a #DB exception if single-step is active, none of this is
done by HLT.  Note that the single-step trap is generated after the halt
is finished.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/sysemu/misc_helper.c |  2 +-
 target/i386/tcg/sysemu/seg_helper.c  | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 093cc2d0f9..7fa0c5a06d 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -520,7 +520,7 @@ G_NORETURN void helper_hlt(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    do_end_instruction(env);
     cs->halted = 1;
     cs->exception_index = EXCP_HLT;
     cpu_loop_exit(cs);
diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c
index 9ba94deb3a..05174a79e7 100644
--- a/target/i386/tcg/sysemu/seg_helper.c
+++ b/target/i386/tcg/sysemu/seg_helper.c
@@ -130,15 +130,26 @@ void x86_cpu_do_interrupt(CPUState *cs)
 
 bool x86_cpu_exec_halt(CPUState *cpu)
 {
-    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
-        X86CPU *x86_cpu = X86_CPU(cpu);
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
 
+    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
         bql_lock();
         apic_poll_irq(x86_cpu->apic_state);
         cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
         bql_unlock();
     }
-    return cpu_has_work(cpu);
+
+    if (!cpu_has_work(cpu)) {
+        return false;
+    }
+
+    /* Complete HLT instruction.  */
+    if (env->eflags & TF_MASK) {
+        env->dr[6] |= DR6_BS;
+        do_interrupt_all(x86_cpu, EXCP01_DB, 0, 0, env->eip, 0);
+    }
+    return true;
 }
 
 bool x86_need_replay_interrupt(int interrupt_request)

From cdc829b37d4dff686f083c577490da1d75bc159f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 29 May 2024 13:31:39 +0200
Subject: [PATCH 10/42] target/i386: document incorrect semantics of watchpoint
 following MOV/POP SS

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/sysemu/bpt_helper.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c
index c1d5fce250..b29acf41c3 100644
--- a/target/i386/tcg/sysemu/bpt_helper.c
+++ b/target/i386/tcg/sysemu/bpt_helper.c
@@ -215,6 +215,12 @@ void breakpoint_handler(CPUState *cs)
         if (cs->watchpoint_hit->flags & BP_CPU) {
             cs->watchpoint_hit = NULL;
             if (check_hw_breakpoints(env, false)) {
+                /*
+                 * FIXME: #DB should be delayed by one instruction if
+                 * INHIBIT_IRQ is set (STI cannot trigger a watchpoint).
+                 * The delayed #DB should also fuse with one generated
+                 * by ICEBP (aka INT1).
+                 */
                 raise_exception(env, EXCP01_DB);
             } else {
                 cpu_loop_exit_noexc(cs);

From b37c0dc85214e9d5e4a9b6f6a496ce4de3b8a4d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 25 May 2024 11:16:14 +0200
Subject: [PATCH 11/42] target/i386: document use of DISAS_NORETURN

DISAS_NORETURN suppresses the work normally done by gen_eob(), and therefore
must be used in special cases only.  Document them.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a9c6424c7d..2b6f67be40 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4761,6 +4761,17 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
 
     switch (dc->base.is_jmp) {
     case DISAS_NORETURN:
+        /*
+         * Most instructions should not use DISAS_NORETURN, as that suppresses
+         * the handling of hflags normally done by gen_eob().  We can
+         * get here:
+         * - for exception and interrupts
+         * - for jump optimization (which is disabled by INHIBIT_IRQ/RF/TF)
+         * - for VMRUN because RF/TF handling for the host is done after vmexit,
+         *   and INHIBIT_IRQ is loaded from the VMCB
+         * - for HLT/PAUSE/MWAIT to exit the main loop with specific EXCP_* values;
+         *   the helpers handle themselves the tasks normally done by gen_eob().
+         */
         break;
     case DISAS_TOO_MANY:
         gen_update_cc_op(dc);

From f41990f552839ad016467586a9540b9455cc52fd Mon Sep 17 00:00:00 2001
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Date: Thu, 6 Jun 2024 10:53:16 +0100
Subject: [PATCH 12/42] target/i386: use local X86DecodedOp in gen_POP()

This will make subsequent changes a little easier to read.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-ID: <20240606095319.229650-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/emit.c.inc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index f90f3d3c58..ca78504b6e 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -2575,11 +2575,13 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
 
 static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
+    X86DecodedOp *op = &decode->op[0];
     MemOp ot = gen_pop_T0(s);
-    if (decode->op[0].has_ea) {
+
+    if (op->has_ea) {
         /* NOTE: order is important for MMU exceptions */
         gen_op_st_v(s, ot, s->T0, s->A0);
-        decode->op[0].unit = X86_OP_SKIP;
+        op->unit = X86_OP_SKIP;
     }
     /* NOTE: writing back registers after update is important for pop %sp */
     gen_pop_update(s, ot);

From aea49fbb01a4a1191165af0b08b5e27994f22c35 Mon Sep 17 00:00:00 2001
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Date: Thu, 6 Jun 2024 10:53:17 +0100
Subject: [PATCH 13/42] target/i386: use gen_writeback() within gen_POP()

Instead of directly implementing the writeback using gen_op_st_v(), use the
existing gen_writeback() function.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-ID: <20240606095319.229650-3-mark.cave-ayland@ilande.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/emit.c.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ca78504b6e..6123235c00 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -2580,9 +2580,9 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
     if (op->has_ea) {
         /* NOTE: order is important for MMU exceptions */
-        gen_op_st_v(s, ot, s->T0, s->A0);
-        op->unit = X86_OP_SKIP;
+        gen_writeback(s, decode, 0, s->T0);
     }
+
     /* NOTE: writing back registers after update is important for pop %sp */
     gen_pop_update(s, ot);
 }

From f1b8613da380af8931e34dfb4dd0a6cca392d43b Mon Sep 17 00:00:00 2001
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Date: Thu, 6 Jun 2024 10:53:18 +0100
Subject: [PATCH 14/42] target/i386: fix SP when taking a memory fault during
 POP

When OS/2 Warp configures its segment descriptors, many of them are configured with
the P flag clear to allow for a fault-on-demand implementation. In the case where
the stack value is POPped into the segment registers, the SP is incremented before
calling gen_helper_load_seg() to validate the segment descriptor:

IN:
0xffef2c0c:  66 07                    popl     %es

OP:
 ld_i32 loc9,env,$0xfffffffffffffff8
 sub_i32 loc9,loc9,$0x1
 brcond_i32 loc9,$0x0,lt,$L0
 st16_i32 loc9,env,$0xfffffffffffffff8
 st8_i32 $0x1,env,$0xfffffffffffffffc

 ---- 0000000000000c0c 0000000000000000
 ext16u_i64 loc0,rsp
 add_i64 loc0,loc0,ss_base
 ext32u_i64 loc0,loc0
 qemu_ld_a64_i64 loc0,loc0,noat+un+leul,5
 add_i64 loc3,rsp,$0x4
 deposit_i64 rsp,rsp,loc3,$0x0,$0x10
 extrl_i64_i32 loc5,loc0
 call load_seg,$0x0,$0,env,$0x0,loc5
 add_i64 rip,rip,$0x2
 ext16u_i64 rip,rip
 exit_tb $0x0
 set_label $L0
 exit_tb $0x7fff58000043

If helper_load_seg() generates a fault when validating the segment descriptor then as
the SP has already been incremented, the topmost word of the stack is overwritten by
the arguments pushed onto the stack by the CPU before taking the fault handler. As a
consequence things rapidly go wrong upon return from the fault handler due to the
corrupted stack.

Update the logic for the existing writeback condition so that a POP into the segment
registers also calls helper_load_seg() first before incrementing the SP, so that if a
fault occurs the SP remains unaltered.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198
Message-ID: <20240606095319.229650-4-mark.cave-ayland@ilande.co.uk>
Fixes: cc1d28bdbe0 ("target/i386: move 00-5F opcodes to new decoder", 2024-05-07)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/emit.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 6123235c00..4be3d9a6fb 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -2578,7 +2578,7 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     X86DecodedOp *op = &decode->op[0];
     MemOp ot = gen_pop_T0(s);
 
-    if (op->has_ea) {
+    if (op->has_ea || op->unit == X86_OP_SEG) {
         /* NOTE: order is important for MMU exceptions */
         gen_writeback(s, decode, 0, s->T0);
     }

From 3973615e7fbaeef1deeaa067577e373781ced70a Mon Sep 17 00:00:00 2001
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Date: Thu, 6 Jun 2024 10:53:19 +0100
Subject: [PATCH 15/42] target/i386: fix size of EBP writeback in gen_enter()

The calculation of FrameTemp is done using the size indicated by mo_pushpop()
before being written back to EBP, but the final writeback to EBP is done using
the size indicated by mo_stacksize().

In the case where mo_pushpop() is MO_32 and mo_stacksize() is MO_16 then the
final writeback to EBP is done using MO_16 which can leave junk in the top
16-bits of EBP after executing ENTER.

Change the writeback of EBP to use the same size indicated by mo_pushpop() to
ensure that the full value is written back.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198
Message-ID: <20240606095319.229650-5-mark.cave-ayland@ilande.co.uk>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2b6f67be40..fcba9c155f 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2138,7 +2138,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
+    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);

From 75dbebddb6a0f15bdfdae66d63de4905c793ccdb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 6 Jun 2024 11:31:12 +0200
Subject: [PATCH 16/42] machine: default -M mem-merge to off is
 QEMU_MADV_MERGEABLE is not available

Otherwise, starting any guest on a non-Linux guests results in

qemu-system-arm: Couldn't set property 'merge' on 'memory-backend-ram': Invalid argument

Cc: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/core/machine.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 77a356f232..a0ee43ca5c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -17,6 +17,7 @@
 #include "hw/loader.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-machine.h"
+#include "qemu/madvise.h"
 #include "qom/object_interfaces.h"
 #include "sysemu/cpus.h"
 #include "sysemu/sysemu.h"
@@ -1129,7 +1130,7 @@ static void machine_initfn(Object *obj)
     container_get(obj, "/peripheral-anon");
 
     ms->dump_guest_core = true;
-    ms->mem_merge = true;
+    ms->mem_merge = (QEMU_MADV_MERGEABLE != QEMU_MADV_INVALID);
     ms->enable_graphics = true;
     ms->kernel_cmdline = g_strdup("");
     ms->ram_size = mc->default_ram_size;

From 12d7d0c2496efe6325e73b04578213b8ad0af093 Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Wed, 5 Jun 2024 12:44:54 +0200
Subject: [PATCH 17/42] meson: Don't even detect posix_madvise() on Darwin

On Darwin, posix_madvise() has the same return semantics as plain
madvise() [1]. That's not really what our usage expects.
Fortunately, madvise() is available and preferred anyways so we
may stop detecting posix_madvise() on Darwin.

1: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/man/man2/madvise.2.auto.html

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Message-ID: <00f71753bdeb8c0f049fda05fb63b84bb5502fb3.1717584048.git.mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 meson.build | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/meson.build b/meson.build
index d80203f1cd..ec59effca2 100644
--- a/meson.build
+++ b/meson.build
@@ -2556,10 +2556,16 @@ config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix + '''
   #else
   int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); }
   #endif'''))
-config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + '''
-  #include <sys/mman.h>
-  #include <stddef.h>
-  int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }'''))
+
+# On Darwin posix_madvise() has the same return semantics as plain madvise(),
+# i.e. errno is set and -1 is returned. That's not really how POSIX defines the
+# function. On the flip side, it has madvise() which is preferred anyways.
+if host_os != 'darwin'
+  config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + '''
+    #include <sys/mman.h>
+    #include <stddef.h>
+    int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }'''))
+endif
 
 config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + '''
   #include <pthread.h>

From bfb8c79f89773fb7ef6a373efebf3ce691716cb0 Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Wed, 5 Jun 2024 12:44:55 +0200
Subject: [PATCH 18/42] osdep: Make qemu_madvise() to set errno in all cases

The unspoken premise of qemu_madvise() is that errno is set on
error. And it is mostly the case except for posix_madvise() which
is documented to return either zero (on success) or a positive
error number. This means, we must set errno ourselves. And while
at it, make the function return a negative value on error, just
like other error paths do.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-ID: <af17113e7c1f2cc909ffd36d23f5a411b63b8764.1717584048.git.mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/osdep.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/util/osdep.c b/util/osdep.c
index e996c4744a..e42f4e8121 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -57,7 +57,12 @@ int qemu_madvise(void *addr, size_t len, int advice)
 #if defined(CONFIG_MADVISE)
     return madvise(addr, len, advice);
 #elif defined(CONFIG_POSIX_MADVISE)
-    return posix_madvise(addr, len, advice);
+    int rc = posix_madvise(addr, len, advice);
+    if (rc) {
+        errno = rc;
+        return -1;
+    }
+    return 0;
 #else
     errno = EINVAL;
     return -1;

From 210b7b2b3cdef01a15ebabf02dd1a9d8a51743ba Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Wed, 5 Jun 2024 12:44:56 +0200
Subject: [PATCH 19/42] osdep: Make qemu_madvise() return ENOSYS on unsupported
 OSes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not every OS is capable of madvise() or posix_madvise() even. In
that case, errno should be set to ENOSYS as it reflects the cause
better.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-ID: <b381c23bd8f413f1453a2c1a66e0979beaf27433.1717584048.git.mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/osdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/osdep.c b/util/osdep.c
index e42f4e8121..5d23bbfbec 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -64,7 +64,7 @@ int qemu_madvise(void *addr, size_t len, int advice)
     }
     return 0;
 #else
-    errno = EINVAL;
+    errno = ENOSYS;
     return -1;
 #endif
 }

From 5d9a9a617053a97a76ef332896c386d8fc895631 Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Wed, 5 Jun 2024 12:44:58 +0200
Subject: [PATCH 20/42] backends/hostmem: Report error when memory size is
 unaligned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If memory-backend-{file,ram} has a size that's not aligned to
underlying page size it is not only wasteful, but also may lead
to hard to debug behaviour. For instance, in case
memory-backend-file and hugepages, madvise() and mbind() fail.
Rightfully so, page is the smallest unit they can work with. And
even though an error is reported, the root cause it not very
clear:

  qemu-system-x86_64: Couldn't set property 'dump' on 'memory-backend-file': Invalid argument

After this commit:

  qemu-system-x86_64: backend 'memory-backend-file' memory size must be multiple of 2 MiB

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Mario Casquero <mcasquer@redhat.com>
Message-ID: <b5b9f9c6bba07879fb43f3c6f496c69867ae3716.1717584048.git.mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 backends/hostmem-epc.c   |  1 +
 backends/hostmem-file.c  |  1 +
 backends/hostmem-memfd.c |  1 +
 backends/hostmem.c       | 10 ++++++++++
 include/sysemu/hostmem.h |  2 +-
 5 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c
index 735e2e1cf8..f58fcf00a1 100644
--- a/backends/hostmem-epc.c
+++ b/backends/hostmem-epc.c
@@ -36,6 +36,7 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         return false;
     }
 
+    backend->aligned = true;
     name = object_get_canonical_path(OBJECT(backend));
     ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 3c69db7946..7e5072e33e 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -80,6 +80,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         g_assert_not_reached();
     }
 
+    backend->aligned = true;
     name = host_memory_backend_get_name(backend);
     ram_flags = backend->share ? RAM_SHARED : 0;
     ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 745ead0034..6a3c89a12b 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -52,6 +52,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         return false;
     }
 
+    backend->aligned = true;
     name = host_memory_backend_get_name(backend);
     ram_flags = backend->share ? RAM_SHARED : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
diff --git a/backends/hostmem.c b/backends/hostmem.c
index eb9682b4a8..1edc0ede2a 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
 #include "qom/object_interfaces.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/madvise.h"
+#include "qemu/cutils.h"
 #include "hw/qdev-core.h"
 
 #ifdef CONFIG_NUMA
@@ -325,6 +326,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
     void *ptr;
     uint64_t sz;
+    size_t pagesize;
     bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
 
     if (!bc->alloc) {
@@ -336,6 +338,14 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 
     ptr = memory_region_get_ram_ptr(&backend->mr);
     sz = memory_region_size(&backend->mr);
+    pagesize = qemu_ram_pagesize(backend->mr.ram_block);
+
+    if (backend->aligned && !QEMU_IS_ALIGNED(sz, pagesize)) {
+        g_autofree char *pagesize_str = size_to_str(pagesize);
+        error_setg(errp, "backend '%s' memory size must be multiple of %s",
+                   object_get_typename(OBJECT(uc)), pagesize_str);
+        return;
+    }
 
     if (backend->merge) {
         qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index 04b884bf42..de47ae59e4 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -74,7 +74,7 @@ struct HostMemoryBackend {
     uint64_t size;
     bool merge, dump, use_canonical_path;
     bool prealloc, is_mapped, share, reserve;
-    bool guest_memfd;
+    bool guest_memfd, aligned;
     uint32_t prealloc_threads;
     ThreadContext *prealloc_context;
     DECLARE_BITMAP(host_nodes, MAX_NODES + 1);

From a2b6a96505097c54f5db4c77f66e9c47af4dad22 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 6 Jun 2024 11:33:06 +0200
Subject: [PATCH 21/42] machine, hostmem: improve error messages for
 unsupported features

Detect early unsupported MADV_MERGEABLE and MADV_DONTDUMP, and print a clearer
error message that points to the deficiency of the host.

Cc: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 backends/hostmem.c | 16 ++++++++++++++++
 hw/core/machine.c  |  8 ++++++++
 2 files changed, 24 insertions(+)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 1edc0ede2a..6da3d7383e 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -170,6 +170,14 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 
+    if (QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) {
+        if (value) {
+            error_setg(errp, "Memory merging is not supported on this host");
+        }
+        assert(!backend->merge);
+        return;
+    }
+
     if (!host_memory_backend_mr_inited(backend)) {
         backend->merge = value;
         return;
@@ -196,6 +204,14 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 
+    if (QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) {
+        if (!value) {
+            error_setg(errp, "Dumping guest memory cannot be disabled on this host");
+        }
+        assert(backend->dump);
+        return;
+    }
+
     if (!host_memory_backend_mr_inited(backend)) {
         backend->dump = value;
         return;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index a0ee43ca5c..c93d249244 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -428,6 +428,10 @@ static void machine_set_dump_guest_core(Object *obj, bool value, Error **errp)
 {
     MachineState *ms = MACHINE(obj);
 
+    if (!value && QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) {
+        error_setg(errp, "Dumping guest memory cannot be disabled on this host");
+        return;
+    }
     ms->dump_guest_core = value;
 }
 
@@ -442,6 +446,10 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
 {
     MachineState *ms = MACHINE(obj);
 
+    if (value && QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) {
+        error_setg(errp, "Memory merging is not supported on this host");
+        return;
+    }
     ms->mem_merge = value;
 }
 

From 5becdc0ab083e8bf346270cd34cb11b568e7538c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 6 Jun 2024 13:06:56 +0200
Subject: [PATCH 22/42] hostmem: simplify the code for merge and dump
 properties
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No semantic change, just simpler control flow.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 backends/hostmem.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 6da3d7383e..4e5576a4ad 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -178,19 +178,16 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
         return;
     }
 
-    if (!host_memory_backend_mr_inited(backend)) {
-        backend->merge = value;
-        return;
-    }
-
-    if (value != backend->merge) {
+    if (!host_memory_backend_mr_inited(backend) &&
+        value != backend->merge) {
         void *ptr = memory_region_get_ram_ptr(&backend->mr);
         uint64_t sz = memory_region_size(&backend->mr);
 
         qemu_madvise(ptr, sz,
                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
-        backend->merge = value;
     }
+
+    backend->merge = value;
 }
 
 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
@@ -212,19 +209,16 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
         return;
     }
 
-    if (!host_memory_backend_mr_inited(backend)) {
-        backend->dump = value;
-        return;
-    }
-
-    if (value != backend->dump) {
+    if (host_memory_backend_mr_inited(backend) &&
+        value != backend->dump) {
         void *ptr = memory_region_get_ram_ptr(&backend->mr);
         uint64_t sz = memory_region_size(&backend->mr);
 
         qemu_madvise(ptr, sz,
                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
-        backend->dump = value;
     }
+
+    backend->dump = value;
 }
 
 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)

From 75997e182b695f2e3f0a2d649734952af5caf3ee Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 4 Jun 2024 18:17:55 +0200
Subject: [PATCH 23/42] scsi-disk: Don't silently truncate serial number

Before this commit, scsi-disk accepts a string of arbitrary length for
its "serial" property. However, the value visible on the guest is
actually truncated to 36 characters. This limitation doesn't come from
the SCSI specification, it is an arbitrary limit that was initially
picked as 20 and later bumped to 36 by commit 48b62063.

Similarly, device_id was introduced as a copy of the serial number,
limited to 20 characters, but commit 48b62063 forgot to actually bump
it.

As long as we silently truncate the given string, extending the limit is
actually not a harmless change, but break the guest ABI. This is the
most important reason why commit 48b62063 was really wrong (and it's
also why we can't change device_id to be in sync with the serial number
again and use 36 characters now, it would be another guest ABI
breakage).

In order to avoid future breakage, don't silently truncate the serial
number string any more, but just error out if it would be truncated.

Buglink: https://issues.redhat.com/browse/RHEL-3542
Suggested-by: Peter Krempa <pkrempa@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20240604161755.63448-1-kwolf@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-disk.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 4bd7af9d0c..5f55ae54e4 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -58,6 +58,9 @@
 
 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
 
+#define MAX_SERIAL_LEN              36
+#define MAX_SERIAL_LEN_FOR_DEVID    20
+
 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
 
 struct SCSIDiskClass {
@@ -648,8 +651,8 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
         }
 
         l = strlen(s->serial);
-        if (l > 36) {
-            l = 36;
+        if (l > MAX_SERIAL_LEN) {
+            l = MAX_SERIAL_LEN;
         }
 
         trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
@@ -2501,9 +2504,20 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
     if (!s->vendor) {
         s->vendor = g_strdup("QEMU");
     }
+    if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
+        error_setg(errp, "The serial number can't be longer than %d characters",
+                   MAX_SERIAL_LEN);
+        return;
+    }
     if (!s->device_id) {
         if (s->serial) {
-            s->device_id = g_strdup_printf("%.20s", s->serial);
+            if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
+                error_setg(errp, "The serial number can't be longer than %d "
+                           "characters when it is also used as the default for "
+                           "device_id", MAX_SERIAL_LEN_FOR_DEVID);
+                return;
+            }
+            s->device_id = g_strdup(s->serial);
         } else {
             const char *str = blk_name(s->qdev.conf.blk);
             if (str && *str) {

From fcce5287c009770d74f289185eadd6163a1b6a4b Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Wed, 5 Jun 2024 23:25:49 +0800
Subject: [PATCH 24/42] stubs/meson: Fix qemuutil build when --disable-system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling without system, user, tools or guest-agent fails with the
following error message:

./configure --disable-system --disable-user --disable-tools \
--disable-guest-agent

error message:

/usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function `error_printf':
/media/liuzhao/data/qemu-cook/build/../util/error-report.c:38: undefined reference to `error_vprintf'
/usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function `vreport':
/media/liuzhao/data/qemu-cook/build/../util/error-report.c:215: undefined reference to `error_vprintf'
collect2: error: ld returned 1 exit status

This is because tests/bench and tests/unit both need qemuutil, which
requires error_vprintf stub when system is disabled.

Add error_vprintf stub into stub_ss for all cases other than disabling
system.

Fixes: 3a15604900c4 ("stubs: include stubs only if needed")
Reported-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20240605152549.1795762-1-zhao1.liu@intel.com>
[Include error-printf.c unconditionally. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 stubs/meson.build | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/stubs/meson.build b/stubs/meson.build
index 3b9d42023c..f15b48d01f 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -3,6 +3,7 @@
 # below, so that it is clear who needs the stubbed functionality.
 
 stub_ss.add(files('cpu-get-clock.c'))
+stub_ss.add(files('error-printf.c'))
 stub_ss.add(files('fdset.c'))
 stub_ss.add(files('iothread-lock.c'))
 stub_ss.add(files('is-daemonized.c'))
@@ -45,17 +46,10 @@ if have_block or have_ga
   stub_ss.add(files('qmp-quit.c'))
 endif
 
-if have_ga
-  stub_ss.add(files('error-printf.c'))
-endif
-
 if have_block or have_user
   stub_ss.add(files('qtest.c'))
   stub_ss.add(files('vm-stop.c'))
   stub_ss.add(files('vmstate.c'))
-
-  # more symbols provided by the monitor
-  stub_ss.add(files('error-printf.c'))
 endif
 
 if have_user

From 9c267239c7a307645849139389b42641655b7ece Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:50 +0200
Subject: [PATCH 25/42] i386/hvf: Adds support for INVTSC cpuid bit

This patch adds the INVTSC bit to the Hypervisor.framework accelerator's
CPUID bit passthrough allow-list. Previously, specifying +invtsc in the CPU
configuration would fail with the following warning despite the host CPU
advertising the feature:

qemu-system-x86_64: warning: host doesn't support requested feature:
CPUID.80000007H:EDX.invtsc [bit 8]

x86 macOS itself relies on a fixed rate TSC for its own Mach absolute time
timestamp mechanism, so there's no reason we can't enable this bit for guests.
When the feature is enabled, a migration blocker is installed.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Roman Bolshakov <roman@roolebo.dev>
Tested-by: Roman Bolshakov <roman@roolebo.dev>
Message-ID: <20240605112556.43193-2-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c       | 18 ++++++++++++++++++
 target/i386/hvf/x86_cpuid.c |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index e493452acb..e6e916225b 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -49,6 +49,8 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
 #include "qemu/memalign.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
 
 #include "sysemu/hvf.h"
 #include "sysemu/hvf_int.h"
@@ -74,6 +76,8 @@
 #include "qemu/accel.h"
 #include "target/i386/cpu.h"
 
+static Error *invtsc_mig_blocker;
+
 void vmx_update_tpr(CPUState *cpu)
 {
     /* TODO: need integrate APIC handling */
@@ -221,6 +225,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 {
     X86CPU *x86cpu = X86_CPU(cpu);
     CPUX86State *env = &x86cpu->env;
+    Error *local_err = NULL;
+    int r;
     uint64_t reqCap;
 
     init_emu();
@@ -238,6 +244,18 @@ int hvf_arch_init_vcpu(CPUState *cpu)
         }
     }
 
+    if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
+        invtsc_mig_blocker == NULL) {
+        error_setg(&invtsc_mig_blocker,
+                   "State blocked by non-migratable CPU device (invtsc flag)");
+        r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
+        if (r < 0) {
+            error_report_err(local_err);
+            return r;
+        }
+    }
+
+
     if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
         &hvf_state->hvf_caps->vmx_cap_pinbased)) {
         abort();
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index 9380b90496..e56cd8411b 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -146,6 +146,10 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
                 CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_OSVW | CPUID_EXT3_XOP |
                 CPUID_EXT3_FMA4 | CPUID_EXT3_TBM;
         break;
+    case 0x80000007:
+        edx &= CPUID_APM_INVTSC;
+        eax = ebx = ecx = 0;
+        break;
     default:
         return 0;
     }

From 0e4e622e328e203610ac300a140e43d5e2929eda Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:51 +0200
Subject: [PATCH 26/42] i386/hvf: Fixes some compilation warnings

A bunch of function definitions used empty parentheses instead of (void) syntax, yielding the following warning when building with clang on macOS:

warning: a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes]

In addition to fixing these function headers, it also fixes what appears to be a typo causing a variable to be unused after initialisation.

warning: variable 'entry_ctls' set but not used [-Wunused-but-set-variable]

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Roman Bolshakov <roman@roolebo.dev>
Tested-by: Roman Bolshakov <roman@roolebo.dev>
Message-ID: <20240605112556.43193-3-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/vmx.h        | 3 +--
 target/i386/hvf/x86_decode.c | 2 +-
 target/i386/hvf/x86_emu.c    | 4 ++--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 0fffcfa46c..3954ef883d 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -95,8 +95,7 @@ static void enter_long_mode(hv_vcpuid_t vcpu, uint64_t cr0, uint64_t efer)
     efer |= MSR_EFER_LMA;
     wvmcs(vcpu, VMCS_GUEST_IA32_EFER, efer);
     entry_ctls = rvmcs(vcpu, VMCS_ENTRY_CTLS);
-    wvmcs(vcpu, VMCS_ENTRY_CTLS, rvmcs(vcpu, VMCS_ENTRY_CTLS) |
-          VM_ENTRY_GUEST_LMA);
+    wvmcs(vcpu, VMCS_ENTRY_CTLS, entry_ctls | VM_ENTRY_GUEST_LMA);
 
     uint64_t guest_tr_ar = rvmcs(vcpu, VMCS_GUEST_TR_ACCESS_RIGHTS);
     if ((efer & MSR_EFER_LME) &&
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 3728d7705e..a4a28f113f 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -2111,7 +2111,7 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
     return decode->len;
 }
 
-void init_decoder()
+void init_decoder(void)
 {
     int i;
     
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 3a3f0a50d0..38c782b8e3 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -1409,7 +1409,7 @@ static struct cmd_handler {
 
 static struct cmd_handler _cmd_handler[X86_DECODE_CMD_LAST];
 
-static void init_cmd_handler()
+static void init_cmd_handler(void)
 {
     int i;
     for (i = 0; i < ARRAY_SIZE(handlers); i++) {
@@ -1481,7 +1481,7 @@ bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
     return true;
 }
 
-void init_emu()
+void init_emu(void)
 {
     init_cmd_handler();
 }

From f21f0cbc2c37c35680317d9dccbcbaf4aedd3a25 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:52 +0200
Subject: [PATCH 27/42] hvf: Consistent types for vCPU handles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

macOS Hypervisor.framework uses different types for identifying vCPUs, hv_vcpu_t or hv_vcpuid_t, depending on host architecture. They are not just differently named typedefs for the same primitive type, but reference different-width integers.

Instead of using an integer type and casting where necessary, this change introduces a typedef which resolves the active architecture’s hvf typedef. It also removes a now-unnecessary cast.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Roman Bolshakov <roman@roolebo.dev>
Tested-by: Roman Bolshakov <roman@roolebo.dev>
Message-ID: <20240605112556.43193-4-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/hvf/hvf-accel-ops.c | 2 +-
 include/sysemu/hvf_int.h  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 6f1e27ef46..b2a37a2229 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -400,7 +400,7 @@ static int hvf_init_vcpu(CPUState *cpu)
     r = hv_vcpu_create(&cpu->accel->fd,
                        (hv_vcpu_exit_t **)&cpu->accel->exit, NULL);
 #else
-    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->accel->fd, HV_VCPU_DEFAULT);
+    r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT);
 #endif
     cpu->accel->dirty = true;
     assert_hvf_ok(r);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 4a327fd526..30e739a2b5 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -13,8 +13,10 @@
 
 #ifdef __aarch64__
 #include <Hypervisor/Hypervisor.h>
+typedef hv_vcpu_t hvf_vcpuid;
 #else
 #include <Hypervisor/hv.h>
+typedef hv_vcpuid_t hvf_vcpuid;
 #endif
 
 /* hvf_slot flags */
@@ -50,7 +52,7 @@ struct HVFState {
 extern HVFState *hvf_state;
 
 struct AccelCPUState {
-    uint64_t fd;
+    hvf_vcpuid fd;
     void *exit;
     bool vtimer_masked;
     sigset_t unblock_ipi_mask;

From 3e2c6727cb6b6311ee129c24b561bb87495f1a25 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:53 +0200
Subject: [PATCH 28/42] i386/hvf: Fixes dirty memory tracking by page
 granularity RX->RWX change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using x86 macOS Hypervisor.framework as accelerator, detection of
dirty memory regions is implemented by marking logged memory region
slots as read-only in the EPT, then setting the dirty flag when a
guest write causes a fault. The area marked dirty should then be marked
writable in order for subsequent writes to succeed without a VM exit.

However, dirty bits are tracked on a per-page basis, whereas the fault
handler was marking the whole logged memory region as writable. This
change fixes the fault handler so only the protection of the single
faulting page is marked as dirty.

(Note: the dirty page tracking appeared to work despite this error
because HVF’s hv_vcpu_run() function generated unnecessary EPT fault
exits, which ended up causing the dirty marking handler to run even
when the memory region had been marked RW. When using
hv_vcpu_run_until(), a change planned for a subsequent commit, these
spurious exits no longer occur, so dirty memory tracking malfunctions.)

Additionally, the dirty page is set to permit code execution, the same
as all other guest memory; changing memory protection from RX to RW not
RWX appears to have been an oversight.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Reviewed-by: Roman Bolshakov <roman@roolebo.dev>
Tested-by: Roman Bolshakov <roman@roolebo.dev>
Message-ID: <20240605112556.43193-5-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index e6e916225b..268c5734d5 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -135,9 +135,10 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
 
     if (write && slot) {
         if (slot->flags & HVF_SLOT_LOG) {
+            uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
             memory_region_set_dirty(slot->region, gpa - slot->start, 1);
-            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
-                          HV_MEMORY_READ | HV_MEMORY_WRITE);
+            hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
+                          HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
         }
     }
 

From bf9bf2306cc8ea31b2b9bf28002aacb188ec2568 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:54 +0200
Subject: [PATCH 29/42] i386/hvf: In kick_vcpu use hv_vcpu_interrupt to force
 exit

When interrupting a vCPU thread, this patch actually tells the hypervisor to
stop running guest code on that vCPU.

Calling hv_vcpu_interrupt actually forces a vCPU exit, analogously to
hv_vcpus_exit on aarch64. Alternatively, if the vCPU thread
is not
running the VM, it will immediately cause an exit when it attempts
to do so.

Previously, hvf_kick_vcpu_thread relied upon hv_vcpu_run returning very
frequently, including many spurious exits, which made it less of a problem that
nothing was actively done to stop the vCPU thread running guest code.
The newer, more efficient hv_vcpu_run_until exits much more rarely, so a true
"kick" is needed before switching to that.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Message-ID: <20240605112556.43193-6-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 268c5734d5..106ac5cbf6 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -215,6 +215,7 @@ static inline bool apic_bus_freq_is_known(CPUX86State *env)
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
     cpus_kick_thread(cpu);
+    hv_vcpu_interrupt(&cpu->accel->fd, 1);
 }
 
 int hvf_arch_init(void)

From a59f5b2f83d4de113556e5b68cbd68c03f712679 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:55 +0200
Subject: [PATCH 30/42] i386/hvf: Updates API usage to use modern vCPU run
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

macOS 10.15 introduced the more efficient hv_vcpu_run_until() function
to supersede hv_vcpu_run(). According to the documentation, there is no
longer any reason to use the latter on modern host OS versions, especially
after 11.0 added support for an indefinite deadline.

Observed behaviour of the newer function is that as documented, it exits
much less frequently - and most of the original function’s exits seem to
have been effectively pointless.

Another reason to use the new function is that it is a prerequisite for
using newer features such as in-kernel APIC support. (Not covered by
this patch.)

This change implements the upgrade by selecting one of three code paths
at compile time: two static code paths for the new and old functions
respectively, when building for targets where the new function is either
not available, or where the built executable won’t run on older
platforms lacking the new function anyway. The third code path selects
dynamically based on runtime detected availability of the weakly-linked
symbol.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Message-ID: <20240605112556.43193-7-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 106ac5cbf6..2d0eef6cd9 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -427,6 +427,27 @@ static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     }
 }
 
+static hv_return_t hvf_vcpu_run(hv_vcpuid_t vcpu_id)
+{
+    /*
+     * hv_vcpu_run_until is available and recommended from macOS 10.15+,
+     * HV_DEADLINE_FOREVER from 11.0. Test for availability at runtime and fall
+     * back to hv_vcpu_run() only where necessary.
+     */
+#ifndef MAC_OS_VERSION_11_0
+    return hv_vcpu_run(vcpu_id);
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+    return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+#else /* MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 */
+    /* 11.0 SDK or newer, but could be < 11 at runtime */
+    if (__builtin_available(macOS 11.0, *)) {
+        return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+    } else {
+        return hv_vcpu_run(vcpu_id);
+    }
+#endif
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -455,7 +476,7 @@ int hvf_vcpu_exec(CPUState *cpu)
             return EXCP_HLT;
         }
 
-        hv_return_t r  = hv_vcpu_run(cpu->accel->fd);
+        hv_return_t r = hvf_vcpu_run(cpu->accel->fd);
         assert_hvf_ok(r);
 
         /* handle VMEXIT */

From a3c67dfc140018626f21ee507a726151cdb95ce3 Mon Sep 17 00:00:00 2001
From: Phil Dennis-Jordan <phil@philjordan.eu>
Date: Wed, 5 Jun 2024 13:25:56 +0200
Subject: [PATCH 31/42] hvf: Makes assert_hvf_ok report failed expression

When a macOS Hypervisor.framework call fails which is checked by
assert_hvf_ok(), Qemu exits printing the error value, but not the
location
in the code, as regular assert() macro expansions would.

This change turns assert_hvf_ok() into a macro similar to other
assertions, which expands to a call to the corresponding _impl()
function together with information about the expression that failed
the assertion and its location in the code.

Additionally, stringifying the numeric hv_return_t code is factored
into a helper function that can be reused for diagnostics and debugging
outside of assertions.

Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu>
Message-ID: <20240605112556.43193-8-phil@philjordan.eu>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/hvf/hvf-all.c      | 51 +++++++++++++++++-----------------------
 include/sysemu/hvf_int.h |  5 +++-
 2 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
index db05b81be5..c008dc2f1e 100644
--- a/accel/hvf/hvf-all.c
+++ b/accel/hvf/hvf-all.c
@@ -13,40 +13,33 @@
 #include "sysemu/hvf.h"
 #include "sysemu/hvf_int.h"
 
-void assert_hvf_ok(hv_return_t ret)
+const char *hvf_return_string(hv_return_t ret)
+{
+    switch (ret) {
+    case HV_SUCCESS:      return "HV_SUCCESS";
+    case HV_ERROR:        return "HV_ERROR";
+    case HV_BUSY:         return "HV_BUSY";
+    case HV_BAD_ARGUMENT: return "HV_BAD_ARGUMENT";
+    case HV_NO_RESOURCES: return "HV_NO_RESOURCES";
+    case HV_NO_DEVICE:    return "HV_NO_DEVICE";
+    case HV_UNSUPPORTED:  return "HV_UNSUPPORTED";
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+    case HV_DENIED:       return "HV_DENIED";
+#endif
+    default:              return "[unknown hv_return value]";
+    }
+}
+
+void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
+                        const char *exp)
 {
     if (ret == HV_SUCCESS) {
         return;
     }
 
-    switch (ret) {
-    case HV_ERROR:
-        error_report("Error: HV_ERROR");
-        break;
-    case HV_BUSY:
-        error_report("Error: HV_BUSY");
-        break;
-    case HV_BAD_ARGUMENT:
-        error_report("Error: HV_BAD_ARGUMENT");
-        break;
-    case HV_NO_RESOURCES:
-        error_report("Error: HV_NO_RESOURCES");
-        break;
-    case HV_NO_DEVICE:
-        error_report("Error: HV_NO_DEVICE");
-        break;
-    case HV_UNSUPPORTED:
-        error_report("Error: HV_UNSUPPORTED");
-        break;
-#if defined(MAC_OS_VERSION_11_0) && \
-    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
-    case HV_DENIED:
-        error_report("Error: HV_DENIED");
-        break;
-#endif
-    default:
-        error_report("Unknown Error");
-    }
+    error_report("Error: %s = %s (0x%x, at %s:%u)",
+        exp, hvf_return_string(ret), ret, file, line);
 
     abort();
 }
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 30e739a2b5..5b28d17ba1 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -60,7 +60,10 @@ struct AccelCPUState {
     bool dirty;
 };
 
-void assert_hvf_ok(hv_return_t ret);
+void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
+                        const char *exp);
+#define assert_hvf_ok(EX) assert_hvf_ok_impl((EX), __FILE__, __LINE__, #EX)
+const char *hvf_return_string(hv_return_t ret);
 int hvf_arch_init(void);
 int hvf_arch_init_vcpu(CPUState *cpu);
 void hvf_arch_vcpu_destroy(CPUState *cpu);

From c1acad9f72d14daf918563eb77d2b31c39fbd06a Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:07 -0800
Subject: [PATCH 32/42] target/i386: add support for FRED in CPUID enumeration

FRED, i.e., the Intel flexible return and event delivery architecture,
defines simple new transitions that change privilege level (ring
transitions).

The new transitions defined by the FRED architecture are FRED event
delivery and, for returning from events, two FRED return instructions.
FRED event delivery can effect a transition from ring 3 to ring 0, but
it is used also to deliver events incident to ring 0.  One FRED
instruction (ERETU) effects a return from ring 0 to ring 3, while the
other (ERETS) returns while remaining in ring 0.  Collectively, FRED
event delivery and the FRED return instructions are FRED transitions.

In addition to these transitions, the FRED architecture defines a new
instruction (LKGS) for managing the state of the GS segment register.
The LKGS instruction can be used by 64-bit operating systems that do
not use the new FRED transitions.

WRMSRNS is an instruction that behaves exactly like WRMSR, with the
only difference being that it is not a serializing instruction by
default.  Under certain conditions, WRMSRNS may replace WRMSR to improve
performance.  FRED uses it to switch RSP0 in a faster manner.

Search for the latest FRED spec in most search engines with this search
pattern:

  site:intel.com FRED (flexible return and event delivery) specification

The CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[17] enumerates FRED, and
the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[18] enumerates LKGS, and
the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[19] enumerates WRMSRNS.

Add CPUID definitions for FRED/LKGS/WRMSRNS, and expose them to KVM guests.

Because FRED relies on LKGS and WRMSRNS, add that to feature dependency
map.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-2-xin3.li@intel.com>
[Fix order of dependencies, add dependencies from LM to FRED. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.c | 14 +++++++++++++-
 target/i386/cpu.h |  6 ++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 914bef442c..bfb5a25e59 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1114,7 +1114,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "avx-vnni", "avx512-bf16", NULL, "cmpccxadd",
             NULL, NULL, "fzrm", "fsrs",
             "fsrc", NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            NULL, "fred", "lkgs", "wrmsrns",
             NULL, "amx-fp16", NULL, "avx-ifma",
             NULL, NULL, "lam", NULL,
             NULL, NULL, NULL, NULL,
@@ -1701,6 +1701,18 @@ static FeatureDep feature_dependencies[] = {
         .from = { FEAT_7_0_ECX,             CPUID_7_0_ECX_WAITPKG },
         .to = { FEAT_VMX_SECONDARY_CTLS,    VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
     },
+    {
+        .from = { FEAT_8000_0001_EDX,       CPUID_EXT2_LM },
+        .to = { FEAT_7_1_EAX,               CPUID_7_1_EAX_FRED },
+    },
+    {
+        .from = { FEAT_7_1_EAX,             CPUID_7_1_EAX_LKGS },
+        .to = { FEAT_7_1_EAX,               CPUID_7_1_EAX_FRED },
+    },
+    {
+        .from = { FEAT_7_1_EAX,             CPUID_7_1_EAX_WRMSRNS },
+        .to = { FEAT_7_1_EAX,               CPUID_7_1_EAX_FRED },
+    },
 };
 
 typedef struct X86RegisterInfo32 {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c64ef0c1a2..ad3577056d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -941,6 +941,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
 /* PREFETCHIT0/1 Instructions */
 #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
+/* Flexible return and event delivery (FRED) */
+#define CPUID_7_1_EAX_FRED              (1U << 17)
+/* Load into IA32_KERNEL_GS_BASE (LKGS) */
+#define CPUID_7_1_EAX_LKGS              (1U << 18)
+/* Non-Serializing Write to Model Specific Register (WRMSRNS) */
+#define CPUID_7_1_EAX_WRMSRNS           (1U << 19)
 
 /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
 #define CPUID_7_2_EDX_MCDT_NO           (1U << 5)

From f88ddc40c6d8b591a357108feec52cea13796d2d Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:08 -0800
Subject: [PATCH 33/42] target/i386: mark CR4.FRED not reserved

The CR4.FRED bit, i.e., CR4[32], is no longer a reserved bit when FRED
is exposed to guests, otherwise it is still a reserved bit.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20231109072012.8078-3-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ad3577056d..9a582218f4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -261,6 +261,18 @@ typedef enum X86Seg {
 #define CR4_PKS_MASK   (1U << 24)
 #define CR4_LAM_SUP_MASK (1U << 28)
 
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK   (1ULL << 32)
+#else
+#define CR4_FRED_MASK   0
+#endif
+
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK   (1ULL << 32)
+#else
+#define CR4_FRED_MASK   0
+#endif
+
 #define CR4_RESERVED_MASK \
 (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
                 | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \
@@ -269,7 +281,7 @@ typedef enum X86Seg {
                 | CR4_LA57_MASK \
                 | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \
                 | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \
-                | CR4_LAM_SUP_MASK))
+                | CR4_LAM_SUP_MASK | CR4_FRED_MASK))
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -2613,6 +2625,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env)
     if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) {
         reserved_bits |= CR4_LAM_SUP_MASK;
     }
+    if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) {
+        reserved_bits |= CR4_FRED_MASK;
+    }
     return reserved_bits;
 }
 

From 2e641870170e28df28c5d9914e76ea7cab141516 Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:10 -0800
Subject: [PATCH 34/42] vmxcap: add support for VMX FRED controls

Report secondary vm-exit controls and the VMX controls used to
save/load FRED MSRs.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-5-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 scripts/kvm/vmxcap | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index 3fb4d5b342..44898d73c2 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -24,6 +24,7 @@ MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
 MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
 MSR_IA32_VMX_VMFUNC = 0x491
 MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492
+MSR_IA32_VMX_EXIT_CTLS2 = 0x493
 
 class msr(object):
     def __init__(self):
@@ -219,11 +220,21 @@ controls = [
             23: 'Clear IA32_BNDCFGS',
             24: 'Conceal VM exits from PT',
             25: 'Clear IA32_RTIT_CTL',
+            31: 'Activate secondary VM-exit controls',
             },
         cap_msr = MSR_IA32_VMX_EXIT_CTLS,
         true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
         ),
 
+    Allowed1Control(
+        name = 'secondary VM-Exit controls',
+        bits = {
+            0: 'Save IA32 FRED MSRs',
+            1: 'Load IA32 FRED MSRs',
+            },
+        cap_msr = MSR_IA32_VMX_EXIT_CTLS2,
+        ),
+
     Control(
         name = 'VM-Entry controls',
         bits = {
@@ -237,6 +248,7 @@ controls = [
             16: 'Load IA32_BNDCFGS',
             17: 'Conceal VM entries from PT',
             18: 'Load IA32_RTIT_CTL',
+            23: 'Load IA32 FRED MSRs',
             },
         cap_msr = MSR_IA32_VMX_ENTRY_CTLS,
         true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,

From ef202d64c3020f3df03c39d3ad688732d81aaae8 Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:11 -0800
Subject: [PATCH 35/42] target/i386: enumerate VMX nested-exception support

Allow VMX nested-exception support to be exposed in KVM guests, thus
nested KVM guests can enumerate it.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-6-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 scripts/kvm/vmxcap | 1 +
 target/i386/cpu.c  | 1 +
 target/i386/cpu.h  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index 44898d73c2..508be19c75 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -117,6 +117,7 @@ controls = [
             54: 'INS/OUTS instruction information',
             55: 'IA32_VMX_TRUE_*_CTLS support',
             56: 'Skip checks on event error code',
+            58: 'VMX nested exception support',
             },
         msr = MSR_IA32_VMX_BASIC,
         ),
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index bfb5a25e59..383230fa47 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1492,6 +1492,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             [54] = "vmx-ins-outs",
             [55] = "vmx-true-ctls",
             [56] = "vmx-any-errcode",
+            [58] = "vmx-nested-exception",
         },
         .msr = {
             .index = MSR_IA32_VMX_BASIC,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 9a582218f4..8ff27e933d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1071,6 +1071,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define MSR_VMX_BASIC_INS_OUTS                       (1ULL << 54)
 #define MSR_VMX_BASIC_TRUE_CTLS                      (1ULL << 55)
 #define MSR_VMX_BASIC_ANY_ERRCODE                    (1ULL << 56)
+#define MSR_VMX_BASIC_NESTED_EXCEPTION               (1ULL << 58)
 
 #define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK     0x1Full
 #define MSR_VMX_MISC_STORE_LMA                       (1ULL << 5)

From 4ebd98eb3ade5957a842da1420bda012eeeaab9c Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:12 -0800
Subject: [PATCH 36/42] target/i386: Add get/set/migrate support for FRED MSRs

FRED CPU states are managed in 9 new FRED MSRs, in addtion to a few
existing CPU registers and MSRs, e.g., CR4.FRED and MSR_IA32_PL0_SSP.

Save/restore/migrate FRED MSRs if FRED is exposed to the guest.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-7-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h     | 22 +++++++++++++++++++
 target/i386/kvm/kvm.c | 49 +++++++++++++++++++++++++++++++++++++++++++
 target/i386/machine.c | 28 +++++++++++++++++++++++++
 3 files changed, 99 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8ff27e933d..29d799adfd 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -538,6 +538,17 @@ typedef enum X86Seg {
 #define MSR_IA32_XFD                    0x000001c4
 #define MSR_IA32_XFD_ERR                0x000001c5
 
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0              0x000001cc       /* Stack level 0 regular stack pointer */
+#define MSR_IA32_FRED_RSP1              0x000001cd       /* Stack level 1 regular stack pointer */
+#define MSR_IA32_FRED_RSP2              0x000001ce       /* Stack level 2 regular stack pointer */
+#define MSR_IA32_FRED_RSP3              0x000001cf       /* Stack level 3 regular stack pointer */
+#define MSR_IA32_FRED_STKLVLS           0x000001d0       /* FRED exception stack levels */
+#define MSR_IA32_FRED_SSP1              0x000001d1       /* Stack level 1 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP2              0x000001d2       /* Stack level 2 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP3              0x000001d3       /* Stack level 3 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_CONFIG            0x000001d4       /* FRED Entrypoint and interrupt stack level */
+
 #define MSR_IA32_BNDCFGS                0x00000d90
 #define MSR_IA32_XSS                    0x00000da0
 #define MSR_IA32_UMWAIT_CONTROL         0xe1
@@ -1723,6 +1734,17 @@ typedef struct CPUArchState {
     target_ulong cstar;
     target_ulong fmask;
     target_ulong kernelgsbase;
+
+    /* FRED MSRs */
+    uint64_t fred_rsp0;
+    uint64_t fred_rsp1;
+    uint64_t fred_rsp2;
+    uint64_t fred_rsp3;
+    uint64_t fred_stklvls;
+    uint64_t fred_ssp1;
+    uint64_t fred_ssp2;
+    uint64_t fred_ssp3;
+    uint64_t fred_config;
 #endif
 
     uint64_t tsc_adjust;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 0852ed077f..b563520981 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3376,6 +3376,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
         kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
         kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+        if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
+        }
     }
 #endif
 
@@ -3848,6 +3859,17 @@ static int kvm_get_msrs(X86CPU *cpu)
         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
         kvm_msr_entry_add(cpu, MSR_FMASK, 0);
         kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
+        if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
+        }
     }
 #endif
     kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
@@ -4069,6 +4091,33 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_LSTAR:
             env->lstar = msrs[i].data;
             break;
+        case MSR_IA32_FRED_RSP0:
+            env->fred_rsp0 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP1:
+            env->fred_rsp1 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP2:
+            env->fred_rsp2 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP3:
+            env->fred_rsp3 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_STKLVLS:
+            env->fred_stklvls = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP1:
+            env->fred_ssp1 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP2:
+            env->fred_ssp2 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP3:
+            env->fred_ssp3 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_CONFIG:
+            env->fred_config = msrs[i].data;
+            break;
 #endif
         case MSR_IA32_TSC:
             env->tsc = msrs[i].data;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index c3ae320814..39f8294f27 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1544,6 +1544,33 @@ static const VMStateDescription vmstate_msr_xfd = {
 };
 
 #ifdef TARGET_X86_64
+static bool intel_fred_msrs_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED);
+}
+
+static const VMStateDescription vmstate_msr_fred = {
+    .name = "cpu/fred",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = intel_fred_msrs_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.fred_rsp0, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp1, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp2, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp3, X86CPU),
+        VMSTATE_UINT64(env.fred_stklvls, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp1, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp2, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp3, X86CPU),
+        VMSTATE_UINT64(env.fred_config, X86CPU),
+        VMSTATE_END_OF_LIST()
+            }
+        };
+
 static bool amx_xtile_needed(void *opaque)
 {
     X86CPU *cpu = opaque;
@@ -1747,6 +1774,7 @@ const VMStateDescription vmstate_x86_cpu = {
         &vmstate_pdptrs,
         &vmstate_msr_xfd,
 #ifdef TARGET_X86_64
+        &vmstate_msr_fred,
         &vmstate_amx_xtile,
 #endif
         &vmstate_arch_lbr,

From 888788dd7620f8e3fa14108eab2d708c16460266 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Thu, 6 Jun 2024 16:54:36 +0800
Subject: [PATCH 37/42] docs: i386: pc: Avoid mentioning limit of maximum vCPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Different versions of PC machine support different maximum vCPUs, and
even different features have limits on the maximum number of vCPUs (
For example, if x2apic is not enabled in the TCG case, the maximum of
255 vCPUs are supported).

It is difficult to list the maximum vCPUs under all restrictions. Thus,
to avoid confusion, avoid mentioning specific maximum vCPU number
limitations here.

Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-ID: <20240606085436.2028900-1-zhao1.liu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 docs/system/target-i386-desc.rst.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/system/target-i386-desc.rst.inc b/docs/system/target-i386-desc.rst.inc
index 319e540573..ae312b1c1e 100644
--- a/docs/system/target-i386-desc.rst.inc
+++ b/docs/system/target-i386-desc.rst.inc
@@ -36,7 +36,8 @@ The QEMU PC System emulator simulates the following peripherals:
 -  PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1
    hub.
 
-SMP is supported with up to 255 CPUs (and 4096 CPUs for PC Q35 machine).
+SMP is supported with a large number of virtual CPUs (upper limit is
+configuration dependent).
 
 QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL
 VGA BIOS.

From 4b77512b2782a6b48691d4341991491de26415de Mon Sep 17 00:00:00 2001
From: John Allen <john.allen@amd.com>
Date: Mon, 3 Jun 2024 19:36:20 +0000
Subject: [PATCH 38/42] i386: Fix MCE support for AMD hosts

For the most part, AMD hosts can use the same MCE injection code as Intel, but
there are instances where the qemu implementation is Intel specific. First, MCE
delivery works differently on AMD and does not support broadcast. Second,
kvm_mce_inject generates MCEs that include a number of Intel specific status
bits. Modify kvm_mce_inject to properly generate MCEs on AMD platforms.

Reported-by: William Roche <william.roche@oracle.com>
Signed-off-by: John Allen <john.allen@amd.com>
Message-ID: <20240603193622.47156-2-john.allen@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h     |  2 ++
 target/i386/helper.c  |  4 ++++
 target/i386/kvm/kvm.c | 39 +++++++++++++++++++++++++++++++--------
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 29d799adfd..e6d5d1b483 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -377,6 +377,8 @@ typedef enum X86Seg {
 #define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
 #define MCI_STATUS_S     (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR    (1ULL<<55)  /* Action required */
+#define MCI_STATUS_DEFERRED    (1ULL<<44)  /* Deferred error */
+#define MCI_STATUS_POISON      (1ULL<<43)  /* Poisoned data consumed */
 
 /* MISC register defines */
 #define MCM_ADDR_SEGOFF  0      /* segment offset */
diff --git a/target/i386/helper.c b/target/i386/helper.c
index f9d1381f90..01a268a30b 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -91,6 +91,10 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env)
     int family = 0;
     int model = 0;
 
+    if (IS_AMD_CPU(env)) {
+        return 0;
+    }
+
     cpu_x86_version(env, &family, &model);
     if ((family == 6 && model >= 14) || family > 6) {
         return 1;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index b563520981..55a9e8a70c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -638,17 +638,40 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
 {
     CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
-    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
-                      MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
-    uint64_t mcg_status = MCG_STATUS_MCIP;
+    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV |
+                      MCI_STATUS_ADDRV;
+    uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
     int flags = 0;
 
-    if (code == BUS_MCEERR_AR) {
-        status |= MCI_STATUS_AR | 0x134;
-        mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV;
+    if (!IS_AMD_CPU(env)) {
+        status |= MCI_STATUS_S | MCI_STATUS_UC;
+        if (code == BUS_MCEERR_AR) {
+            status |= MCI_STATUS_AR | 0x134;
+            mcg_status |= MCG_STATUS_EIPV;
+        } else {
+            status |= 0xc0;
+        }
     } else {
-        status |= 0xc0;
-        mcg_status |= MCG_STATUS_RIPV;
+        if (code == BUS_MCEERR_AR) {
+            status |= MCI_STATUS_UC | MCI_STATUS_POISON;
+            mcg_status |= MCG_STATUS_EIPV;
+        } else {
+            /* Setting the POISON bit for deferred errors indicates to the
+             * guest kernel that the address provided by the MCE is valid
+             * and usable which will ensure that the guest kernel will send
+             * a SIGBUS_AO signal to the guest process. This allows for
+             * more desirable behavior in the case that the guest process
+             * with poisoned memory has set the MCE_KILL_EARLY prctl flag
+             * which indicates that the process would prefer to handle or
+             * shutdown due to the poisoned memory condition before the
+             * memory has been accessed.
+             *
+             * While the POISON bit would not be set in a deferred error
+             * sent from hardware, the bit is not meaningful for deferred
+             * errors and can be reused in this scenario.
+             */
+            status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON;
+        }
     }
 
     flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;

From 2ba8b7ee63589d4063c3b8dff3b70dbf9e224fc6 Mon Sep 17 00:00:00 2001
From: John Allen <john.allen@amd.com>
Date: Mon, 3 Jun 2024 19:36:21 +0000
Subject: [PATCH 39/42] i386: Add support for SUCCOR feature

Add cpuid bit definition for the SUCCOR feature. This cpuid bit is required to
be exposed to guests to allow them to handle machine check exceptions on AMD
hosts.

----
v2:
  - Add "succor" feature word.
  - Add case to kvm_arch_get_supported_cpuid for the SUCCOR feature.

Reported-by: William Roche <william.roche@oracle.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: John Allen <john.allen@amd.com>
Message-ID: <20240603193622.47156-3-john.allen@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.c     | 18 +++++++++++++++++-
 target/i386/cpu.h     |  4 ++++
 target/i386/kvm/kvm.c |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 383230fa47..c5a532a254 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1180,6 +1180,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .tcg_features = TCG_APM_FEATURES,
         .unmigratable_flags = CPUID_APM_INVTSC,
     },
+    [FEAT_8000_0007_EBX] = {
+        .type = CPUID_FEATURE_WORD,
+        .feat_names = {
+            NULL, "succor", NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+        },
+        .cpuid = { .eax = 0x80000007, .reg = R_EBX, },
+        .tcg_features = 0,
+        .unmigratable_flags = 0,
+    },
     [FEAT_8000_0008_EBX] = {
         .type = CPUID_FEATURE_WORD,
         .feat_names = {
@@ -6887,7 +6903,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x80000007:
         *eax = 0;
-        *ebx = 0;
+        *ebx = env->features[FEAT_8000_0007_EBX];
         *ecx = 0;
         *edx = env->features[FEAT_8000_0007_EDX];
         break;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e6d5d1b483..6786055ec6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -630,6 +630,7 @@ typedef enum FeatureWord {
     FEAT_7_1_EAX,       /* CPUID[EAX=7,ECX=1].EAX */
     FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
     FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
+    FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */
     FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
     FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */
     FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */
@@ -982,6 +983,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 /* Packets which contain IP payload have LIP values */
 #define CPUID_14_0_ECX_LIP              (1U << 31)
 
+/* RAS Features */
+#define CPUID_8000_0007_EBX_SUCCOR      (1U << 1)
+
 /* CLZERO instruction */
 #define CPUID_8000_0008_EBX_CLZERO      (1U << 0)
 /* Always save/restore FP error pointers */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 55a9e8a70c..56d8e2a89e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -532,6 +532,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
          */
         cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
         ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
+    } else if (function == 0x80000007 && reg == R_EBX) {
+        ret |= CPUID_8000_0007_EBX_SUCCOR;
     } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) {
         /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't
          * be enabled without the in-kernel irqchip

From 1ea1432199cdddbb4e7f98cee71cabf50a9516f2 Mon Sep 17 00:00:00 2001
From: John Allen <john.allen@amd.com>
Date: Mon, 3 Jun 2024 19:36:22 +0000
Subject: [PATCH 40/42] i386: Add support for overflow recovery

Add cpuid bit definition for overflow recovery. This is needed in the case
where a deferred error has been sent to the guest, a guest process accesses the
poisoned memory, but the machine_check_poll function has not yet handled the
original deferred error. If overflow recovery is not set in this case, when we
handle the uncorrected error from the poisoned memory access, the overflow bit
will be set and will result in the guest being shut down.

By the time the MCE reaches the guest, the overflow has been handled
by the host and has not caused a shutdown, so include the bit unconditionally.

Signed-off-by: John Allen <john.allen@amd.com>
Message-ID: <20240603193622.47156-4-john.allen@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.c     | 2 +-
 target/i386/cpu.h     | 1 +
 target/i386/kvm/kvm.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c5a532a254..7466217d5e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1183,7 +1183,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
     [FEAT_8000_0007_EBX] = {
         .type = CPUID_FEATURE_WORD,
         .feat_names = {
-            NULL, "succor", NULL, NULL,
+            "overflow-recov", "succor", NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6786055ec6..8fe28b67e0 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -984,6 +984,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_14_0_ECX_LIP              (1U << 31)
 
 /* RAS Features */
+#define CPUID_8000_0007_EBX_OVERFLOW_RECOV    (1U << 0)
 #define CPUID_8000_0007_EBX_SUCCOR      (1U << 1)
 
 /* CLZERO instruction */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 56d8e2a89e..912f5d5a6b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -533,7 +533,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
         ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
     } else if (function == 0x80000007 && reg == R_EBX) {
-        ret |= CPUID_8000_0007_EBX_SUCCOR;
+        ret |= CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR;
     } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) {
         /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't
          * be enabled without the in-kernel irqchip

From 1f97715c8390e582f154d8b579c70779bd8c9bdf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 9 Aug 2023 01:10:34 +0200
Subject: [PATCH 41/42] Revert "python: use vendored tomli"

Now that Ubuntu 20.04 is not included anymore, there is no need to ship
it as part of QEMU; Ubuntu 22.04 includes it and Leap users anyway
need to install all the required dependencies from PyPI.

This mostly reverts commit ec77ee7634de123b7c899739711000fd21dab68b,
with just some changes to the wording.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 configure                                  |   4 ----
 docs/devel/build-system.rst                |  13 ++++++-------
 python/scripts/vendor.py                   |   3 ---
 python/wheels/tomli-2.0.1-py3-none-any.whl | Bin 12757 -> 0 bytes
 4 files changed, 6 insertions(+), 14 deletions(-)
 delete mode 100644 python/wheels/tomli-2.0.1-py3-none-any.whl

diff --git a/configure b/configure
index 4d01a42ba6..5ad1674ca5 100755
--- a/configure
+++ b/configure
@@ -955,10 +955,6 @@ mkvenv="$python ${source_path}/python/scripts/mkvenv.py"
 
 # Finish preparing the virtual environment using vendored .whl files
 
-if $python -c 'import sys; sys.exit(sys.version_info >= (3,11))'; then
-    $mkvenv ensure --dir "${source_path}/python/wheels" \
-        'tomli>=1.2.0' || exit 1
-fi
 $mkvenv ensuregroup --dir "${source_path}/python/wheels" \
      ${source_path}/pythondeps.toml meson || exit 1
 
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index 09caf2f8e1..f4fd76117d 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -185,14 +185,13 @@ Bundled Python packages
 
 Python packages that are **mandatory** dependencies to build QEMU,
 but are not available in all supported distros, are bundled with the
-QEMU sources.  Currently this includes Meson (outdated in CentOS 8
-and derivatives, Ubuntu 20.04 and 22.04, and openSUSE Leap) and tomli
-(absent in Ubuntu 20.04).
+QEMU sources.  The only one is currently Meson (outdated in Ubuntu
+22.04 and openSUSE Leap).
 
-If you need to update these, please do so by modifying and rerunning
-``python/scripts/vendor.py``.  This script embeds the sha256 hash of
-package sources and checks it.  The pypi.org web site provides an easy
-way to retrieve the sha256 hash of the sources.
+In order to include a new or updated wheel, modify and rerun the
+``python/scripts/vendor.py`` script.  The script embeds the
+sha256 hash of package sources and checks it.  The pypi.org web site
+provides an easy way to retrieve the sha256 hash of the sources.
 
 
 Stage 2: Meson
diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py
index 1038b14ae0..07aff97cca 100755
--- a/python/scripts/vendor.py
+++ b/python/scripts/vendor.py
@@ -43,9 +43,6 @@ def main() -> int:
     packages = {
         "meson==1.2.3":
         "4533a43c34548edd1f63a276a42690fce15bde9409bcf20c4b8fa3d7e4d7cac1",
-
-        "tomli==2.0.1":
-        "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
     }
 
     vendor_dir = Path(__file__, "..", "..", "wheels").resolve()
diff --git a/python/wheels/tomli-2.0.1-py3-none-any.whl b/python/wheels/tomli-2.0.1-py3-none-any.whl
deleted file mode 100644
index 29670b98d16e2bc770d4fea718582e1dc0dd8aca..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12757
zcmZ{K19T?cvi6(2v8{=1TN67I+qN;W?TKyMwlT47dy-6?{B!QT=Y02k_dnfhuU_4&
zpQrb(uBz4bbjeEt!O#Ez02H7}RYJMAoa`MF1OSNoGm!sWb+)sywqVfHv#_;r*3+Z6
zch`}hi0BtU>O7{2a9ah3PcNtq)hCA71L7L+!5ht;=`*8pC+Iw0f6veEbjEa6aZ!t{
z-7eZr5K1xTPCr1$E}2eui*cb3n04x0V575s9eHtd3AnP2al@G=Ow7|XTHLl>i@Uu~
zQ2;lqTW6Tfh`@q@wZc7Dx~3&*V^rA36cr>aDz~D#4UVmCE<fvM!q-1B?4Vbqwh_f{
zIJ}vk_YLE*nG{ADo@GrB?u9zV$fy!XJ_0pNx>h*IfIByKl8D&D${Pjy<~={mz=uH0
z;OKxG#BXMAx=t8hwKuh0k+D=iu>QdmGLYQ_>$QTHK(gPU>jK%Hve4f5qUJ)GmUw5^
zilT(pi7l$g-h5`|Q5ZK@Iy|sLApdVx1nrKoh{6B>EYtx2q`$0aZ{X-;;`qmiL%gew
z>k{$TZxNPH{>B-G()C>|lj^cyTE!*UBx4BSTc<kVAc6V{BCud9oU~<ZFOQQyx#fcc
zLz}Wqg6L36$1Cc8mG7f>yxf1Z1UB>@D~ux!JK>x35$}xoWyMldsqH<45+0P?zA!=C
zrmO~iH3qtRxp@te>kbb1y{B()f+NS~wt=ylY5e5Z@z!@|J%+wpvA{KvWSnT=G#FS9
zx<e;CYtf7()n!`NInc<qUpDEp#G}Bz%cq>`qzx-jZob`TJmBX$^<>dw!!_f*H6^ke
z+osYFw-rsfaF|qDyxGvI*_|vQ^>NGWh1~?hFJ~V(sT=WuCT4KQ^}VQrwHrE4@PeNa
zJ@3x2Uu>Q?ylKO2yADm5HTL2^D}yv6kP9_iN{F03?unuwnodf2mgb1uY)iSNRll9v
zXZ+n&d*q&2W6FEl)E<Er+2L!S`<9SW_$G|yiR~3DXg^Rff4jn$rJY@e3bFYym^a4#
z!F&p))9jZArH)iL7-{Q%Jn7If@u6br>im{s+XBAt(HCe9O@hsU^(M;|h~dZgYrt!2
zm7oomE|;Ut2;g?~{N3WDXo5e3_L<}k1wsCeS5kjtK)m@r-yO<wuLxjqNk#~F<Vod-
zMg-9;W{T20O2Tzgb-TKHay`KEn6kV0w9u8V-Cojzz)<7I7B~$I1UV*mYHweEHp5D6
zxE(TQ0B|7t%`i?4C(M$)vAwd#sRVvkx)wWwIVd5Ro)*VUs0!0ch&?vD`dU-T^c@RY
zxZ5!l^LjIK_A4?Gs}kOa$!$#LCo|Z?`^ZTT+g!eXs)aYhLtrnwI|M3|NzEbH#dP30
z;lTEkKde2{;FBRQHVFZWGIVqFhJ|pAo_J3e!KXK`#c=llA}A`HB2nn<;_1!&Xcw+G
z_m}J#dGnPvYmh-WpZw3;{f&*EnILath=_=vXZKj1WD9PW&l}cBr>b8gRns_Fw#8rG
z<vS7kIO#caGNLisXUojN<{gH{t57!2y37wfB~jd3GRbghO$*nX?F5YLFK_G2BOB(3
zao0x4ufM^6*rzKtnAOr(B_sAI&4oX1#2$Mkk~+*iGcf!dTE6nb;fZ01!WyB_7X)p5
z?gOQL=l$)(dPOUNjpgur5jK#^|6CZ;@9QxvgqY<PNJ9@k*6-gV*{Kg#pf)SLwPu|6
zPc0J&AHzrfIXVMRk2AV*Y)lL3H;nah_!-E?&4A;fz18a-zA#YhufVXd?i_o<Lh0%H
z1BcwAB%weJ-*2FC*Uy2|%R8jZyQ4vj5%{FQkO(rcVm&196Tww#ITm)H;S#TqgJ{Gc
zy8bECR?lSvuAf&QMAmQOm{*NXn03CTZ>E=5nBoH$F!&665^%LM5GYn_l5||zSlb;T
zPfBsV-1?-Wb_u-H@@SvC@GGa&q2iPKgvZp;{OlgV^!`OwS1b&0`>$x3p7%)<*zJbL
z<o?h>yFg7sq;WO;yuv~w%=ZDnrcdhcRqW|CsuNskql<)quW*4wBz1ujWy0xCtXouY
zkK9448_4P&GU{B`4KWU*h^L;JB-;dc)>&p*u-T?wANcCm&Yl;-C!57t)h7*LhkX*z
zoeYgBmWEI$_|=^}SH)Wa9m97qe53&sL>yT1`skfRvP6JgCO8P=41{4CBmzI6jst^K
z8J8wzB2qYS^xfcbK>RBi^(N_<dtOn^ib9UgCb-os9T<ZA*$nG`a~>+FWM0`hgMTxE
zL|v$y=eVB49Qew9_W|LUFanUp!|!%7OaPh1&wro~!kV1zhD)C}-H0T!6uu^I<rmFY
z-MqN**z2c#IlWo+J_#_%XSyc<Sa^EW3kn1y2@g_aFWTGqEwo1!6zEqJv6cYT3vdzY
z1@9JH(sQ^dVe<*>?f^p|7ZDsKSgrTwNWJir;O)LV=?7bwZ~3-lAZrNHYo2k`4GU27
z`X04*5_Afhs%sN%<mCaHr+!vw6kmj^#t)!V@pMPsiHZxIBGxaO#dr^Sq$c+bf8xg3
z%FkL=fG%R@;JSTKS2X`gvaea(2~<`U2^5L7BsiDIjVoPqvl*%r44u&8wUWQr-PfPI
zum><f0O1>;1Mmi0JX!e(<iRM#sV<C4Bn8Qu`G%R)E_T2UieKy{?pAuKwd=iZ{toc!
zqP``=wMb{c=mX|-AQ@oQW;&39!q|}FO~7^iSgNwdmH9PD1HW+gsj5Xk@JnAv$*iFJ
zTIM~^ws;Auq&zUV5sU%JO60A3Fg#|yQ&fz~gGd3^a5<hx1DfSPAUZ^O>AWEM<mAUj
zA;RPd<`cl)jK(hdD4zTsmrmp*fjy%Pn>bULIE@+6>(esH%$x}Jb(~vJWJ7@Qf{$44
z7e~`MjMB^Qul>&``bN2BBtbO&yYN&p;~W+Q)6}oI*a(bF=-i0mR1#y5_sE>#Sikl~
zgsA#kaDi~Ytf$zBvbkx!qnNJ+kr-bt=U1Q5>Qf?T7lSO?4Bigg1<{PF*CpLsb7x@a
zf|0%Z_q(M-4E05Ho@`*^va_Y>ir^La2nE}xYHb~4UHl2hi&x<9ifsjhIC8A^1NA*P
zae)d_0s_E`6^YE?dc<1pES|*%EB&&SHi+apoIDWuUbTU=qSOU|``o5SZlVu@|F)XX
zFOYuW0pw-mOX2&3RFpBMCTF5f>al_lZRZ$LMLv0Uict{f9(-I}S(|zTk8K4*Fghfu
zMW_@-9w6RFTX%*o4>EYs)IxqqPs}o9lvVF4i3j`^rG+hy$vB|#0>T<&#Gua^`S?$Q
z-M%0Oy?<jI;u0FZ7PycC10zN92vc4fm-)S+W~s=|$%mUij13hH$x^D}G+|dr2+3HI
z{g}Y1Sg3AAD&w3o`5|ZCK%}eIPNMUI?e0#l@XaE5?u~YVSYnedfxyK;-!b~9q0u@6
zw4Fc$!h@)13TaGm9=Nb2&Ddj(PylX+7254!6J!gVmPT&Ajmpn?vCLGvMT>aos(hIu
zNyBgOO4B$lGAspX1-ABRdI1n)kREccqQQ%*&@pl4jt`<fyeq3c;V^!1phz^95i+P$
z(JE&~>vs3Pfr{utt-{gI7;tWIh8RC+#@megPSS`mU#fH7)V&_Opu`m;3Y82H>SSO@
zXwpwC!!D5^Bz`DbM1(GADdp+uudt_;GAk#5N0@?!sSyrGdTsU5^GW%1JUpST1Qx((
z=QqK@q*Z~C<*OjoQ-*Ufp0x%D4g2-iV(UQ&Xf_9#5#p3+EwO>w1<@vU&DYLpLNnIp
zR8%V<ni#??<SwOoinC><kCkpOAz&TqN_mu@vSTZj4Bt}1I$(`il1ExdLI|x&pfO`O
zAoN=&myy>pfUUZFuqV_z3Yp&lWOTmr$vt$c9Wr%EkV}pj>+7q7dr$(Eq`-{f=f0tv
z5TPhWfmG`}P=W_sTY4i>yd^?lZo!DC{wA~3p}x(pBLYmC#K3c{;ykf9Wx7}+XNhYd
z!8GJk*-1VHc!UF-81|bQ8I+m7@0-ihnjwAvt(7tYYvArt1@9%Sdpy3ue)|f-JdYe^
zVrx~anRs*l63W}}f0EvAi`<|TProaOq;QZe7m!O3Z#Gh?)e<$9i}Voaj9=I@;y*g=
zzzB(Q(74XIHs@v8$By_yK1a4nV2%P_|I<(N;5n7>E0yzLXc>OIfOJhHj<93{N0f;Z
zm1VoE(ZmCJ1CHsE8tIS;n<@brB(3*@GKj9!L!m2Pz1it#IG()=hYT@Q-U9gYfg=9(
z*0KhumaYq#jrr*mr<?i}u`v=bpE=amNoQOe?sDmO1zJ=TmC)t4aghaxkzlYh7%OyG
ztYdxNpd>P)u&8WZf0j6q(m7ilOV{eGC0nm^7sOxLl<eh${f~|oa4WPhDXC2I!?MGj
zb}C%$ShDQw=;+u+ICg2_C5^CPxL-MOp}u4T7FZ^(fH@P-YWi~~emMC!<)BLo^D31u
zfIjAean$S4P(zZB@q|~>QO-Lom7y=uM4FCaaPhQVi1j8TY21d4uA-&zcAjg)WTa8$
zCbQG1WXx1bYxTQ{W@V{wy-ih>sw&T-;~v(O=RF!)JqqA;plP>jili1riMuqviFPv&
zbXddoJQZx<nEha4=!QT`F~fJFYnE0E!nOq1RNsAi3RgvCrOtP0ASO-Si-mccA>bkT
z`^BTGlo-#6cq3B!Xa(4ic(TU#Ke|lib)r5#zXt0vhL$H##s?qMHJq$uiWUtq@_1C{
zNbouNw>boNqv4ff&!M6)Q$!5LPUJ&WBt5irMSS0)b%{ink)g-$V(%?|2tXozK)*GU
zE)Ucf&-{(oF#SN7n%q+C&((i|)+ABgX#@lhSX%^g&>)mZUu}n{amF^tRPv7o@bT^%
zwn+6G)S6p>sDTT7FB_KQfrBrxTgMEp-XgoIYc0BrF7B^ofthG9qG#CemO|7d_t?)w
z0T))-K@nTua0;0BPlxs-T#cP{`6a~dh^NC2`s4wv=c2~LOvjR&0_HsF+Yp5Fv$;z1
zRsCiy)m7Zd*2tT^d86^Hz6Q<3N9kUF;PpdQK4Oj*n;4H_gh&UmI$vYLMp+Bxcdhyu
z4KBnBDVdpQros#14&PFryt=ADdmxUD(4%oWv=*KWWbl|jA;xoa#C1?5KSdc^e6*NH
z0addwfm6y_@<~y??luw8py(m0nJ<Ie+|v2r+<`|?7W*(ib(ZZl`qJu0gpZA*(hySm
zM>TJzEVhY~*P4<KO#?3lKV@`cIcGjlN!DYtYo&U^)ant<S}}kwLcRpfB={tS1A8UE
z$<eSPv3|NZwkwDRJZfdUe-qu#U6+Pds}Y38;WK*;?kABaY-8PEOkPFh#HISrT0xbw
zlLZK>rCi8#*9P^4YK+JrsKaFG+9FAd%BU-wlFb>s)bF!qtt<@iZ#a}4moI%LMZK`m
z^|)dI{p7m<p5Lv@Mw=QyTJM&XGRN7#QsSTXDLGR=WTZv~mrSSNy9;H|+lRU9p>70>
z<0vjX*2>j$i-j`yf_TvIq|iYFn#!of$=$yPR<|ispw1X6@m2pQFg(J}c!r3uw_+Nh
zD9T}9>tQZw|B^nrrTb->U)%?4--ANX$$BcnMrE7PgC|y>#jdzKVrQx-?2-&ex+BSw
zRh8C9=@xY|zphKO(6v$HLgccs`s|KeT!|#)xGKULNx5>3{p>!{(dzA@i%ZN_az*#&
zT6xebM&peVw1n}TkyKT;WqBKGO8&%`1{|lnOBTyxFA11BE{Zil`)jAQvS=Ze1OjJX
zIT-so=E8%8IG>Axxu?~|I6gZj<EHd#qR!spG-e0dibkjE(3~~JgJLu}Xvgc)1y;AU
zv<}{fu7{Aljo6Ts<#nOKbTub5BENQ|E@jADH_NpVBa;DE6sc-hKyItt)FgaC0*xDz
z>|1oES;L%Yk;{G|2;F_(%2#9BhyvigEV9_CcOL{UE$XCE&$PK|#9a9zpTt8lQ2SXi
zBkJ|&33G7X=O|;X0kUHX__knsWVhxf`)j+~Y8x&d&-3+ecyndp`ve>^XzD(6SMzAb
zkV5fXca+`C-b?TVj&7wvE|8GJT`9BY05ND<;IVlf?C=g>{zFq~htXow&FfCnsCll+
zo5nmtY(QIbMITq2kK<8_?5qLO#QrQE%HTjoQ6^7S2DKZ`UHM|Yk_@P2fTGnTQr|NL
zRkuU0^<Cll(32JI#QCzI4JobgIh`V3ih}!+o1D5S&_}gQ;Tu%Ani9e2WFn42EZ%#l
zs(fAD_+$Y9(YBp%b4J?ujL7r48;n$?h%x?`NW}<0?2}Ay%P6}Fj@%4Q<&s2l{Am}e
zqRk=I%#cSXWKBJ(E`z9_f|>v$oRr0RXrFV3?TGl{j)%o^=+-aIo<war9`l!>7eC1^
zDsnz-957J?jq$ma#%@VxyYxA*wR6K`Xjj7!C!zc~x+7jVMpwu>bPzgH%gKDzJ^FQ@
zSO0n20f80WN?yt@SLmhJWufJ6HXhMvK{_kJ{>Da@lKUb~kD5|*TWqBVv#x8dbA^4^
ziu^y45qWwH2c9!$+wwy7lWNpm2?B4YQKo0o?kTuWv-gn7Ox6-Vf833#L7EvHb&JkF
ze6xlvB;kVtDqYRSUtPN~zshd2l8}KDEv2(a4jHm-dLwhju{2rJi)+|`tN$k9u?@4&
zAXlI&x5*YWwbw+dR<<sIrE4@qvgijEt1Om2t>buC^u01$(^=7v-jq$HUTL>~hB&@A
zV?jjCP=}o)YRcZ^=dvc5z-LurkCjuye5qEc%nPv=pttZ?cgS?rAuA6!n!GZno@Ea2
zIPI;6Ql)9D6L6^q(rb*RNz=`qkt~)K<C_`PdaQTn=FRo|$Qkac*0RL<qw%z;KD<Hq
zteHgipJPLah{&^H@^>1$G^%60<4e=c{feFb>fM<M$3PCO{cQD$3M~h&HTCNM9>4{?
z`%%qB@pWgP963UwH#DdZFLmAocN)D`Z$^7mGKJbxooC{ADx|8VJhc14&e%+WHs88;
zw5X#!`3~!?e%0rKh$LD{XCh>WSHl2xLOj14dFMmEkaKg|3EYiMO>oa8x_FDsPUK`S
zCMV%~Ue9@%ky1bkQSxR*rZxxJg2#Jlb$zAsCS|{h;8!+(`sG&+eYb0a)L<td0~s>s
z0-q_pjtWG>bSr9=+;n-QtZNQDzA{IYS<k!;b<xB;H+!L!MjPiZKaS{I@3+cvX=GqX
z3`~M!b0@8AMk0T;zKbUY<btr?jVX-GHuf@Uonp<neDW5}#iG?AcTsZ4Z{s(N#~-^l
z+zOeMoioTOpQG<OdkeatRro7>S4=~mZ$}`yV#1kjvm~2sYQ{y4>pus)<3R)x1msCP
z-Sp(Zx-3{OqEO$RFS6OwW-jfa2r=3XPcjKbH$u#$<TvV;N%<SRMO}|3B6bERVpfZN
zY_+TWrE4pN4DaocNn4>^!!rUZ&{H_tmkgT=Q0$QCY8$_}tD3an(x!31U=y}OT#05b
zqkC>8%PX<;dl4%1B>N)9LA-T^C%3pX63S3zZjz^$=Z^{#7cC`G8YOMbu|!ZywIo4v
z_wDM!F!TK`TG)f#>PfJ7_r*R@iAX8my|M^TaZc%dVgf@zvzy14b{J4}Pyx63F+y(J
zzZ`vt>t@)7jJB~IA*|MvJwUa&ps;{7Iu%Kx=aVU(MyrPlQ#2B`IY_1{ETP7~g?UsV
zF_@*A(w^WaHV^F22x~CK*sFJ?#T9xF`uzHcmPm2?xLS9GOtsC?VxO%al%oL||4E_M
z4Z7;2b}pqfgqSmv{MtT$fns+Y_fCPooJe;&((!%dYfnJ|ZPs^R_q*RVMSZigvszCj
zGj?7374=fOZwu+%A~e_(__}d0<j4l~3=mNaa7_2XKQAzyt1?&EX>k>H%&6k!V5|8U
zYq~?1$HOCePPgQL3=F%6S#ekwq3dRi6$`Hr$5H<N4q9rRHg97C+x{jpc=b|0)qRgB
z0oEJ$rVqXdE38RJCLCAK5Ulk<45y$Fj=XuQsG3%V)aOk`F@WlS!6#GOEx~2Ziu?)l
zO+-LS+J0q(?divkT<S$y^<(Py=H2{cELLsMK>l3bmK}uWCW4w=gSg>YkZP{bt#(J&
zY!93i+r*@RkCBsRMx8it^aR<?mhYH}9_&@sI655FbudQSQ34}OZQGU7i+8IKs#e@7
zTz2ma|E^rSTqH30fB^u$umAwuU&^(k$sg6aNY%z}ogL|;T2GKtuTU9<)XWOUpgN@(
z0fW#YaF8y-JtSWuu}TF4K7!hC<oTMX`*d<O#NZ<?Zlq&>^KJvrCFsiTtB+I79asvw
zYmK6IJ?{h){Osi7PtN#}6nB4yQB&!=!-&ZnRM^>30&sJTl-AK%yNAbX_i$->wHU=g
zsP?WN5tlT>2t#K<DY3hebmWMs6tsOd?E`Evu`wJ`1=WW1JxZZx4`E7S2MrXAhRnpI
zIZ2}sjqYL1!0Rlrv)tcy5wI8&hcSuTtrHVb5*!*j<WX_?b3QPd;*yz_wRvD@(z4B)
z2ZxFkSt(lRVVHf;wik9rn!6JLgFpT5OSa|$ed&L_-x#C4bblxJ;Y&lFthOzl-DGXg
z+00|gFJ^lT9NPFkjq!^7#-p)96t<C5RM}HUzDtlzwlwX3_j?Fq!pPGQ2cq@u{_to(
z*=<NyXz7do6f@FWsYQB-onPAuze9u5BeQD*_-MYQnH@o3KeFa7MP56mFM)fM6iXcS
z`!=%2&xsfikl2#w>v`b>5)l;BmEo<_9tTX+nfu2(K)l5s$d`_S?7cxj_puukOG{KG
zh}Dgcc!!(ktFBhn9g-+dTk=gSj?Vf$P!rZ~3yQ5uznyq>!s2vY=Ich}zLeI+OIoc1
z!ti!UY4%LmAYi(`UGsZ&=WyFY9ccS@bM_n^JU*W;1<t&>@n_(dOX9Fh73_UJm!x2>
zH+lK5@dI|vXRnU?B+wVbb7cf5*C2$`@^>yac{l(G^Tm`$EFl<%4;#^^qClfb-)*{!
zuA#oV&mwb3`S2|=4OIhx<*#W8Y_SNI#u$zI2&{1D1T7&HM+w`faj~mcSNe+~#Uqd{
zgPpftulU?hgj~cqnw)QlYjg9FkYEF43_gCyLwH+5?%;f%Z1k4AMQHcG;Ofv-uM|N;
z8SCVQD2LF|g0cLVUXU@}sX-Iy1T)<D@Ja`lW)|Cqn4+o0vc&2`X4}pp=iJsYD<$|E
z$0yQvyDR(0<f-xs&n8@56vPeF*@-2!ln*EBYbp>B?9?C!2(2m2{Zm^C3%8V$ZB8=r
z(kKW{wPph7TUgPW%{?mm1TWE{=D=t23IyDP`31E?WLeP$&J|%!s&;keGS|6bSVH#;
zg=Y9&pk(=_a*>k0k*1H=2QSflRMMDsxx}4jG*1fKAR^OKM|oup08ZJ#n_(AuAcChF
z0<8uclEkc+Ri~{AJB5aCvWIoaLG3)2mIOJVT2*=~bSF`Ybavw&Jp@tAonz`Jl3q-C
zO^pOX%+H93riid(KxMuj8a3|=?=2r)hK$`PAM)@XwX7I=H13!a3Xv+>8OR4fSa+kd
zh?Y>Y93RIC^ghqFoAZhteNn1EhS(fzbdZ+1Q<ysrA5n=8e%u$Vd$>35<(ss{Lulnm
z%??QF!ONlRv*~h%6^)Iu9qV2j_-uLNInnqmx^p&$bWeGBL7q35L+~x9!3~cMsl9t3
zpYf~dly5Pl5TCH_W|Dc;uXM)M)FD~FA}5bO$>ttyiS7aXd!A~zMll=ulb=5R48*_k
zl(W0NiPN7{#TVUYIUoQha{Y=Ppp7?wgmpsHni|BeABGsbGf=Fm{iCVT++_5_o5SYR
zW$R}KdS?4_>;sQCF@-jPm~jD3p6#yN$>tem3tH`u)TCmD^IKc36<(X{yvHbi{hkfm
z!O@}*3Y<Q$b|qNY*wURmWcH#7!~EI|GzWU|CMUHdY+CU>X!DqS2%af)R(>#FXm!D!
zLBHzKS6pP?hExueZ`F6{weWlJx*w3oPRw}ap(gfrsjx3hyn8Tb?fBXhWrc%B-5~$I
zVj3#I6Yigt;`|v1|5~xVJN-Y_Y^<buBTGB}jZRTw;-{pF!~}yPls&vWBOA*B=-(`-
zqwW&$9TWgy{FCMJ{>ekfOwUNqL~m^2<V<H_Yih?JBPlHUO-WQoam;p|5vl8r27-&s
zFRq#Ex0tlEi1HUo?&{cj5iM(zS)?+Y=hvyxZ*ypjg!aiT(N1MV4OQ4(2DGln%VYSj
zfX7Q9uebLLL_7ARK2etMXLA8IcEwNU`YkPvfQOlku?xLjPBtS(9k9?bEPF`vS5xkE
zsDMCIui%C>UiwyV^9%D464n`F%z?2PlBn;Y_7>!U19+@k9ZT$CkXcs83E0A@Vb}VQ
z`@8)25LajG!<z#_b7`jfu3%?fO*@9d*GGVaTZ4K!0}=fZ*HoUln<e~<u`MifZ|GhY
zuq-}q!Yt%WZ8OAsw#cfxPJ+s7k`r3N>tjDrxgoN#FfT6$R5unlIAq#0-A<oUEmMh?
zG*I}KFkTh<E*@`JLq49L^I?gmtEaeP&5R$+Hn}j-1RIR4uo_mID;kamJuJm_ix-;&
z4Q_2)-<?5<bjfBwrU=x!n!L8*fESZ=j0Zz*8NL@XBG*9tEp}k$X5-Df-5v6?5{OIN
zl2$d?C=Rts_oxV|{fL#<wrFh<HysC#z0j@Db*?s$!SzNMLlSJytVl#hxGeQ&g$zvP
z81WQ)IBm8;u%~FauVI>WbZ$Esai^f90;)Q_a^_w_jWPKHy`fbZY`=C245zZT1-2b?
zb?#bec_sK91(epPVO5;yLH<naXapM)!;>HPp@x`pl%0GRu{To6;<vG5WnoRz@ZLSC
zT;}fa!6X>jA?y@)FzD&$2K8ns5rwGX>NEWe7;0pK5xq(ijkxGm+QxpfAo?U9z0Gk=
zk=~?As<<ZVyD){!Rt0*(R<$r17*Y5f`T&yqt%W^2B$Pgu!E%c<+HUx-^*w)iJ^(-@
zQE&aYq4>vB75<Db|IuUBBt%7JV5cQyCuiZM>8PiEPB$nrF0kx4%1uhsNzslm)c=W%
zF<J%2B^F==#Nlzui3i3xL?TAfkukXjRU%qav12f4u@)7YGS2zY;c?L!`H_kv$o~qS
ze@und>lBpy&v#J;F#v%7AEzQKsw^lXs4Uo};r)HS8Rv65M+i1W7}%812wc+!J*WJ_
zM>8Ycad~y`7-mEhuDZ?~zkxJ-W!BRp9&gk#sS2}xxhpU2v31}1^$gZ_kColw2~|yV
z)Nr&pU+{)`>NyTO&5YO_@7}>#`_R2IS-5$G`NyUKMU5_QJ+o?h#SUb*wnUO;D9(k7
zDZR?*ow~8GD+(qr>V><DD>;mAt+><cgDkiV9=hT!vz8c(!O<eAl<^0r+vK*2kOLk*
zogfqnFTRa9VxV_5Dwz~8ufL?8v6FFkte({W*>gV7*{n<(t#6)oNmB+~P1uzZMb~^P
zMELjFliy`hQT#D)(L-cl!pjrrctw20N|@nrOv>8LM*r|w#r}DoNPc0ml5k{>D2T;<
zlZ1HO`S!g7WyIM#_zek}8TFKt5NA}R2Kw}cY^LUk)w*uwXfnr8Ah%saos&`2<`*Nl
z7Y-4w(TS+zo1oN<0N3pH0fu6yM&9<)MO2N5pl>k-yP}nx>&M5u31oR(z;DWxV9RuK
z491!!Z~2(LW|DDv?|T-2Q=h|dXkRaHsN3T>L}jxKYg?JJ&@Nq696n5LzAw35lH?RO
z9-Q%KeBRxUzh2EkYr129NrUA~WTv!UH{A#9+*i!o(fla7z_Kj8&xk%J6`0A9WpQ)A
z4zeonOyJDAmB^XS7C;qcCg3`4Usj4ho$K)VRlR4)Rerj)wbc-G&E=@h9q^d@)UU6h
z>aj)EhMM+EBwJjFy)(UQ%i2fMC0VzM_eqS)F6i0^26_UW-?4W}C)+wsgg^3L`-XNg
zNWFif%PDqIVAhp%u~LoOYSB1Q>xR`TX5?Ja#{Th2l6Q<VLFp(Vx7#zNu7gEa-cit2
zOJL-JRjx$wunsy}=0oPwruAx*(CuCYtNu6nJh^H}aNbn;>L_wAb?mm;!Dj(vBT}%I
zwZSbXl#r{3JQldo2FiWMxvK!xgmutrpsB^Q?}EJ&uJV1y7sik-l6JPpESN?GC|m(`
zogfDZYS_&y?x(WRClm9vyV~h};$UJ?2YfT)CQ85T_05@UO;SWemq4{7EcOt!Fy5(r
z??=zdRpqe_r%7XLB;J;I9RB;Aj}!#xl}IE1%ZHxzSIoHLTb}8w<LCIawwbM8a`vsM
z48m(Lo5_&8GjP8fHA+Pm56&{@BI8v+ZWS2mK-T6|G^D^Wi8OQf>iQvQtZRj0(W`55
zTurCYE!;Cz-OENYtUKlVZ<rDyqTK}*SWM^3YXvXUG;r1X@`Et=3(5P0LfPzXaN6|C
zoL^Wa;?oZuprx@jC{#P73y&R<i{~unZTnd!;sgff0@`&d_DHHL^)SO9B25={jvqTE
zvN*QehblR~Mrim$SS-}>Z@RygV(u@9(FA7gX*U1F2p|JiC0)s7c`iCg>=AQ07eIJx
znykjtY7moB_Uqtk;bkJu1U}T{KDy01i4z_SS{s&=*)(Xt^5Mr))MY~!5jL$kZU5dK
zjIW<<%{MTr7wZn*XY+3Z)kV#w>z<<aA04aUo%i$U0;l{^_pJm-q5l38je}q+nR8_m
zehsq3EJ<-s*7IB~Q4fHC-^>x7U62Dacr{uVh2&PYRr>4-Trj&jy!9jbtu^9w4cF(L
z;6f`+>vN7o;{}KR@!?rjFD;cP)1XV{?3g7~d22Ej3BQy6tVObof<vvy6b)4jbXX{y
zH(}B?GiT`hi2pKQop573RV7bb(`bNdJ650@79cAkh>$^owi;p*ZEs>^=qE6>hy8ZG
zj`35dSI-RuQjW6#B`rQSBOOHK<@TV%ex%J_1*-gq!!S(uC?n838Aeu=b`y>~{))>a
zvQ6SBs-60<{S!@1fR!?MEv&T`rZFNqvwGS1I;na2;Km9MsbDgPQo+u7tJC2$^`j}Z
zet!NN$IdmXu7@GA&2@s}6H8H{qqB67|LVzG|Aef$H8l*Wz(z$}h=!rUNXBUqD{iLY
z0H;AirWFjoO_DI1XrZ=7Lz{MRX^16Rh+mjYr?Ce;4XcBQyHN9t%2i`Hqn;)VxmZ=0
zh5BsuYH0@6Ws+xsVvFKy31(--QmqId?_KdN?Q&6s3xp81G}tC6dT(ycJm5sUgGWp6
z+LlPO60)o5O+5S8V(Hf|siM0OMgNhNlS;}zjacX^b)@v8V63~tlACEmjBu7j2tN|2
zrnQVu5ZD<#lK3RPl~|?-j`=xsyl7a}7*MlndbbJi6tnv)P@h%xMUgp$J1nb`{oLBA
z*;R(s8pq=RbBkB@E5Qyiexx@xKGKwS8xc&t#SUvLm!an(?}~P)N{nX7gI@!A8O941
znqaDnQ>Yyo{yL;_C}eoS40TG5k)G*3K1>IUCSoTq0izYk$g6NXNmS(z^(oUyrst5Q
zX_c;0ibiENxH4Zi-S4QhC#W&BszB71bR+v^93cj72@*Hy$jV6=-i(ZE;QEr&k>UXx
z@!9wO$gTj4^~s4J$bxp7<OGE2en&mgnq5}Atr5IZm>{9~^Ql{|r0gkEbHBIG>K-^X
zc7!p5bN||sM(=KzktjnL4(DJcYis-piL<yP68Fg3OjF#Z!Q-i(jZMC(D@(oGg#s^c
z7s@R_H#S1{EIzgP(}ZEJBlDXmaU4o-e&2oOSRI3_#<D%BYM5+U@DEE1S(;hE^wH>q
zNPLO#cMA^sy|pl^Lc?RroLN^6y4C26>oo(t@llmu4m6K!lk8VU)GG2gf>VMfkFyp|
zaKXVr$r)4*0S?<?`7x$2TCXN|2vnI;qBs@Ki}UT=P7B&A+rrB(8R`_~j9~zCcNXrt
zUR7z#7yhQos}^j5-lzoM^@5^v0ebDZ;#AX;HnziHVXEnkpdHO4f<cev`4hew1%r!6
z<fWp6{i+>Z-<cTmhQgzOUnB>b<1k&3lfGNOI5Pa!Tt6GRO4%RF=%#n4m%DR=BI(2b
zRB)`YJhIB*)tYmFSS^h|w$qvpzFet%(yhB9$8?tR3Bib}smzgDNx2}2AgxfGhHc7^
zq98^!ZIfK3N~z6f`_o9GZH8H*O0P!rb|7qrRaQNK=Z&nwvc2!Uhqc1$laoAWdFf*<
zu{_^D!fvv3api;>Dbq0h@(lHDw}`;i<s9+**XI$(wG>H{`8g|SX^m5Fj8@8p<IN)P
z5`q~U-Fz;w7Upiam&42$9mIRzogm4aY59@$%pCz=V9B-hyo>+Q%J;9nOeJ<?+?@AL
zHs4S$q8a>sxJuOKsVw=|#RK5wM5d~)4GeDZ772{j*a*!11h1GFFAIe*h(s(MVIPSF
zgYmd31WP{u{K=f@JC#?ITW5TM`?jBwyyo)Vl9O$%qu1h1&0^ZYak;I3Z1b35udzhg
zi^%os%h5Z^Ns5zW%_1h(JKyR_Zth4h#z0RvYBXV0%HUCS$*NWtydoltpiR!a9))LV
zHHvUuJ_ROE5$h%eyk99}wUZT89eTZu-Q{_1`L^cNEftN9TIEs>bmJF<F9#kdRd4!%
z>}PziT1Z5bNc)s-I_RRZJ%W%AUXqR$uPXwEgfWQ(3>y-p4Knn8+>J(jUEqYLU`kXy
zX@ize+q&cL1cD!ZOU#J$^yg-hmi<(Y59X{Z(ai-Prt#qi)~p^_@cC7;i7qp=(wSo3
zmD{t9tY=yhDh&9B3ciR?KVgJ8fxZD^Ga$A)$v|#c3!L0<-lH)-T<#xdhj(53$0O6(
zm$6sYe62lBpXZPr@0&cnL7a_z%-s0iYreoorz|K~RM(Ln>jkC{xlbu)q7D(U;#<dp
z^)6KFhZYEfPlMK`cj!(C8{ItL9q7&)YjK|Yk)`u8>!woH7`Wb2Z3uCn{=B+5eyRG`
zcb}XeCmvW{y0H8B6C1SA@`u+<b~tL1YYufkl=LEy((OHpPMg~{VL#35vcEL=?4byM
zUAsg~9R@{ZX@NEgG!;lkx6-?#0;_H?BKqE3(D%L`ljuT=2q3%G^sB-ESh+Dh-2q{1
zzwoX6KEBpG4it3W&i)$Z+I-RWUB%buQ}BQkuyJ*Hl(b9%vhFN>m<@X=q1K}{V?>yb
zo|ADht7g~oP!TfE&;6_k`H(}^3GQ}w2lV>^S}HRS6nJGT%!*<Ul+h%2B?q}qBIdxL
zrf2i?xYk{c0h>bdd1Nb=c<C7a{9A?bP$?o){-^mm;7^JEk8=ON8nFLQd9ElbET<^q
zGQ$<0FAo6BbWZ=O`(g}Fo(lp3ZUQMuX`JBw>SzCwR%THm)7cA)Z+%Yv2^Y{gW`K0j
zjvn=hL^7&3($U39I_6t|O+Sg$<T9nOpO}HfWrX*#v&dq{VsjVfP-`}d_=I@YZy*zb
zNnwPvi(4NN?lteArORWxb|I|Zrjnt6VK}bVB=0qkmkYxfwQ-#@7uQL7TQv%Gb)~iP
zfyVr0KpK-FI&ak?GHW>Pj1AnoyKyms7GQMq38md(?B#$oi^4Q=HE~r}`mV}@`6f`+
zqJ%ra(VV6}CT*n_Tj^B;jOz7GwcUIV-YXHZ8agY;n?K9i{TU~8ZjtSgj~Um`{d5S8
zj9%6`GFBA>_^lME-%*3t9(jjM?4Hn^6Y&$;Ovx=BHKT2RRkwJ)SUuC4)GL2kh(m{Q
zR+%$3j=W3sedUA&VTJ+5v!FemkW#K;Mww)%g}I-PH`ZC*j#ZQG*h;?M=%vJ0y{wz<
zsA7-KPi#A^T)&Rjs!q;%{)v)nam^789rMMCOX68LPkY4ayrA{jybcFL!J&#Am|h&l
zQAuQv83?(v&mk`j0*VIw&u#X9Ui5!$h5r9I{GX=#zr+9DU;f`<0HDxc?2qgG5By)<
z=6^^3UE}`?4Z!*r`o9$aza#&yy8VSL{o||u^W1-GaDNB?UBLPaoc>3L`fu<*<*dJB
z|1Mhnh28rn_W#7l|43N>o9h3kCj5&EB$4ocrtyEM{!3f<JJH{@fWL?ci2f7NKb3*M
lbNxN)|Kjqe`%hf|ode{h!6E)}H}s!f^H0t&Vfe?{{{t3-h1385


From fc00123f3abeb027cd51eb58ea8845377794b3bc Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 8 Aug 2023 10:07:55 +0200
Subject: [PATCH 42/42] python: mkvenv: remove ensure command

This was used to bootstrap the venv with a TOML parser, after which
ensuregroup is used.  Now that we expect it to be present as a system
package (either tomli or, for Python 3.11, tomllib), it is not needed
anymore.

Note that this means that, when implemented, the hypothetical "isolated"
mode that does not use any system packages will only work with Python
3.11+.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 python/scripts/mkvenv.py | 105 ---------------------------------------
 1 file changed, 105 deletions(-)

diff --git a/python/scripts/mkvenv.py b/python/scripts/mkvenv.py
index d0b9c215ca..f2526af0a0 100644
--- a/python/scripts/mkvenv.py
+++ b/python/scripts/mkvenv.py
@@ -13,7 +13,6 @@ Commands:
     create    create a venv
     post_init
               post-venv initialization
-    ensure    Ensure that the specified package is installed.
     ensuregroup
               Ensure that the specified package group is installed.
 
@@ -36,18 +35,6 @@ options:
 
 --------------------------------------------------
 
-usage: mkvenv ensure [-h] [--online] [--dir DIR] dep_spec...
-
-positional arguments:
-  dep_spec    PEP 508 Dependency specification, e.g. 'meson>=0.61.5'
-
-options:
-  -h, --help  show this help message and exit
-  --online    Install packages from PyPI, if necessary.
-  --dir DIR   Path to vendored packages where we may install from.
-
---------------------------------------------------
-
 usage: mkvenv ensuregroup [-h] [--online] [--dir DIR] file group...
 
 positional arguments:
@@ -726,57 +713,6 @@ def _do_ensure(
     return None
 
 
-def ensure(
-    dep_specs: Sequence[str],
-    online: bool = False,
-    wheels_dir: Optional[Union[str, Path]] = None,
-    prog: Optional[str] = None,
-) -> None:
-    """
-    Use pip to ensure we have the package specified by @dep_specs.
-
-    If the package is already installed, do nothing. If online and
-    wheels_dir are both provided, prefer packages found in wheels_dir
-    first before connecting to PyPI.
-
-    :param dep_specs:
-        PEP 508 dependency specifications. e.g. ['meson>=0.61.5'].
-    :param online: If True, fall back to PyPI.
-    :param wheels_dir: If specified, search this path for packages.
-    :param prog:
-        If specified, use this program name for error diagnostics that will
-        be presented to the user. e.g., 'sphinx-build' can be used as a
-        bellwether for the presence of 'sphinx'.
-    """
-
-    if not HAVE_DISTLIB:
-        raise Ouch("a usable distlib could not be found, please install it")
-
-    # Convert the depspecs to a dictionary, as if they came
-    # from a section in a pythondeps.toml file
-    group: Dict[str, Dict[str, str]] = {}
-    for spec in dep_specs:
-        name = distlib.version.LegacyMatcher(spec).name
-        group[name] = {}
-
-        spec = spec.strip()
-        pos = len(name)
-        ver = spec[pos:].strip()
-        if ver:
-            group[name]["accepted"] = ver
-
-        if prog:
-            group[name]["canary"] = prog
-            prog = None
-
-    result = _do_ensure(group, online, wheels_dir)
-    if result:
-        # Well, that's not good.
-        if result[1]:
-            raise Ouch(result[0])
-        raise SystemExit(f"\n{result[0]}\n\n")
-
-
 def _parse_groups(file: str) -> Dict[str, Dict[str, Any]]:
     if not HAVE_TOMLLIB:
         if sys.version_info < (3, 11):
@@ -888,39 +824,6 @@ def _add_ensuregroup_subcommand(subparsers: Any) -> None:
     )
 
 
-def _add_ensure_subcommand(subparsers: Any) -> None:
-    subparser = subparsers.add_parser(
-        "ensure", help="Ensure that the specified package is installed."
-    )
-    subparser.add_argument(
-        "--online",
-        action="store_true",
-        help="Install packages from PyPI, if necessary.",
-    )
-    subparser.add_argument(
-        "--dir",
-        type=str,
-        action="store",
-        help="Path to vendored packages where we may install from.",
-    )
-    subparser.add_argument(
-        "--diagnose",
-        type=str,
-        action="store",
-        help=(
-            "Name of a shell utility to use for "
-            "diagnostics if this command fails."
-        ),
-    )
-    subparser.add_argument(
-        "dep_specs",
-        type=str,
-        action="store",
-        help="PEP 508 Dependency specification, e.g. 'meson>=0.61.5'",
-        nargs="+",
-    )
-
-
 def main() -> int:
     """CLI interface to make_qemu_venv. See module docstring."""
     if os.environ.get("DEBUG") or os.environ.get("GITLAB_CI"):
@@ -944,7 +847,6 @@ def main() -> int:
 
     _add_create_subcommand(subparsers)
     _add_post_init_subcommand(subparsers)
-    _add_ensure_subcommand(subparsers)
     _add_ensuregroup_subcommand(subparsers)
 
     args = parser.parse_args()
@@ -957,13 +859,6 @@ def main() -> int:
             )
         if args.command == "post_init":
             post_venv_setup()
-        if args.command == "ensure":
-            ensure(
-                dep_specs=args.dep_specs,
-                online=args.online,
-                wheels_dir=args.dir,
-                prog=args.diagnose,
-            )
         if args.command == "ensuregroup":
             ensure_group(
                 file=args.file,