From 9617cddb72649f563eef8114648140b8c5607a71 Mon Sep 17 00:00:00 2001
From: James Bottomley <jejb@linux.ibm.com>
Date: Thu, 4 Feb 2021 11:39:38 -0800
Subject: [PATCH 01/21] pc: add parser for OVMF reset block

OVMF is developing a mechanism for depositing a GUIDed table just
below the known location of the reset vector.  The table goes
backwards in memory so all entries are of the form

<data>|len|<GUID>

Where <data> is arbtrary size and type, <len> is a uint16_t and
describes the entire length of the entry from the beginning of the
data to the end of the guid.

The foot of the table is of this form and <len> for this case
describes the entire size of the table.  The table foot GUID is
defined by OVMF as 96b582de-1fb2-45f7-baea-a366c55a082d and if the
table is present this GUID is just below the reset vector, 48 bytes
before the end of the firmware file.

Add a parser for the ovmf reset block which takes a copy of the block,
if the table foot guid is found, minus the footer and a function for
later traversal to return the data area of any specified GUIDs.

Signed-off-by: James Bottomley <jejb@linux.ibm.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20210204193939.16617-2-jejb@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/pc_sysfw.c     | 122 +++++++++++++++++++++++++++++++++++++++--
 include/hw/i386/pc.h   |   4 ++
 include/sysemu/sev.h   |   1 +
 target/i386/sev_i386.h |   1 -
 4 files changed, 123 insertions(+), 5 deletions(-)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 11172214f1..6404b5a86f 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -125,6 +125,113 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms)
     }
 }
 
+#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d"
+
+static uint8_t *ovmf_table;
+static int ovmf_table_len;
+
+static void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size)
+{
+    uint8_t *ptr;
+    QemuUUID guid;
+    int tot_len;
+
+    /* should only be called once */
+    if (ovmf_table) {
+        return;
+    }
+
+    if (flash_size < TARGET_PAGE_SIZE) {
+        return;
+    }
+
+    /*
+     * if this is OVMF there will be a table footer
+     * guid 48 bytes before the end of the flash file.  If it's
+     * not found, silently abort the flash parsing.
+     */
+    qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid);
+    guid = qemu_uuid_bswap(guid); /* guids are LE */
+    ptr = flash_ptr + flash_size - 48;
+    if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) {
+        return;
+    }
+
+    /* if found, just before is two byte table length */
+    ptr -= sizeof(uint16_t);
+    tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t);
+
+    if (tot_len <= 0) {
+        return;
+    }
+
+    ovmf_table = g_malloc(tot_len);
+    ovmf_table_len = tot_len;
+
+    /*
+     * ptr is the foot of the table, so copy it all to the newly
+     * allocated ovmf_table and then set the ovmf_table pointer
+     * to the table foot
+     */
+    memcpy(ovmf_table, ptr - tot_len, tot_len);
+    ovmf_table += tot_len;
+}
+
+bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
+                               int *data_len)
+{
+    uint8_t *ptr = ovmf_table;
+    int tot_len = ovmf_table_len;
+    QemuUUID entry_guid;
+
+    if (qemu_uuid_parse(entry, &entry_guid) < 0) {
+        return false;
+    }
+
+    if (!ptr) {
+        return false;
+    }
+
+    entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */
+    while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) {
+        int len;
+        QemuUUID *guid;
+
+        /*
+         * The data structure is
+         *   arbitrary length data
+         *   2 byte length of entire entry
+         *   16 byte guid
+         */
+        guid = (QemuUUID *)(ptr - sizeof(QemuUUID));
+        len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) -
+                                        sizeof(uint16_t)));
+
+        /*
+         * just in case the table is corrupt, wouldn't want to spin in
+         * the zero case
+         */
+        if (len < sizeof(QemuUUID) + sizeof(uint16_t)) {
+            return false;
+        } else if (len > tot_len) {
+            return false;
+        }
+
+        ptr -= len;
+        tot_len -= len;
+        if (qemu_uuid_is_equal(guid, &entry_guid)) {
+            if (data) {
+                *data = ptr;
+            }
+            if (data_len) {
+                *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t);
+            }
+            return true;
+        }
+    }
+    return false;
+}
+
 /*
  * Map the pcms->flash[] from 4GiB downward, and realize.
  * Map them in descending order, i.e. pcms->flash[0] at the top,
@@ -192,10 +299,17 @@ static void pc_system_flash_map(PCMachineState *pcms,
             flash_mem = pflash_cfi01_get_memory(system_flash);
             pc_isa_bios_init(rom_memory, flash_mem, size);
 
-            /* Encrypt the pflash boot ROM, if necessary */
-            flash_ptr = memory_region_get_ram_ptr(flash_mem);
-            flash_size = memory_region_size(flash_mem);
-            sev_encrypt_flash(flash_ptr, flash_size, &error_fatal);
+            /* Encrypt the pflash boot ROM */
+            if (sev_enabled()) {
+                flash_ptr = memory_region_get_ram_ptr(flash_mem);
+                flash_size = memory_region_size(flash_mem);
+                /*
+                 * OVMF places a GUIDed structures in the flash, so
+                 * search for them
+                 */
+                pc_system_parse_ovmf_flash(flash_ptr, flash_size);
+                sev_encrypt_flash(flash_ptr, flash_size, &error_fatal);
+            }
         }
     }
 }
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 5f93540a43..c9d194a5e7 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -3,6 +3,7 @@
 
 #include "qemu/notify.h"
 #include "qapi/qapi-types-common.h"
+#include "qemu/uuid.h"
 #include "hw/boards.h"
 #include "hw/block/fdc.h"
 #include "hw/block/flash.h"
@@ -191,6 +192,9 @@ ISADevice *pc_find_fdc0(void);
 void pc_system_flash_create(PCMachineState *pcms);
 void pc_system_flash_cleanup_unused(PCMachineState *pcms);
 void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
+bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
+                               int *data_len);
+
 
 /* acpi-build.c */
 void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 5c5a13c6ca..882e8a4fb1 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -16,6 +16,7 @@
 
 #include "sysemu/kvm.h"
 
+bool sev_enabled(void);
 int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
 int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
 int sev_inject_launch_secret(const char *hdr, const char *secret,
diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h
index 4db6960f60..bd9f00a908 100644
--- a/target/i386/sev_i386.h
+++ b/target/i386/sev_i386.h
@@ -28,7 +28,6 @@
 #define SEV_POLICY_DOMAIN       0x10
 #define SEV_POLICY_SEV          0x20
 
-extern bool sev_enabled(void);
 extern uint64_t sev_get_me_mask(void);
 extern SevInfo *sev_get_info(void);
 extern uint32_t sev_get_cbit_position(void);

From f522cef9b352ac2f9880c5c8b2ea7b2033bdc9f0 Mon Sep 17 00:00:00 2001
From: James Bottomley <jejb@linux.ibm.com>
Date: Thu, 4 Feb 2021 11:39:39 -0800
Subject: [PATCH 02/21] sev: update sev-inject-launch-secret to make gpa
 optional

If the gpa isn't specified, it's value is extracted from the OVMF
properties table located below the reset vector (and if this doesn't
exist, an error is returned).  OVMF has defined the GUID for the SEV
secret area as 4c2eb361-7d9b-4cc3-8081-127c90d3d294 and the format of
the <data> is: <base>|<size> where both are uint32_t.  We extract
<base> and use it as the gpa for the injection.

Note: it is expected that the injected secret will also be GUID
described but since qemu can't interpret it, the format is left
undefined here.

Signed-off-by: James Bottomley <jejb@linux.ibm.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20210204193939.16617-3-jejb@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qapi/misc-target.json |  2 +-
 target/i386/monitor.c | 23 ++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 06ef8757f0..0c7491cd82 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -216,7 +216,7 @@
 #
 ##
 { 'command': 'sev-inject-launch-secret',
-  'data': { 'packet-header': 'str', 'secret': 'str', 'gpa': 'uint64' },
+  'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' },
   'if': 'defined(TARGET_I386)' }
 
 ##
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 1bc91442b1..5994408bee 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -34,6 +34,7 @@
 #include "sev_i386.h"
 #include "qapi/qapi-commands-misc-target.h"
 #include "qapi/qapi-commands-misc.h"
+#include "hw/i386/pc.h"
 
 /* Perform linear address sign extension */
 static hwaddr addr_canonical(CPUArchState *env, hwaddr addr)
@@ -730,9 +731,29 @@ SevCapability *qmp_query_sev_capabilities(Error **errp)
     return sev_get_capabilities(errp);
 }
 
+#define SEV_SECRET_GUID "4c2eb361-7d9b-4cc3-8081-127c90d3d294"
+struct sev_secret_area {
+    uint32_t base;
+    uint32_t size;
+};
+
 void qmp_sev_inject_launch_secret(const char *packet_hdr,
-                                  const char *secret, uint64_t gpa,
+                                  const char *secret,
+                                  bool has_gpa, uint64_t gpa,
                                   Error **errp)
 {
+    if (!has_gpa) {
+        uint8_t *data;
+        struct sev_secret_area *area;
+
+        if (!pc_system_ovmf_table_find(SEV_SECRET_GUID, &data, NULL)) {
+            error_setg(errp, "SEV: no secret area found in OVMF,"
+                       " gpa must be specified.");
+            return;
+        }
+        area = (struct sev_secret_area *)data;
+        gpa = area->base;
+    }
+
     sev_inject_launch_secret(packet_hdr, secret, gpa, errp);
 }

From 6b98e96f1842a54c0bf074f4dad0928808afe287 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Jan 2021 11:36:44 -0600
Subject: [PATCH 03/21] sev/i386: Add initial support for SEV-ES

Provide initial support for SEV-ES. This includes creating a function to
indicate the guest is an SEV-ES guest (which will return false until all
support is in place), performing the proper SEV initialization and
ensuring that the guest CPU state is measured as part of the launch.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Co-developed-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Message-Id: <2e6386cbc1ddeaf701547dd5677adf5ddab2b6bd.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.c      |  1 +
 target/i386/sev-stub.c |  6 ++++++
 target/i386/sev.c      | 44 ++++++++++++++++++++++++++++++++++++++++--
 target/i386/sev_i386.h |  1 +
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 9c3d2d60b7..20c3a5af3f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5984,6 +5984,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x8000001F:
         *eax = sev_enabled() ? 0x2 : 0;
+        *eax |= sev_es_enabled() ? 0x8 : 0;
         *ebx = sev_get_cbit_position();
         *ebx |= sev_get_reduced_phys_bits() << 6;
         *ecx = 0;
diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index 1ac1fd5b94..edf6c519d7 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -49,6 +49,7 @@ SevCapability *sev_get_capabilities(Error **errp)
     error_setg(errp, "SEV is not available in this QEMU");
     return NULL;
 }
+
 int sev_inject_launch_secret(const char *hdr, const char *secret,
                              uint64_t gpa, Error **errp)
 {
@@ -59,3 +60,8 @@ int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
 {
     return 0;
 }
+
+bool sev_es_enabled(void)
+{
+    return false;
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 11c9a3cc21..dc0e53019b 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -341,6 +341,12 @@ sev_enabled(void)
     return !!sev_guest;
 }
 
+bool
+sev_es_enabled(void)
+{
+    return false;
+}
+
 uint64_t
 sev_get_me_mask(void)
 {
@@ -561,6 +567,20 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len)
     return ret;
 }
 
+static int
+sev_launch_update_vmsa(SevGuestState *sev)
+{
+    int ret, fw_error;
+
+    ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error);
+    if (ret) {
+        error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'",
+                __func__, ret, fw_error, fw_error_to_str(fw_error));
+    }
+
+    return ret;
+}
+
 static void
 sev_launch_get_measure(Notifier *notifier, void *unused)
 {
@@ -573,6 +593,14 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
         return;
     }
 
+    if (sev_es_enabled()) {
+        /* measure all the VM save areas before getting launch_measure */
+        ret = sev_launch_update_vmsa(sev);
+        if (ret) {
+            exit(1);
+        }
+    }
+
     measurement = g_new0(struct kvm_sev_launch_measure, 1);
 
     /* query the measurement blob length */
@@ -667,7 +695,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
     SevGuestState *sev
         = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
     char *devname;
-    int ret, fw_error;
+    int ret, fw_error, cmd;
     uint32_t ebx;
     uint32_t host_cbitpos;
     struct sev_user_data_status status = {};
@@ -724,8 +752,20 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
     sev->api_major = status.api_major;
     sev->api_minor = status.api_minor;
 
+    if (sev_es_enabled()) {
+        if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
+            error_report("%s: guest policy requires SEV-ES, but "
+                         "host SEV-ES support unavailable",
+                         __func__);
+            goto err;
+        }
+        cmd = KVM_SEV_ES_INIT;
+    } else {
+        cmd = KVM_SEV_INIT;
+    }
+
     trace_kvm_sev_init();
-    ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT, NULL, &fw_error);
+    ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error);
     if (ret) {
         error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
                    __func__, ret, fw_error, fw_error_to_str(fw_error));
diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h
index bd9f00a908..ae221d4c72 100644
--- a/target/i386/sev_i386.h
+++ b/target/i386/sev_i386.h
@@ -28,6 +28,7 @@
 #define SEV_POLICY_DOMAIN       0x10
 #define SEV_POLICY_SEV          0x20
 
+extern bool sev_es_enabled(void);
 extern uint64_t sev_get_me_mask(void);
 extern SevInfo *sev_get_info(void);
 extern uint32_t sev_get_cbit_position(void);

From 9681f8677f26320fff488e56b500a3d7d5cf1a49 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Jan 2021 11:36:45 -0600
Subject: [PATCH 04/21] sev/i386: Require in-kernel irqchip support for SEV-ES
 guests

In prep for AP booting, require the use of in-kernel irqchip support. This
lessens the Qemu support burden required to boot APs.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Message-Id: <e9aec5941e613456f0757f5a73869cdc5deea105.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/sev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index dc0e53019b..35b9259bfc 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -753,6 +753,12 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
     sev->api_minor = status.api_minor;
 
     if (sev_es_enabled()) {
+        if (!kvm_kernel_irqchip_allowed()) {
+            error_report("%s: SEV-ES guests require in-kernel irqchip support",
+                         __func__);
+            goto err;
+        }
+
         if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
             error_report("%s: guest policy requires SEV-ES, but "
                          "host SEV-ES support unavailable",

From b2f73a0784b7a5eae2022ccf3293792bd008cc64 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 8 Feb 2021 15:04:52 +0100
Subject: [PATCH 05/21] sev/i386: Allow AP booting under SEV-ES

When SEV-ES is enabled, it is not possible modify the guests register
state after it has been initially created, encrypted and measured.

Normally, an INIT-SIPI-SIPI request is used to boot the AP. However, the
hypervisor cannot emulate this because it cannot update the AP register
state. For the very first boot by an AP, the reset vector CS segment
value and the EIP value must be programmed before the register has been
encrypted and measured. Search the guest firmware for the guest for a
specific GUID that tells Qemu the value of the reset vector to use.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <22db2bfb4d6551aed661a9ae95b4fdbef613ca21.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/kvm/kvm-all.c    |   1 -
 hw/i386/pc_sysfw.c     |   8 +++
 include/sysemu/sev.h   |   4 ++
 target/i386/kvm/kvm.c  |   2 +
 target/i386/sev-stub.c |   9 +++
 target/i386/sev.c      | 128 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 151 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 47516913b7..bf61ef4b54 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -39,7 +39,6 @@
 #include "qemu/main-loop.h"
 #include "trace.h"
 #include "hw/irq.h"
-#include "sysemu/sev.h"
 #include "qapi/visitor.h"
 #include "qapi/qapi-types-common.h"
 #include "qapi/qapi-visit-common.h"
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 6404b5a86f..9fe72b370e 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -256,6 +256,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
     MemoryRegion *flash_mem;
     void *flash_ptr;
     int flash_size;
+    int ret;
 
     assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
 
@@ -308,6 +309,13 @@ static void pc_system_flash_map(PCMachineState *pcms,
                  * search for them
                  */
                 pc_system_parse_ovmf_flash(flash_ptr, flash_size);
+
+                ret = sev_es_save_reset_vector(flash_ptr, flash_size);
+                if (ret) {
+                    error_report("failed to locate and/or save reset vector");
+                    exit(1);
+                }
+
                 sev_encrypt_flash(flash_ptr, flash_size, &error_fatal);
             }
         }
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 882e8a4fb1..94d821d737 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -21,4 +21,8 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
 int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
 int sev_inject_launch_secret(const char *hdr, const char *secret,
                              uint64_t gpa, Error **errp);
+
+int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
+void sev_es_set_reset_vector(CPUState *cpu);
+
 #endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e97f841757..f56a8536d0 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1922,6 +1922,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
     }
     /* enabled by default */
     env->poll_control_msr = 1;
+
+    sev_es_set_reset_vector(CPU(cpu));
 }
 
 void kvm_arch_do_init_vcpu(X86CPU *cpu)
diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index edf6c519d7..0207f1c5aa 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -65,3 +65,12 @@ bool sev_es_enabled(void)
 {
     return false;
 }
+
+void sev_es_set_reset_vector(CPUState *cpu)
+{
+}
+
+int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
+{
+    abort();
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 35b9259bfc..4b70d4284f 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -22,6 +22,7 @@
 #include "qom/object_interfaces.h"
 #include "qemu/base64.h"
 #include "qemu/module.h"
+#include "qemu/uuid.h"
 #include "sysemu/kvm.h"
 #include "sev_i386.h"
 #include "sysemu/sysemu.h"
@@ -32,6 +33,7 @@
 #include "exec/address-spaces.h"
 #include "monitor/monitor.h"
 #include "exec/confidential-guest-support.h"
+#include "hw/i386/pc.h"
 
 #define TYPE_SEV_GUEST "sev-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
@@ -67,11 +69,21 @@ struct SevGuestState {
     int sev_fd;
     SevState state;
     gchar *measurement;
+
+    uint32_t reset_cs;
+    uint32_t reset_ip;
+    bool reset_data_valid;
 };
 
 #define DEFAULT_GUEST_POLICY    0x1 /* disable debug */
 #define DEFAULT_SEV_DEVICE      "/dev/sev"
 
+#define SEV_INFO_BLOCK_GUID     "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
+typedef struct __attribute__((__packed__)) SevInfoBlock {
+    /* SEV-ES Reset Vector Address */
+    uint32_t reset_addr;
+} SevInfoBlock;
+
 static SevGuestState *sev_guest;
 static Error *sev_mig_blocker;
 
@@ -879,6 +891,122 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret,
     return 0;
 }
 
+static int
+sev_es_parse_reset_block(SevInfoBlock *info, uint32_t *addr)
+{
+    if (!info->reset_addr) {
+        error_report("SEV-ES reset address is zero");
+        return 1;
+    }
+
+    *addr = info->reset_addr;
+
+    return 0;
+}
+
+static int
+sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size,
+                         uint32_t *addr)
+{
+    QemuUUID info_guid, *guid;
+    SevInfoBlock *info;
+    uint8_t *data;
+    uint16_t *len;
+
+    /*
+     * Initialize the address to zero. An address of zero with a successful
+     * return code indicates that SEV-ES is not active.
+     */
+    *addr = 0;
+
+    /*
+     * Extract the AP reset vector for SEV-ES guests by locating the SEV GUID.
+     * The SEV GUID is located on its own (original implementation) or within
+     * the Firmware GUID Table (new implementation), either of which are
+     * located 32 bytes from the end of the flash.
+     *
+     * Check the Firmware GUID Table first.
+     */
+    if (pc_system_ovmf_table_find(SEV_INFO_BLOCK_GUID, &data, NULL)) {
+        return sev_es_parse_reset_block((SevInfoBlock *)data, addr);
+    }
+
+    /*
+     * SEV info block not found in the Firmware GUID Table (or there isn't
+     * a Firmware GUID Table), fall back to the original implementation.
+     */
+    data = flash_ptr + flash_size - 0x20;
+
+    qemu_uuid_parse(SEV_INFO_BLOCK_GUID, &info_guid);
+    info_guid = qemu_uuid_bswap(info_guid); /* GUIDs are LE */
+
+    guid = (QemuUUID *)(data - sizeof(info_guid));
+    if (!qemu_uuid_is_equal(guid, &info_guid)) {
+        error_report("SEV information block/Firmware GUID Table block not found in pflash rom");
+        return 1;
+    }
+
+    len = (uint16_t *)((uint8_t *)guid - sizeof(*len));
+    info = (SevInfoBlock *)(data - le16_to_cpu(*len));
+
+    return sev_es_parse_reset_block(info, addr);
+}
+
+void sev_es_set_reset_vector(CPUState *cpu)
+{
+    X86CPU *x86;
+    CPUX86State *env;
+
+    /* Only update if we have valid reset information */
+    if (!sev_guest || !sev_guest->reset_data_valid) {
+        return;
+    }
+
+    /* Do not update the BSP reset state */
+    if (cpu->cpu_index == 0) {
+        return;
+    }
+
+    x86 = X86_CPU(cpu);
+    env = &x86->env;
+
+    cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff,
+                           DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
+                           DESC_R_MASK | DESC_A_MASK);
+
+    env->eip = sev_guest->reset_ip;
+}
+
+int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size)
+{
+    CPUState *cpu;
+    uint32_t addr;
+    int ret;
+
+    if (!sev_es_enabled()) {
+        return 0;
+    }
+
+    addr = 0;
+    ret = sev_es_find_reset_vector(flash_ptr, flash_size,
+                                   &addr);
+    if (ret) {
+        return ret;
+    }
+
+    if (addr) {
+        sev_guest->reset_cs = addr & 0xffff0000;
+        sev_guest->reset_ip = addr & 0x0000ffff;
+        sev_guest->reset_data_valid = true;
+
+        CPU_FOREACH(cpu) {
+            sev_es_set_reset_vector(cpu);
+        }
+    }
+
+    return 0;
+}
+
 static void
 sev_register_types(void)
 {

From 92a5199b29f6519aa5f774f4b96dc41954f641d1 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Jan 2021 11:36:47 -0600
Subject: [PATCH 06/21] sev/i386: Don't allow a system reset under an SEV-ES
 guest

An SEV-ES guest does not allow register state to be altered once it has
been measured. When an SEV-ES guest issues a reboot command, Qemu will
reset the vCPU state and resume the guest. This will cause failures under
SEV-ES. Prevent that from occuring by introducing an arch-specific
callback that returns a boolean indicating whether vCPUs are resettable.

Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: David Hildenbrand <david@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Message-Id: <1ac39c441b9a3e970e9556e1cc29d0a0814de6fd.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/kvm/kvm-all.c       |  5 +++++
 include/sysemu/cpus.h     |  2 ++
 include/sysemu/hw_accel.h |  5 +++++
 include/sysemu/kvm.h      | 10 ++++++++++
 softmmu/cpus.c            |  5 +++++
 softmmu/runstate.c        |  3 +++
 target/arm/kvm.c          |  5 +++++
 target/i386/kvm/kvm.c     |  6 ++++++
 target/mips/kvm.c         |  5 +++++
 target/ppc/kvm.c          |  5 +++++
 target/s390x/kvm.c        |  5 +++++
 11 files changed, 56 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index bf61ef4b54..84c943fcdb 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2312,6 +2312,11 @@ void kvm_flush_coalesced_mmio_buffer(void)
     s->coalesced_flush_in_progress = false;
 }
 
+bool kvm_cpu_check_are_resettable(void)
+{
+    return kvm_arch_cpu_check_are_resettable();
+}
+
 static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
     if (!cpu->vcpu_dirty) {
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 2cd74392e0..868f1192de 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -41,6 +41,8 @@ extern int icount_align_option;
 /* Unblock cpu */
 void qemu_cpu_kick_self(void);
 
+bool cpus_are_resettable(void);
+
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
 void cpu_synchronize_all_post_init(void);
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index ffed6192a3..61672f9b32 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -22,4 +22,9 @@ void cpu_synchronize_post_reset(CPUState *cpu);
 void cpu_synchronize_post_init(CPUState *cpu);
 void cpu_synchronize_pre_loadvm(CPUState *cpu);
 
+static inline bool cpu_check_are_resettable(void)
+{
+    return kvm_enabled() ? kvm_cpu_check_are_resettable() : true;
+}
+
 #endif /* QEMU_HW_ACCEL_H */
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c5546bdecc..687c598be9 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -541,4 +541,14 @@ int kvm_get_max_memslots(void);
 /* Notify resamplefd for EOI of specific interrupts. */
 void kvm_resample_fd_notify(int gsi);
 
+/**
+ * kvm_cpu_check_are_resettable - return whether CPUs can be reset
+ *
+ * Returns: true: CPUs are resettable
+ *          false: CPUs are not resettable
+ */
+bool kvm_cpu_check_are_resettable(void);
+
+bool kvm_arch_cpu_check_are_resettable(void);
+
 #endif
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 112eba9d54..a7ee431187 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -194,6 +194,11 @@ void cpu_synchronize_pre_loadvm(CPUState *cpu)
     }
 }
 
+bool cpus_are_resettable(void)
+{
+    return cpu_check_are_resettable();
+}
+
 int64_t cpus_get_virtual_clock(void)
 {
     /*
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index a7fcb603f7..2874417b61 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -528,6 +528,9 @@ void qemu_system_reset_request(ShutdownCause reason)
     if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
         reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
         shutdown_requested = reason;
+    } else if (!cpus_are_resettable()) {
+        error_report("cpus are not resettable, terminating");
+        shutdown_requested = reason;
     } else {
         reset_requested = reason;
     }
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index ffe186de8d..00e124c812 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1045,3 +1045,8 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
 {
     return (data - 32) & 0xffff;
 }
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return true;
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index f56a8536d0..d10667b21b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -27,6 +27,7 @@
 #include "sysemu/kvm_int.h"
 #include "sysemu/runstate.h"
 #include "kvm_i386.h"
+#include "sev_i386.h"
 #include "hyperv.h"
 #include "hyperv-proto.h"
 
@@ -4821,3 +4822,8 @@ bool kvm_has_waitpkg(void)
 {
     return has_msr_umwait;
 }
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return !sev_es_enabled();
+}
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 84fb10ea35..123ec1be7e 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -1290,3 +1290,8 @@ int mips_kvm_type(MachineState *machine, const char *vm_type)
 
     return -1;
 }
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return true;
+}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 0c5056dd5b..298c1f882c 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2929,3 +2929,8 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
     }
 }
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return true;
+}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index dc27fa36c9..7a892d663d 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2599,3 +2599,8 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
 
     kvm_s390_vcpu_interrupt(cpu, &irq);
 }
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return true;
+}

From 23edf8b549c7a8a520d42da19403864245f8977f Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Jan 2021 11:36:48 -0600
Subject: [PATCH 07/21] kvm/i386: Use a per-VM check for SMM capability

SMM is not currently supported for an SEV-ES guest by KVM. Change the SMM
capability check from a KVM-wide check to a per-VM check in order to have
a finer-grained SMM capability check.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Message-Id: <f851903809e9d4e6a22d5dfd738dac8da991e28d.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/kvm/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index d10667b21b..0b5755e42b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -137,7 +137,7 @@ int kvm_has_pit_state2(void)
 
 bool kvm_has_smm(void)
 {
-    return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
+    return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
 bool kvm_has_adjust_clock_stable(void)

From 027b524d6a427d7c89f4e8af44c49d96796adab5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 26 Jan 2021 11:36:49 -0600
Subject: [PATCH 08/21] sev/i386: Enable an SEV-ES guest based on SEV policy

Update the sev_es_enabled() function return value to be based on the SEV
policy that has been specified. SEV-ES is enabled if SEV is enabled and
the SEV-ES policy bit is set in the policy object.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Message-Id: <c69f81c6029f31fc4c52a9f35f1bd704362476a5.1611682609.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 4b70d4284f..0f414df02f 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -356,7 +356,7 @@ sev_enabled(void)
 bool
 sev_es_enabled(void)
 {
-    return false;
+    return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES);
 }
 
 uint64_t

From f6a2c6eee77458a1f2cf6632b2d9f2fd97bf595e Mon Sep 17 00:00:00 2001
From: Christian Schoenebeck <qemu_oss@crudebyte.com>
Date: Wed, 27 Jan 2021 00:00:34 +0100
Subject: [PATCH 09/21] libqos/qgraph: add qos_node_create_driver_named()

So far the qos subsystem of the qtest framework had the limitation
that only one instance of the same official QEMU (QMP) driver name
could be created for qtests. That's because a) the created qos
node names must always be unique, b) the node name must match the
official QEMU driver name being instantiated and c) all nodes are
in a global space shared by all tests.

This patch removes this limitation by introducing a new function
qos_node_create_driver_named() which allows test case authors to
specify a node name being different from the actual associated
QEMU driver name. It fills the new 'qemu_name' field of
QOSGraphNode for that purpose.

Adjust build_driver_cmd_line() and qos_graph_node_set_availability()
to correctly deal with either accessing node name vs. node's
qemu_name correctly.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Message-Id: <3be962ff38f3396f8040deaa5ffdab525c4e0b16.1611704181.git.qemu_oss@crudebyte.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/qtest/libqos/qgraph.c          | 54 ++++++++++++++++++++++++++--
 tests/qtest/libqos/qgraph.h          | 16 +++++++++
 tests/qtest/libqos/qgraph_internal.h |  1 +
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/tests/qtest/libqos/qgraph.c b/tests/qtest/libqos/qgraph.c
index fc49cfa879..61faf6b27d 100644
--- a/tests/qtest/libqos/qgraph.c
+++ b/tests/qtest/libqos/qgraph.c
@@ -153,6 +153,7 @@ static QOSGraphNode *create_node(const char *name, QOSNodeType type)
 static void destroy_node(void *val)
 {
     QOSGraphNode *node = val;
+    g_free(node->qemu_name);
     g_free(node->command_line);
     g_free(node);
 }
@@ -286,7 +287,8 @@ static void build_machine_cmd_line(QOSGraphNode *node, const char *args)
  */
 static void build_driver_cmd_line(QOSGraphNode *node)
 {
-    node->command_line = g_strconcat(" -device ", node->name, NULL);
+    const char *name = node->qemu_name ?: node->name;
+    node->command_line = g_strconcat(" -device ", name, NULL);
 }
 
 /* qos_print_cb(): callback prints all path found by the DFS algorithm. */
@@ -631,6 +633,15 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function)
     node->u.driver.constructor = function;
 }
 
+void qos_node_create_driver_named(const char *name, const char *qemu_name,
+                                  QOSCreateDriverFunc function)
+{
+    QOSGraphNode *node = create_node(name, QNODE_DRIVER);
+    node->qemu_name = g_strdup(qemu_name);
+    build_driver_cmd_line(node);
+    node->u.driver.constructor = function;
+}
+
 void qos_node_contains(const char *container, const char *contained,
                        QOSGraphEdgeOptions *opts, ...)
 {
@@ -663,7 +674,7 @@ void qos_node_consumes(const char *consumer, const char *interface,
     add_edge(interface, consumer, QEDGE_CONSUMED_BY, opts);
 }
 
-void qos_graph_node_set_availability(const char *node, bool av)
+static void qos_graph_node_set_availability_explicit(const char *node, bool av)
 {
     QOSGraphEdgeList *elist;
     QOSGraphNode *n = search_node(node);
@@ -678,11 +689,48 @@ void qos_graph_node_set_availability(const char *node, bool av)
     }
     QSLIST_FOREACH_SAFE(e, elist, edge_list, next) {
         if (e->type == QEDGE_CONTAINS || e->type == QEDGE_PRODUCES) {
-            qos_graph_node_set_availability(e->dest, av);
+            qos_graph_node_set_availability_explicit(e->dest, av);
         }
     }
 }
 
+/*
+ * Behaves as qos_graph_node_set_availability_explicit(), except that the
+ * former always matches by node name only, whereas this function matches both
+ * by node name and node's optional 'qemu_name' field.
+ */
+void qos_graph_node_set_availability(const char *node, bool av)
+{
+    GList *l;
+    QOSGraphEdgeList *elist;
+    QOSGraphEdge *e, *next;
+    QOSGraphNode *n;
+    GList *keys = g_hash_table_get_keys(node_table);
+
+    for (l = keys; l != NULL; l = l->next) {
+        const gchar *key = l->data;
+        n = g_hash_table_lookup(node_table, key);
+        /*
+         * node's 'qemu_name' is set if there is more than one device with
+         * the same QEMU (QMP) device name
+         */
+        const char *node_name = n->qemu_name ?: n->name;
+        if (g_strcmp0(node_name, node) == 0) {
+            n->available = av;
+            elist = get_edgelist(n->name);
+            if (elist) {
+                QSLIST_FOREACH_SAFE(e, elist, edge_list, next) {
+                    if (e->type == QEDGE_CONTAINS || e->type == QEDGE_PRODUCES)
+                    {
+                        qos_graph_node_set_availability_explicit(e->dest, av);
+                    }
+                }
+            }
+        }
+    }
+    g_list_free(keys);
+}
+
 void qos_graph_foreach_test_path(QOSTestCallback fn)
 {
     QOSGraphNode *root = qos_graph_get_node(QOS_ROOT);
diff --git a/tests/qtest/libqos/qgraph.h b/tests/qtest/libqos/qgraph.h
index 5f63d352ca..f472949f68 100644
--- a/tests/qtest/libqos/qgraph.h
+++ b/tests/qtest/libqos/qgraph.h
@@ -452,6 +452,22 @@ void qos_node_create_machine_args(const char *name,
  */
 void qos_node_create_driver(const char *name, QOSCreateDriverFunc function);
 
+/**
+ * Behaves as qos_node_create_driver() with the extension of allowing to
+ * specify a different node name vs. associated QEMU device name.
+ *
+ * Use this function instead of qos_node_create_driver() if you need to create
+ * several instances of the same QEMU device. You are free to choose a custom
+ * node name, however the chosen node name must always be unique.
+ *
+ * @param name: custom, unique name of the node to be created
+ * @param qemu_name: actual (official) QEMU driver name the node shall be
+ *                   associated with
+ * @param function: driver constructor
+ */
+void qos_node_create_driver_named(const char *name, const char *qemu_name,
+                                  QOSCreateDriverFunc function);
+
 /**
  * qos_node_contains(): creates one or more edges of type QEDGE_CONTAINS
  * and adds them to the edge list mapped to @container in the
diff --git a/tests/qtest/libqos/qgraph_internal.h b/tests/qtest/libqos/qgraph_internal.h
index 968fa69450..974985dce9 100644
--- a/tests/qtest/libqos/qgraph_internal.h
+++ b/tests/qtest/libqos/qgraph_internal.h
@@ -56,6 +56,7 @@ struct QOSGraphNode {
     bool available;     /* set by QEMU via QMP, used during graph walk */
     bool visited;       /* used during graph walk */
     char *name;         /* used to identify the node */
+    char *qemu_name;    /* optional: see qos_node_create_driver_named() */
     char *command_line; /* used to start QEMU at test execution */
     union {
         struct {

From 23820025af6b356cd4061a8b029c1126e1ee915e Mon Sep 17 00:00:00 2001
From: Christian Schoenebeck <qemu_oss@crudebyte.com>
Date: Wed, 27 Jan 2021 00:04:22 +0100
Subject: [PATCH 10/21] libqos/qgraph_internal: add qos_printf() and
 qos_printf_literal()

These two are macros wrapping regular printf() call. They are intended
to be used instead of calling printf() directly in order to avoid
breaking TAP output format.

TAP output format is enabled by using --tap command line argument.
Starting with glib 2.62 it is enabled by default.

Unfortunately there is currently no public glib API available to check
whether TAP output format is enabled. For that reason qos_printf()
simply always prepends a '#' character for now.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <653a5ef61c5e7d160e4d6294e542c57ea324cee4.1611704181.git.qemu_oss@crudebyte.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/qtest/libqos/qgraph_internal.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/qtest/libqos/qgraph_internal.h b/tests/qtest/libqos/qgraph_internal.h
index 974985dce9..c0025f5ab9 100644
--- a/tests/qtest/libqos/qgraph_internal.h
+++ b/tests/qtest/libqos/qgraph_internal.h
@@ -255,4 +255,15 @@ void qos_delete_cmd_line(const char *name);
  */
 void qos_graph_node_set_availability(const char *node, bool av);
 
+/*
+ * Prepends a '#' character in front for not breaking TAP output format.
+ */
+#define qos_printf(...) printf("# " __VA_ARGS__)
+
+/*
+ * Intended for printing something literally, i.e. for appending text as is
+ * to a line already been started by qos_printf() before.
+ */
+#define qos_printf_literal printf
+
 #endif

From 83ff78e5674ccf01a2092c230c893cb2ef41a1a6 Mon Sep 17 00:00:00 2001
From: Christian Schoenebeck <qemu_oss@crudebyte.com>
Date: Wed, 27 Jan 2021 00:08:03 +0100
Subject: [PATCH 11/21] tests/qtest/qos-test: dump qos graph if verbose

If qtests were run in verbose mode (i.e. if --verbose CL argument was
provided) then dump the generated qos graph (all nodes and edges,
along with their current individual availability status) to stdout,
which allows to identify problems in the created qos graph e.g. when
writing new qos tests.

See API doc comment on function qos_dump_graph() for details.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Message-Id: <6bffb6e38589fb2c06a2c1b5deed33f3e710fed1.1611704181.git.qemu_oss@crudebyte.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/qtest/libqos/qgraph.c | 45 +++++++++++++++++++++++++++++++++++++
 tests/qtest/libqos/qgraph.h | 20 +++++++++++++++++
 tests/qtest/qos-test.c      |  3 +++
 3 files changed, 68 insertions(+)

diff --git a/tests/qtest/libqos/qgraph.c b/tests/qtest/libqos/qgraph.c
index 61faf6b27d..b3b1a31f81 100644
--- a/tests/qtest/libqos/qgraph.c
+++ b/tests/qtest/libqos/qgraph.c
@@ -805,3 +805,48 @@ void qos_delete_cmd_line(const char *name)
         node->command_line = NULL;
     }
 }
+
+void qos_dump_graph(void)
+{
+    GList *keys;
+    GList *l;
+    QOSGraphEdgeList *list;
+    QOSGraphEdge *e, *next;
+    QOSGraphNode *dest_node, *node;
+
+    qos_printf("ALL QGRAPH EDGES: {\n");
+    keys = g_hash_table_get_keys(edge_table);
+    for (l = keys; l != NULL; l = l->next) {
+        const gchar *key = l->data;
+        qos_printf("\t src='%s'\n", key);
+        list = get_edgelist(key);
+        QSLIST_FOREACH_SAFE(e, list, edge_list, next) {
+            dest_node = g_hash_table_lookup(node_table, e->dest);
+            qos_printf("\t\t|-> dest='%s' type=%d (node=%p)",
+                       e->dest, e->type, dest_node);
+            if (!dest_node) {
+                qos_printf_literal(" <------- ERROR !");
+            }
+            qos_printf_literal("\n");
+        }
+    }
+    g_list_free(keys);
+    qos_printf("}\n");
+
+    qos_printf("ALL QGRAPH NODES: {\n");
+    keys = g_hash_table_get_keys(node_table);
+    for (l = keys; l != NULL; l = l->next) {
+        const gchar *key = l->data;
+        node = g_hash_table_lookup(node_table, key);
+        qos_printf("\t name='%s' ", key);
+        if (node->qemu_name) {
+            qos_printf_literal("qemu_name='%s' ", node->qemu_name);
+        }
+        qos_printf_literal("type=%d cmd_line='%s' [%s]\n",
+                           node->type, node->command_line,
+                           node->available ? "available" : "UNAVAILBLE"
+        );
+    }
+    g_list_free(keys);
+    qos_printf("}\n");
+}
diff --git a/tests/qtest/libqos/qgraph.h b/tests/qtest/libqos/qgraph.h
index f472949f68..07a32535f1 100644
--- a/tests/qtest/libqos/qgraph.h
+++ b/tests/qtest/libqos/qgraph.h
@@ -586,5 +586,25 @@ QOSGraphObject *qos_machine_new(QOSGraphNode *node, QTestState *qts);
 QOSGraphObject *qos_driver_new(QOSGraphNode *node, QOSGraphObject *parent,
                                QGuestAllocator *alloc, void *arg);
 
+/**
+ * Just for debugging purpose: prints all currently existing nodes and
+ * edges to stdout.
+ *
+ * All qtests add themselves to the overall qos graph by calling qgraph
+ * functions that add device nodes and edges between the individual graph
+ * nodes for tests. As the actual graph is assmbled at runtime by the qos
+ * subsystem, it is sometimes not obvious how the overall graph looks like.
+ * E.g. when writing new tests it may happen that those new tests are simply
+ * ignored by the qtest framework.
+ *
+ * This function allows to identify problems in the created qgraph. Keep in
+ * mind: only tests with a path down from the actual test case node (leaf) up
+ * to the graph's root node are actually executed by the qtest framework. And
+ * the qtest framework uses QMP to automatically check which QEMU drivers are
+ * actually currently available, and accordingly qos marks certain pathes as
+ * 'unavailable' in such cases (e.g. when QEMU was compiled without support for
+ * a certain feature).
+ */
+void qos_dump_graph(void);
 
 #endif
diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c
index 8fdf87b183..d98ef78613 100644
--- a/tests/qtest/qos-test.c
+++ b/tests/qtest/qos-test.c
@@ -322,6 +322,9 @@ int main(int argc, char **argv)
     qos_set_machines_devices_available();
 
     qos_graph_foreach_test_path(walk_path);
+    if (g_test_verbose()) {
+        qos_dump_graph();
+    }
     g_test_run();
     qtest_end();
     qos_graph_destroy();

From 093360dc32cf70d3651496b58dc16b22f4971dcc Mon Sep 17 00:00:00 2001
From: Christian Schoenebeck <qemu_oss@crudebyte.com>
Date: Wed, 27 Jan 2021 00:17:36 +0100
Subject: [PATCH 12/21] tests/qtest/qos-test: dump environment variables if
 verbose

If qtests are run in verbose mode (i.e. if --verbose CL argument
was provided) then print all environment variables to stdout
before running the individual tests.

It is common nowadays, at least being able to output all config
vectors in a build chain, especially if it is required to
investigate build- and test-issues on foreign/remote machines,
which includes environment variables. In the context of writing
new test cases this is also useful for finding out whether there
are already some existing options for common questions like is
there a preferred location for writing test files to? Is there
a maximum size for test data? Is there a deadline for running
tests?

Use qos_printf() instead of g_test_message() to avoid the latter
cluttering the output.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Message-Id: <21d77b33c578d80b5bba1068e61fd3562958b3c2.1611704181.git.qemu_oss@crudebyte.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/qtest/qos-test.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c
index d98ef78613..b279b6f816 100644
--- a/tests/qtest/qos-test.c
+++ b/tests/qtest/qos-test.c
@@ -313,9 +313,16 @@ static void walk_path(QOSGraphNode *orig_path, int len)
  *   machine/drivers/test objects
  * - Cleans up everything
  */
-int main(int argc, char **argv)
+int main(int argc, char **argv, char** envp)
 {
     g_test_init(&argc, &argv, NULL);
+    if (g_test_verbose()) {
+        qos_printf("ENVIRONMENT VARIABLES: {\n");
+        for (char **env = envp; *env != 0; env++) {
+            qos_printf("\t%s\n", *env);
+        }
+        qos_printf("}\n");
+    }
     qos_graph_init();
     module_call_init(MODULE_INIT_QOM);
     module_call_init(MODULE_INIT_LIBQOS);

From b0019c995e0397092d5db5caa8262b67036c2a89 Mon Sep 17 00:00:00 2001
From: Christian Schoenebeck <qemu_oss@crudebyte.com>
Date: Wed, 27 Jan 2021 00:26:16 +0100
Subject: [PATCH 13/21] tests/qtest/qos-test: dump QEMU command if verbose

If qtests are run in verbose mode (i.e. if --verbose CL argument
was provided) then print the assembled qemu command line for each
test.

Use qos_printf() instead of g_test_message() to avoid the latter
cluttering the output.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Message-Id: <110bef3595cb841dfa1b86733c174ac9774eb37e.1611704181.git.qemu_oss@crudebyte.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/qtest/qos-test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c
index b279b6f816..f97d0a08fd 100644
--- a/tests/qtest/qos-test.c
+++ b/tests/qtest/qos-test.c
@@ -89,6 +89,9 @@ static void qos_set_machines_devices_available(void)
 
 static void restart_qemu_or_continue(char *path)
 {
+    if (g_test_verbose()) {
+        qos_printf("Run QEMU with: '%s'\n", path);
+    }
     /* compares the current command line with the
      * one previously executed: if they are the same,
      * don't restart QEMU, if they differ, stop previous

From 342e3a4f20653c2d419cc0e8fdc0b99dfea32fed Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Mon, 8 Feb 2021 21:57:52 +0100
Subject: [PATCH 14/21] util/cutils: Skip "." when looking for next directory
 component

When looking for the next directory component, a "." component is now skipped.

This fixes the path(s) used for firmware lookup for the prefix == bindir case
which is standard for QEMU on Windows and where the internally
used bindir value ends with "/.".

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Message-Id: <20210208205752.2488774-1-sw@weilnetz.de>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/cutils.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/util/cutils.c b/util/cutils.c
index 0b5073b330..70c7d6efbd 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -916,7 +916,8 @@ static inline bool starts_with_prefix(const char *dir)
 static inline const char *next_component(const char *dir, int *p_len)
 {
     int len;
-    while (*dir && G_IS_DIR_SEPARATOR(*dir)) {
+    while ((*dir && G_IS_DIR_SEPARATOR(*dir)) ||
+           (*dir == '.' && (G_IS_DIR_SEPARATOR(dir[1]) || dir[1] == '\0'))) {
         dir++;
     }
     len = 0;

From 118f2aadbc66aaae4e8d52259288e18f2aa4544a Mon Sep 17 00:00:00 2001
From: Hill Ma <maahiuzeon@gmail.com>
Date: Tue, 12 Jan 2021 22:07:35 -0800
Subject: [PATCH 15/21] hvf: Guard xgetbv call

This prevents illegal instruction on cpus that do not support xgetbv.

Buglink: https://bugs.launchpad.net/qemu/+bug/1758819
Reviewed-by: Cameron Esfahani <dirty@apple.com>
Signed-off-by: Hill Ma <maahiuzeon@gmail.com>
Message-Id: <X/6OJ7qk0W6bHkHQ@Hills-Mac-Pro.local>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/x86_cpuid.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index a6842912f5..32b0d131df 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -27,15 +27,22 @@
 #include "vmx.h"
 #include "sysemu/hvf.h"
 
-static uint64_t xgetbv(uint32_t xcr)
+static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr)
 {
-    uint32_t eax, edx;
+    uint32_t xcrl, xcrh;
 
-    __asm__ volatile ("xgetbv"
-                      : "=a" (eax), "=d" (edx)
-                      : "c" (xcr));
+    if (cpuid_ecx & CPUID_EXT_OSXSAVE) {
+        /*
+         * The xgetbv instruction is not available to older versions of
+         * the assembler, so we encode the instruction manually.
+         */
+        asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx));
 
-    return (((uint64_t)edx) << 32) | eax;
+        *xcr = (((uint64_t)xcrh) << 32) | xcrl;
+        return true;
+    }
+
+    return false;
 }
 
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
@@ -100,12 +107,15 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
         break;
     case 0xD:
         if (idx == 0) {
-            uint64_t host_xcr0 = xgetbv(0);
-            uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK |
-                                  XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK |
-                                  XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK |
-                                  XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK);
-            eax &= supp_xcr0;
+            uint64_t host_xcr0;
+            if (xgetbv(ecx, 0, &host_xcr0)) {
+                uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK |
+                                  XSTATE_SSE_MASK | XSTATE_YMM_MASK |
+                                  XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK |
+                                  XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK |
+                                  XSTATE_Hi16_ZMM_MASK);
+                eax &= supp_xcr0;
+            }
         } else if (idx == 1) {
             hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap);
             eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1;

From 3b502b0e470867369ba6e0a94e9ba6d91bb176c2 Mon Sep 17 00:00:00 2001
From: Vladislav Yaroshchuk <yaroshchuk2000@gmail.com>
Date: Fri, 22 Jan 2021 18:05:18 +0300
Subject: [PATCH 16/21] target/i386/hvf: add vmware-cpuid-freq cpu feature

For `-accel hvf` cpu_x86_cpuid() is wrapped with hvf_cpu_x86_cpuid() to
add paravirtualization cpuid leaf 0x40000010
https://lkml.org/lkml/2008/10/1/246

Leaf 0x40000010, Timing Information:
EAX: (Virtual) TSC frequency in kHz.
EBX: (Virtual) Bus (local apic timer) frequency in kHz.
ECX, EDX: RESERVED (Per above, reserved fields are set to zero).

On macOS TSC and APIC Bus frequencies can be readed by sysctl call with
names `machdep.tsc.frequency` and `hw.busfrequency`

This options is required for Darwin-XNU guest to be synchronized with
host

Leaf 0x40000000 not exposes HVF leaving hypervisor signature empty

Signed-off-by: Vladislav Yaroshchuk <yaroshchuk2000@gmail.com>
Message-Id: <20210122150518.3551-1-yaroshchuk2000@gmail.com>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c | 96 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 5b90dcdf88..10a06c3c79 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -65,6 +65,7 @@
 
 #include <Hypervisor/hv.h>
 #include <Hypervisor/hv_vmx.h>
+#include <sys/sysctl.h>
 
 #include "exec/address-spaces.h"
 #include "hw/i386/apic_internal.h"
@@ -456,6 +457,48 @@ static void dummy_signal(int sig)
 {
 }
 
+static void init_tsc_freq(CPUX86State *env)
+{
+    size_t length;
+    uint64_t tsc_freq;
+
+    if (env->tsc_khz != 0) {
+        return;
+    }
+
+    length = sizeof(uint64_t);
+    if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
+        return;
+    }
+    env->tsc_khz = tsc_freq / 1000;  /* Hz to KHz */
+}
+
+static void init_apic_bus_freq(CPUX86State *env)
+{
+    size_t length;
+    uint64_t bus_freq;
+
+    if (env->apic_bus_freq != 0) {
+        return;
+    }
+
+    length = sizeof(uint64_t);
+    if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
+        return;
+    }
+    env->apic_bus_freq = bus_freq;
+}
+
+static inline bool tsc_is_known(CPUX86State *env)
+{
+    return env->tsc_khz != 0;
+}
+
+static inline bool apic_bus_freq_is_known(CPUX86State *env)
+{
+    return env->apic_bus_freq != 0;
+}
+
 int hvf_init_vcpu(CPUState *cpu)
 {
 
@@ -480,6 +523,15 @@ int hvf_init_vcpu(CPUState *cpu)
     hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
     env->hvf_mmio_buf = g_new(char, 4096);
 
+    if (x86cpu->vmware_cpuid_freq) {
+        init_tsc_freq(env);
+        init_apic_bus_freq(env);
+
+        if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
+            error_report("vmware-cpuid-freq: feature couldn't be enabled");
+        }
+    }
+
     r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
     cpu->vcpu_dirty = 1;
     assert_hvf_ok(r);
@@ -597,6 +649,48 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in
     }
 }
 
+static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                              uint32_t *eax, uint32_t *ebx,
+                              uint32_t *ecx, uint32_t *edx)
+{
+    /*
+     * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs,
+     * leafs 0x40000001-0x4000000F are filled with zeros
+     * Provides vmware-cpuid-freq support to hvf
+     *
+     * Note: leaf 0x40000000 not exposes HVF,
+     * leaving hypervisor signature empty
+     */
+
+    if (index < 0x40000000 || index > 0x40000010 ||
+        !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
+
+        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
+        return;
+    }
+
+    switch (index) {
+    case 0x40000000:
+        *eax = 0x40000010;    /* Max available cpuid leaf */
+        *ebx = 0;             /* Leave signature empty */
+        *ecx = 0;
+        *edx = 0;
+        break;
+    case 0x40000010:
+        *eax = env->tsc_khz;
+        *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
+        *ecx = 0;
+        *edx = 0;
+        break;
+    default:
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        break;
+    }
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -734,7 +828,7 @@ int hvf_vcpu_exec(CPUState *cpu)
             uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
             uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);
 
-            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
+            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
 
             wreg(cpu->hvf_fd, HV_X86_RAX, rax);
             wreg(cpu->hvf_fd, HV_X86_RBX, rbx);

From 45f918ccf6c35ee1912a8847873b7ba5b6927b46 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@csgraf.de>
Date: Wed, 20 Jan 2021 23:44:35 +0100
Subject: [PATCH 17/21] hvf: x86: Remove unused definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hvf i386 has a few struct and cpp definitions that are never
used. Remove them.

Suggested-by: Roman Bolshakov <r.bolshakov@yadro.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Alexander Graf <agraf@csgraf.de>
Message-Id: <20210120224444.71840-3-agraf@csgraf.de>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf-i386.h | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index 50b914fd67..59cfca8875 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -21,21 +21,6 @@
 #include "cpu.h"
 #include "x86.h"
 
-#define HVF_MAX_VCPU 0x10
-
-extern struct hvf_state hvf_global;
-
-struct hvf_vm {
-    int id;
-    struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU];
-};
-
-struct hvf_state {
-    uint32_t version;
-    struct hvf_vm *vm;
-    uint64_t mem_quota;
-};
-
 /* hvf_slot flags */
 #define HVF_SLOT_LOG (1 << 0)
 
@@ -75,7 +60,6 @@ hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
 
 /* Host specific functions */
 int hvf_inject_interrupt(CPUArchState *env, int vector);
-int hvf_vcpu_run(struct hvf_vcpu_state *vcpu);
 #endif
 
 #endif

From 027ac0cb516cca4ce8a88dcca2f759c77e0e374b Mon Sep 17 00:00:00 2001
From: Vladislav Yaroshchuk <yaroshchuk2000@gmail.com>
Date: Wed, 13 Jan 2021 23:53:23 +0300
Subject: [PATCH 18/21] target/i386/hvf: add rdmsr 35H MSR_CORE_THREAD_COUNT

Some guests (ex. Darwin-XNU) can attemp to read this MSR to retrieve and
validate CPU topology comparing it to ACPI MADT content

MSR description from Intel Manual:
35H: MSR_CORE_THREAD_COUNT: Configured State of Enabled Processor Core
  Count and Logical Processor Count

Bits 15:0 THREAD_COUNT The number of logical processors that are
  currently enabled in the physical package

Bits 31:16 Core_COUNT The number of processor cores that are currently
  enabled in the physical package

Bits 63:32 Reserved

Signed-off-by: Vladislav Yaroshchuk <yaroshchuk2000@gmail.com>
Message-Id: <20210113205323.33310-1-yaroshchuk2000@gmail.com>
[RB: reordered MSR definition and dropped u suffix from shift offset]
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h         | 1 +
 target/i386/hvf/x86_emu.c | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8d599bb5b8..82c1ac00ef 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -368,6 +368,7 @@ typedef enum X86Seg {
 
 #define MSR_IA32_SMBASE                 0x9e
 #define MSR_SMI_COUNT                   0x34
+#define MSR_CORE_THREAD_COUNT           0x35
 #define MSR_MTRRcap                     0xfe
 #define MSR_MTRRcap_VCNT                8
 #define MSR_MTRRcap_FIXRANGE_SUPPORT    (1 << 8)
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index da570e352b..e52c39ddb1 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -668,6 +668,7 @@ void simulate_rdmsr(struct CPUState *cpu)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
     CPUX86State *env = &x86_cpu->env;
+    CPUState *cs = env_cpu(env);
     uint32_t msr = ECX(env);
     uint64_t val = 0;
 
@@ -745,6 +746,10 @@ void simulate_rdmsr(struct CPUState *cpu)
     case MSR_MTRRdefType:
         val = env->mtrr_deftype;
         break;
+    case MSR_CORE_THREAD_COUNT:
+        val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */
+        val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */
+        break;
     default:
         /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
         val = 0;

From 106f91d59c373b63f227b8827ff18ac9c9068d2f Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@csgraf.de>
Date: Sat, 23 Jan 2021 01:41:29 +0100
Subject: [PATCH 19/21] hvf: Fetch cr4 before evaluating CPUID(1)

The CPUID function 1 has a bit called OSXSAVE which tells user space the
status of the CR4.OSXSAVE bit. Our generic CPUID function injects that bit
based on the status of CR4.

With Hypervisor.framework, we do not synchronize full CPU state often enough
for this function to see the CR4 update before guest user space asks for it.

To be on the save side, let's just always synchronize it when we receive a
CPUID(1) request. That way we can set the bit with real confidence.

Reported-by: Asad Ali <asad@osaro.com>
Signed-off-by: Alexander Graf <agraf@csgraf.de>
Message-Id: <20210123004129.6364-1-agraf@csgraf.de>
[RB: resolved conflict with another CPUID change]
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/hvf/hvf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 10a06c3c79..15f14ac69e 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -828,6 +828,10 @@ int hvf_vcpu_exec(CPUState *cpu)
             uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
             uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);
 
+            if (rax == 1) {
+                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
+                env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
+            }
             hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
 
             wreg(cpu->hvf_fd, HV_X86_RAX, rax);

From 82e2756897810b6e17e0c352101878b97b1e2688 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Tue, 16 Feb 2021 13:02:47 +0100
Subject: [PATCH 20/21] event_notifier: Set ->initialized earlier in
 event_notifier_init()

Otherwise the call to event_notifier_set() is a nop, which causes
the SLOF firmware on POWER to hang when booting from a virtio-scsi
device:

virtio_scsi_dataplane_start()
 virtio_scsi_vring_init()
  virtio_bus_set_host_notifier() <- assign == true
   event_notifier_init() <- active == 1
    event_notifier_set() <- fails right away if !e->initialized

Fixes: e34e47eb28c0 ("event_notifier: handle initialization failure better")
Cc: mlevitsk@redhat.com
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20210216120247.1293569-1-groug@kaod.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/event_notifier-posix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 5b2110e861..8307013c5d 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -66,10 +66,10 @@ int event_notifier_init(EventNotifier *e, int active)
         e->rfd = fds[0];
         e->wfd = fds[1];
     }
+    e->initialized = true;
     if (active) {
         event_notifier_set(e);
     }
-    e->initialized = true;
     return 0;
 
 fail:

From 366a85e4bb748794b1ae0ca0ccc2d95f316679a0 Mon Sep 17 00:00:00 2001
From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>
Date: Tue, 16 Feb 2021 15:51:44 +0300
Subject: [PATCH 21/21] replay: fix icount request when replaying clock access

Record/replay provides REPLAY_CLOCK_LOCKED macro to access
the clock when vm_clock_seqlock is locked. This macro is
needed because replay internals operate icount. In locked case
replay use icount_get_raw_locked for icount request, which prevents
excess locking which leads to deadlock. But previously only
record code used *_locked function and replay did not.
Therefore sometimes clock access lead to deadlocks.
This patch fixes clock access for replay too and uses *_locked
icount access function.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Message-Id: <161347990483.1313189.8371838968343494161.stgit@pasha-ThinkPad-X280>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/sysemu/replay.h  | 14 ++++++++------
 replay/replay-internal.c | 29 +++++++++++++++++++++++++----
 replay/replay-time.c     |  4 ++--
 replay/replay.c          | 23 +----------------------
 stubs/replay-tools.c     |  2 +-
 5 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 56c0c17c30..0f3b0f7eac 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -128,18 +128,20 @@ bool replay_has_interrupt(void);
 int64_t replay_save_clock(ReplayClockKind kind, int64_t clock,
                           int64_t raw_icount);
 /*! Read the specified clock from the log or return cached data */
-int64_t replay_read_clock(ReplayClockKind kind);
+int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount);
 /*! Saves or reads the clock depending on the current replay mode. */
 #define REPLAY_CLOCK(clock, value)                                      \
-    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+    (replay_mode == REPLAY_MODE_PLAY                                    \
+        ? replay_read_clock((clock), icount_get_raw())                  \
         : replay_mode == REPLAY_MODE_RECORD                             \
-            ? replay_save_clock((clock), (value), icount_get_raw()) \
-        : (value))
+            ? replay_save_clock((clock), (value), icount_get_raw())     \
+            : (value))
 #define REPLAY_CLOCK_LOCKED(clock, value)                               \
-    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+    (replay_mode == REPLAY_MODE_PLAY                                    \
+        ? replay_read_clock((clock), icount_get_raw_locked())           \
         : replay_mode == REPLAY_MODE_RECORD                             \
             ? replay_save_clock((clock), (value), icount_get_raw_locked()) \
-        : (value))
+            : (value))
 
 /* Processing data from random generators */
 
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index 2e8a3e947a..77d0c82327 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -247,10 +247,31 @@ void replay_advance_current_icount(uint64_t current_icount)
     /* Time can only go forward */
     assert(diff >= 0);
 
-    if (diff > 0) {
-        replay_put_event(EVENT_INSTRUCTION);
-        replay_put_dword(diff);
-        replay_state.current_icount += diff;
+    if (replay_mode == REPLAY_MODE_RECORD) {
+        if (diff > 0) {
+            replay_put_event(EVENT_INSTRUCTION);
+            replay_put_dword(diff);
+            replay_state.current_icount += diff;
+        }
+    } else if (replay_mode == REPLAY_MODE_PLAY) {
+        if (diff > 0) {
+            replay_state.instruction_count -= diff;
+            replay_state.current_icount += diff;
+            if (replay_state.instruction_count == 0) {
+                assert(replay_state.data_kind == EVENT_INSTRUCTION);
+                replay_finish_event();
+                /* Wake up iothread. This is required because
+                    timers will not expire until clock counters
+                    will be read from the log. */
+                qemu_notify_event();
+            }
+        }
+        /* Execution reached the break step */
+        if (replay_break_icount == replay_state.current_icount) {
+            /* Cannot make callback directly from the vCPU thread */
+            timer_mod_ns(replay_break_timer,
+                qemu_clock_get_ns(QEMU_CLOCK_REALTIME));
+        }
     }
 }
 
diff --git a/replay/replay-time.c b/replay/replay-time.c
index 43357c9f24..00ebcb7a49 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -46,12 +46,12 @@ void replay_read_next_clock(ReplayClockKind kind)
 }
 
 /*! Reads next clock event from the input. */
-int64_t replay_read_clock(ReplayClockKind kind)
+int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount)
 {
     int64_t ret;
     g_assert(replay_file && replay_mutex_locked());
 
-    replay_account_executed_instructions();
+    replay_advance_current_icount(raw_icount);
 
     if (replay_next_event_is(EVENT_CLOCK + kind)) {
         replay_read_next_clock(kind);
diff --git a/replay/replay.c b/replay/replay.c
index d4c228ab28..c806fec69a 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -94,28 +94,7 @@ void replay_account_executed_instructions(void)
     if (replay_mode == REPLAY_MODE_PLAY) {
         g_assert(replay_mutex_locked());
         if (replay_state.instruction_count > 0) {
-            int count = (int)(replay_get_current_icount()
-                              - replay_state.current_icount);
-
-            /* Time can only go forward */
-            assert(count >= 0);
-
-            replay_state.instruction_count -= count;
-            replay_state.current_icount += count;
-            if (replay_state.instruction_count == 0) {
-                assert(replay_state.data_kind == EVENT_INSTRUCTION);
-                replay_finish_event();
-                /* Wake up iothread. This is required because
-                   timers will not expire until clock counters
-                   will be read from the log. */
-                qemu_notify_event();
-            }
-            /* Execution reached the break step */
-            if (replay_break_icount == replay_state.current_icount) {
-                /* Cannot make callback directly from the vCPU thread */
-                timer_mod_ns(replay_break_timer,
-                    qemu_clock_get_ns(QEMU_CLOCK_REALTIME));
-            }
+            replay_advance_current_icount(replay_get_current_icount());
         }
     }
 }
diff --git a/stubs/replay-tools.c b/stubs/replay-tools.c
index c06b360e22..43296b3d4e 100644
--- a/stubs/replay-tools.c
+++ b/stubs/replay-tools.c
@@ -13,7 +13,7 @@ int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
     return 0;
 }
 
-int64_t replay_read_clock(unsigned int kind)
+int64_t replay_read_clock(unsigned int kind, int64_t raw_icount)
 {
     abort();
     return 0;