From 9617cddb72649f563eef8114648140b8c5607a71 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 4 Feb 2021 11:39:38 -0800 Subject: [PATCH 01/21] pc: add parser for OVMF reset block OVMF is developing a mechanism for depositing a GUIDed table just below the known location of the reset vector. The table goes backwards in memory so all entries are of the form |len| Where is arbtrary size and type, is a uint16_t and describes the entire length of the entry from the beginning of the data to the end of the guid. The foot of the table is of this form and for this case describes the entire size of the table. The table foot GUID is defined by OVMF as 96b582de-1fb2-45f7-baea-a366c55a082d and if the table is present this GUID is just below the reset vector, 48 bytes before the end of the firmware file. Add a parser for the ovmf reset block which takes a copy of the block, if the table foot guid is found, minus the footer and a function for later traversal to return the data area of any specified GUIDs. Signed-off-by: James Bottomley Reviewed-by: Dr. David Alan Gilbert Message-Id: <20210204193939.16617-2-jejb@linux.ibm.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc_sysfw.c | 122 +++++++++++++++++++++++++++++++++++++++-- include/hw/i386/pc.h | 4 ++ include/sysemu/sev.h | 1 + target/i386/sev_i386.h | 1 - 4 files changed, 123 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 11172214f1..6404b5a86f 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -125,6 +125,113 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) } } +#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d" + +static uint8_t *ovmf_table; +static int ovmf_table_len; + +static void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) +{ + uint8_t *ptr; + QemuUUID guid; + int tot_len; + + /* should only be called once */ + if (ovmf_table) { + return; + } + + if (flash_size < TARGET_PAGE_SIZE) { + return; + } + + /* + * if this is OVMF there will be a table footer + * guid 48 bytes before the end of the flash file. If it's + * not found, silently abort the flash parsing. + */ + qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid); + guid = qemu_uuid_bswap(guid); /* guids are LE */ + ptr = flash_ptr + flash_size - 48; + if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) { + return; + } + + /* if found, just before is two byte table length */ + ptr -= sizeof(uint16_t); + tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t); + + if (tot_len <= 0) { + return; + } + + ovmf_table = g_malloc(tot_len); + ovmf_table_len = tot_len; + + /* + * ptr is the foot of the table, so copy it all to the newly + * allocated ovmf_table and then set the ovmf_table pointer + * to the table foot + */ + memcpy(ovmf_table, ptr - tot_len, tot_len); + ovmf_table += tot_len; +} + +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, + int *data_len) +{ + uint8_t *ptr = ovmf_table; + int tot_len = ovmf_table_len; + QemuUUID entry_guid; + + if (qemu_uuid_parse(entry, &entry_guid) < 0) { + return false; + } + + if (!ptr) { + return false; + } + + entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */ + while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) { + int len; + QemuUUID *guid; + + /* + * The data structure is + * arbitrary length data + * 2 byte length of entire entry + * 16 byte guid + */ + guid = (QemuUUID *)(ptr - sizeof(QemuUUID)); + len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) - + sizeof(uint16_t))); + + /* + * just in case the table is corrupt, wouldn't want to spin in + * the zero case + */ + if (len < sizeof(QemuUUID) + sizeof(uint16_t)) { + return false; + } else if (len > tot_len) { + return false; + } + + ptr -= len; + tot_len -= len; + if (qemu_uuid_is_equal(guid, &entry_guid)) { + if (data) { + *data = ptr; + } + if (data_len) { + *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t); + } + return true; + } + } + return false; +} + /* * Map the pcms->flash[] from 4GiB downward, and realize. * Map them in descending order, i.e. pcms->flash[0] at the top, @@ -192,10 +299,17 @@ static void pc_system_flash_map(PCMachineState *pcms, flash_mem = pflash_cfi01_get_memory(system_flash); pc_isa_bios_init(rom_memory, flash_mem, size); - /* Encrypt the pflash boot ROM, if necessary */ - flash_ptr = memory_region_get_ram_ptr(flash_mem); - flash_size = memory_region_size(flash_mem); - sev_encrypt_flash(flash_ptr, flash_size, &error_fatal); + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { + flash_ptr = memory_region_get_ram_ptr(flash_mem); + flash_size = memory_region_size(flash_mem); + /* + * OVMF places a GUIDed structures in the flash, so + * search for them + */ + pc_system_parse_ovmf_flash(flash_ptr, flash_size); + sev_encrypt_flash(flash_ptr, flash_size, &error_fatal); + } } } } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 5f93540a43..c9d194a5e7 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -3,6 +3,7 @@ #include "qemu/notify.h" #include "qapi/qapi-types-common.h" +#include "qemu/uuid.h" #include "hw/boards.h" #include "hw/block/fdc.h" #include "hw/block/flash.h" @@ -191,6 +192,9 @@ ISADevice *pc_find_fdc0(void); void pc_system_flash_create(PCMachineState *pcms); void pc_system_flash_cleanup_unused(PCMachineState *pcms); void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, + int *data_len); + /* acpi-build.c */ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h index 5c5a13c6ca..882e8a4fb1 100644 --- a/include/sysemu/sev.h +++ b/include/sysemu/sev.h @@ -16,6 +16,7 @@ #include "sysemu/kvm.h" +bool sev_enabled(void); int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); int sev_inject_launch_secret(const char *hdr, const char *secret, diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h index 4db6960f60..bd9f00a908 100644 --- a/target/i386/sev_i386.h +++ b/target/i386/sev_i386.h @@ -28,7 +28,6 @@ #define SEV_POLICY_DOMAIN 0x10 #define SEV_POLICY_SEV 0x20 -extern bool sev_enabled(void); extern uint64_t sev_get_me_mask(void); extern SevInfo *sev_get_info(void); extern uint32_t sev_get_cbit_position(void); From f522cef9b352ac2f9880c5c8b2ea7b2033bdc9f0 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 4 Feb 2021 11:39:39 -0800 Subject: [PATCH 02/21] sev: update sev-inject-launch-secret to make gpa optional If the gpa isn't specified, it's value is extracted from the OVMF properties table located below the reset vector (and if this doesn't exist, an error is returned). OVMF has defined the GUID for the SEV secret area as 4c2eb361-7d9b-4cc3-8081-127c90d3d294 and the format of the is: | where both are uint32_t. We extract and use it as the gpa for the injection. Note: it is expected that the injected secret will also be GUID described but since qemu can't interpret it, the format is left undefined here. Signed-off-by: James Bottomley Reviewed-by: Dr. David Alan Gilbert Message-Id: <20210204193939.16617-3-jejb@linux.ibm.com> Signed-off-by: Paolo Bonzini --- qapi/misc-target.json | 2 +- target/i386/monitor.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 06ef8757f0..0c7491cd82 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -216,7 +216,7 @@ # ## { 'command': 'sev-inject-launch-secret', - 'data': { 'packet-header': 'str', 'secret': 'str', 'gpa': 'uint64' }, + 'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' }, 'if': 'defined(TARGET_I386)' } ## diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 1bc91442b1..5994408bee 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -34,6 +34,7 @@ #include "sev_i386.h" #include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" +#include "hw/i386/pc.h" /* Perform linear address sign extension */ static hwaddr addr_canonical(CPUArchState *env, hwaddr addr) @@ -730,9 +731,29 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) return sev_get_capabilities(errp); } +#define SEV_SECRET_GUID "4c2eb361-7d9b-4cc3-8081-127c90d3d294" +struct sev_secret_area { + uint32_t base; + uint32_t size; +}; + void qmp_sev_inject_launch_secret(const char *packet_hdr, - const char *secret, uint64_t gpa, + const char *secret, + bool has_gpa, uint64_t gpa, Error **errp) { + if (!has_gpa) { + uint8_t *data; + struct sev_secret_area *area; + + if (!pc_system_ovmf_table_find(SEV_SECRET_GUID, &data, NULL)) { + error_setg(errp, "SEV: no secret area found in OVMF," + " gpa must be specified."); + return; + } + area = (struct sev_secret_area *)data; + gpa = area->base; + } + sev_inject_launch_secret(packet_hdr, secret, gpa, errp); } From 6b98e96f1842a54c0bf074f4dad0928808afe287 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Jan 2021 11:36:44 -0600 Subject: [PATCH 03/21] sev/i386: Add initial support for SEV-ES Provide initial support for SEV-ES. This includes creating a function to indicate the guest is an SEV-ES guest (which will return false until all support is in place), performing the proper SEV initialization and ensuring that the guest CPU state is measured as part of the launch. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Reviewed-by: Dr. David Alan Gilbert Co-developed-by: Jiri Slaby Signed-off-by: Jiri Slaby Signed-off-by: Tom Lendacky Reviewed-by: Venu Busireddy Message-Id: <2e6386cbc1ddeaf701547dd5677adf5ddab2b6bd.1611682609.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 1 + target/i386/sev-stub.c | 6 ++++++ target/i386/sev.c | 44 ++++++++++++++++++++++++++++++++++++++++-- target/i386/sev_i386.h | 1 + 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9c3d2d60b7..20c3a5af3f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5984,6 +5984,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x8000001F: *eax = sev_enabled() ? 0x2 : 0; + *eax |= sev_es_enabled() ? 0x8 : 0; *ebx = sev_get_cbit_position(); *ebx |= sev_get_reduced_phys_bits() << 6; *ecx = 0; diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c index 1ac1fd5b94..edf6c519d7 100644 --- a/target/i386/sev-stub.c +++ b/target/i386/sev-stub.c @@ -49,6 +49,7 @@ SevCapability *sev_get_capabilities(Error **errp) error_setg(errp, "SEV is not available in this QEMU"); return NULL; } + int sev_inject_launch_secret(const char *hdr, const char *secret, uint64_t gpa, Error **errp) { @@ -59,3 +60,8 @@ int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { return 0; } + +bool sev_es_enabled(void) +{ + return false; +} diff --git a/target/i386/sev.c b/target/i386/sev.c index 11c9a3cc21..dc0e53019b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -341,6 +341,12 @@ sev_enabled(void) return !!sev_guest; } +bool +sev_es_enabled(void) +{ + return false; +} + uint64_t sev_get_me_mask(void) { @@ -561,6 +567,20 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) return ret; } +static int +sev_launch_update_vmsa(SevGuestState *sev) +{ + int ret, fw_error; + + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + } + + return ret; +} + static void sev_launch_get_measure(Notifier *notifier, void *unused) { @@ -573,6 +593,14 @@ sev_launch_get_measure(Notifier *notifier, void *unused) return; } + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ + ret = sev_launch_update_vmsa(sev); + if (ret) { + exit(1); + } + } + measurement = g_new0(struct kvm_sev_launch_measure, 1); /* query the measurement blob length */ @@ -667,7 +695,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) SevGuestState *sev = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); char *devname; - int ret, fw_error; + int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; @@ -724,8 +752,20 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) sev->api_major = status.api_major; sev->api_minor = status.api_minor; + if (sev_es_enabled()) { + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_report("%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; + } + cmd = KVM_SEV_ES_INIT; + } else { + cmd = KVM_SEV_INIT; + } + trace_kvm_sev_init(); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT, NULL, &fw_error); + ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h index bd9f00a908..ae221d4c72 100644 --- a/target/i386/sev_i386.h +++ b/target/i386/sev_i386.h @@ -28,6 +28,7 @@ #define SEV_POLICY_DOMAIN 0x10 #define SEV_POLICY_SEV 0x20 +extern bool sev_es_enabled(void); extern uint64_t sev_get_me_mask(void); extern SevInfo *sev_get_info(void); extern uint32_t sev_get_cbit_position(void); From 9681f8677f26320fff488e56b500a3d7d5cf1a49 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Jan 2021 11:36:45 -0600 Subject: [PATCH 04/21] sev/i386: Require in-kernel irqchip support for SEV-ES guests In prep for AP booting, require the use of in-kernel irqchip support. This lessens the Qemu support burden required to boot APs. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Tom Lendacky Reviewed-by: Venu Busireddy Message-Id: Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index dc0e53019b..35b9259bfc 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -753,6 +753,12 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) sev->api_minor = status.api_minor; if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { + error_report("%s: SEV-ES guests require in-kernel irqchip support", + __func__); + goto err; + } + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { error_report("%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", From b2f73a0784b7a5eae2022ccf3293792bd008cc64 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Feb 2021 15:04:52 +0100 Subject: [PATCH 05/21] sev/i386: Allow AP booting under SEV-ES When SEV-ES is enabled, it is not possible modify the guests register state after it has been initially created, encrypted and measured. Normally, an INIT-SIPI-SIPI request is used to boot the AP. However, the hypervisor cannot emulate this because it cannot update the AP register state. For the very first boot by an AP, the reset vector CS segment value and the EIP value must be programmed before the register has been encrypted and measured. Search the guest firmware for the guest for a specific GUID that tells Qemu the value of the reset vector to use. Cc: Paolo Bonzini Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Cc: Richard Henderson Cc: Eduardo Habkost Cc: Marcelo Tosatti Signed-off-by: Tom Lendacky Message-Id: <22db2bfb4d6551aed661a9ae95b4fdbef613ca21.1611682609.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 1 - hw/i386/pc_sysfw.c | 8 +++ include/sysemu/sev.h | 4 ++ target/i386/kvm/kvm.c | 2 + target/i386/sev-stub.c | 9 +++ target/i386/sev.c | 128 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 151 insertions(+), 1 deletion(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 47516913b7..bf61ef4b54 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -39,7 +39,6 @@ #include "qemu/main-loop.h" #include "trace.h" #include "hw/irq.h" -#include "sysemu/sev.h" #include "qapi/visitor.h" #include "qapi/qapi-types-common.h" #include "qapi/qapi-visit-common.h" diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 6404b5a86f..9fe72b370e 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -256,6 +256,7 @@ static void pc_system_flash_map(PCMachineState *pcms, MemoryRegion *flash_mem; void *flash_ptr; int flash_size; + int ret; assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); @@ -308,6 +309,13 @@ static void pc_system_flash_map(PCMachineState *pcms, * search for them */ pc_system_parse_ovmf_flash(flash_ptr, flash_size); + + ret = sev_es_save_reset_vector(flash_ptr, flash_size); + if (ret) { + error_report("failed to locate and/or save reset vector"); + exit(1); + } + sev_encrypt_flash(flash_ptr, flash_size, &error_fatal); } } diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h index 882e8a4fb1..94d821d737 100644 --- a/include/sysemu/sev.h +++ b/include/sysemu/sev.h @@ -21,4 +21,8 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); int sev_inject_launch_secret(const char *hdr, const char *secret, uint64_t gpa, Error **errp); + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); +void sev_es_set_reset_vector(CPUState *cpu); + #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index e97f841757..f56a8536d0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1922,6 +1922,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) } /* enabled by default */ env->poll_control_msr = 1; + + sev_es_set_reset_vector(CPU(cpu)); } void kvm_arch_do_init_vcpu(X86CPU *cpu) diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c index edf6c519d7..0207f1c5aa 100644 --- a/target/i386/sev-stub.c +++ b/target/i386/sev-stub.c @@ -65,3 +65,12 @@ bool sev_es_enabled(void) { return false; } + +void sev_es_set_reset_vector(CPUState *cpu) +{ +} + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +{ + abort(); +} diff --git a/target/i386/sev.c b/target/i386/sev.c index 35b9259bfc..4b70d4284f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -22,6 +22,7 @@ #include "qom/object_interfaces.h" #include "qemu/base64.h" #include "qemu/module.h" +#include "qemu/uuid.h" #include "sysemu/kvm.h" #include "sev_i386.h" #include "sysemu/sysemu.h" @@ -32,6 +33,7 @@ #include "exec/address-spaces.h" #include "monitor/monitor.h" #include "exec/confidential-guest-support.h" +#include "hw/i386/pc.h" #define TYPE_SEV_GUEST "sev-guest" OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) @@ -67,11 +69,21 @@ struct SevGuestState { int sev_fd; SevState state; gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; }; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ #define DEFAULT_SEV_DEVICE "/dev/sev" +#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ + uint32_t reset_addr; +} SevInfoBlock; + static SevGuestState *sev_guest; static Error *sev_mig_blocker; @@ -879,6 +891,122 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, return 0; } +static int +sev_es_parse_reset_block(SevInfoBlock *info, uint32_t *addr) +{ + if (!info->reset_addr) { + error_report("SEV-ES reset address is zero"); + return 1; + } + + *addr = info->reset_addr; + + return 0; +} + +static int +sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size, + uint32_t *addr) +{ + QemuUUID info_guid, *guid; + SevInfoBlock *info; + uint8_t *data; + uint16_t *len; + + /* + * Initialize the address to zero. An address of zero with a successful + * return code indicates that SEV-ES is not active. + */ + *addr = 0; + + /* + * Extract the AP reset vector for SEV-ES guests by locating the SEV GUID. + * The SEV GUID is located on its own (original implementation) or within + * the Firmware GUID Table (new implementation), either of which are + * located 32 bytes from the end of the flash. + * + * Check the Firmware GUID Table first. + */ + if (pc_system_ovmf_table_find(SEV_INFO_BLOCK_GUID, &data, NULL)) { + return sev_es_parse_reset_block((SevInfoBlock *)data, addr); + } + + /* + * SEV info block not found in the Firmware GUID Table (or there isn't + * a Firmware GUID Table), fall back to the original implementation. + */ + data = flash_ptr + flash_size - 0x20; + + qemu_uuid_parse(SEV_INFO_BLOCK_GUID, &info_guid); + info_guid = qemu_uuid_bswap(info_guid); /* GUIDs are LE */ + + guid = (QemuUUID *)(data - sizeof(info_guid)); + if (!qemu_uuid_is_equal(guid, &info_guid)) { + error_report("SEV information block/Firmware GUID Table block not found in pflash rom"); + return 1; + } + + len = (uint16_t *)((uint8_t *)guid - sizeof(*len)); + info = (SevInfoBlock *)(data - le16_to_cpu(*len)); + + return sev_es_parse_reset_block(info, addr); +} + +void sev_es_set_reset_vector(CPUState *cpu) +{ + X86CPU *x86; + CPUX86State *env; + + /* Only update if we have valid reset information */ + if (!sev_guest || !sev_guest->reset_data_valid) { + return; + } + + /* Do not update the BSP reset state */ + if (cpu->cpu_index == 0) { + return; + } + + x86 = X86_CPU(cpu); + env = &x86->env; + + cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + + env->eip = sev_guest->reset_ip; +} + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +{ + CPUState *cpu; + uint32_t addr; + int ret; + + if (!sev_es_enabled()) { + return 0; + } + + addr = 0; + ret = sev_es_find_reset_vector(flash_ptr, flash_size, + &addr); + if (ret) { + return ret; + } + + if (addr) { + sev_guest->reset_cs = addr & 0xffff0000; + sev_guest->reset_ip = addr & 0x0000ffff; + sev_guest->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); + } + } + + return 0; +} + static void sev_register_types(void) { From 92a5199b29f6519aa5f774f4b96dc41954f641d1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Jan 2021 11:36:47 -0600 Subject: [PATCH 06/21] sev/i386: Don't allow a system reset under an SEV-ES guest An SEV-ES guest does not allow register state to be altered once it has been measured. When an SEV-ES guest issues a reboot command, Qemu will reset the vCPU state and resume the guest. This will cause failures under SEV-ES. Prevent that from occuring by introducing an arch-specific callback that returns a boolean indicating whether vCPUs are resettable. Cc: Peter Maydell Cc: Aurelien Jarno Cc: Jiaxun Yang Cc: Aleksandar Rikalo Cc: David Gibson Cc: David Hildenbrand Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Tom Lendacky Reviewed-by: Venu Busireddy Message-Id: <1ac39c441b9a3e970e9556e1cc29d0a0814de6fd.1611682609.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 5 +++++ include/sysemu/cpus.h | 2 ++ include/sysemu/hw_accel.h | 5 +++++ include/sysemu/kvm.h | 10 ++++++++++ softmmu/cpus.c | 5 +++++ softmmu/runstate.c | 3 +++ target/arm/kvm.c | 5 +++++ target/i386/kvm/kvm.c | 6 ++++++ target/mips/kvm.c | 5 +++++ target/ppc/kvm.c | 5 +++++ target/s390x/kvm.c | 5 +++++ 11 files changed, 56 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index bf61ef4b54..84c943fcdb 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2312,6 +2312,11 @@ void kvm_flush_coalesced_mmio_buffer(void) s->coalesced_flush_in_progress = false; } +bool kvm_cpu_check_are_resettable(void) +{ + return kvm_arch_cpu_check_are_resettable(); +} + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { if (!cpu->vcpu_dirty) { diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 2cd74392e0..868f1192de 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -41,6 +41,8 @@ extern int icount_align_option; /* Unblock cpu */ void qemu_cpu_kick_self(void); +bool cpus_are_resettable(void); + void cpu_synchronize_all_states(void); void cpu_synchronize_all_post_reset(void); void cpu_synchronize_all_post_init(void); diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h index ffed6192a3..61672f9b32 100644 --- a/include/sysemu/hw_accel.h +++ b/include/sysemu/hw_accel.h @@ -22,4 +22,9 @@ void cpu_synchronize_post_reset(CPUState *cpu); void cpu_synchronize_post_init(CPUState *cpu); void cpu_synchronize_pre_loadvm(CPUState *cpu); +static inline bool cpu_check_are_resettable(void) +{ + return kvm_enabled() ? kvm_cpu_check_are_resettable() : true; +} + #endif /* QEMU_HW_ACCEL_H */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c5546bdecc..687c598be9 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -541,4 +541,14 @@ int kvm_get_max_memslots(void); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); +/** + * kvm_cpu_check_are_resettable - return whether CPUs can be reset + * + * Returns: true: CPUs are resettable + * false: CPUs are not resettable + */ +bool kvm_cpu_check_are_resettable(void); + +bool kvm_arch_cpu_check_are_resettable(void); + #endif diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 112eba9d54..a7ee431187 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -194,6 +194,11 @@ void cpu_synchronize_pre_loadvm(CPUState *cpu) } } +bool cpus_are_resettable(void) +{ + return cpu_check_are_resettable(); +} + int64_t cpus_get_virtual_clock(void) { /* diff --git a/softmmu/runstate.c b/softmmu/runstate.c index a7fcb603f7..2874417b61 100644 --- a/softmmu/runstate.c +++ b/softmmu/runstate.c @@ -528,6 +528,9 @@ void qemu_system_reset_request(ShutdownCause reason) if (reboot_action == REBOOT_ACTION_SHUTDOWN && reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { shutdown_requested = reason; + } else if (!cpus_are_resettable()) { + error_report("cpus are not resettable, terminating"); + shutdown_requested = reason; } else { reset_requested = reason; } diff --git a/target/arm/kvm.c b/target/arm/kvm.c index ffe186de8d..00e124c812 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1045,3 +1045,8 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) { return (data - 32) & 0xffff; } + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return true; +} diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f56a8536d0..d10667b21b 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -27,6 +27,7 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "kvm_i386.h" +#include "sev_i386.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -4821,3 +4822,8 @@ bool kvm_has_waitpkg(void) { return has_msr_umwait; } + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return !sev_es_enabled(); +} diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 84fb10ea35..123ec1be7e 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1290,3 +1290,8 @@ int mips_kvm_type(MachineState *machine, const char *vm_type) return -1; } + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return true; +} diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 0c5056dd5b..298c1f882c 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2929,3 +2929,8 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset); } } + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return true; +} diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index dc27fa36c9..7a892d663d 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2599,3 +2599,8 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) kvm_s390_vcpu_interrupt(cpu, &irq); } + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return true; +} From 23edf8b549c7a8a520d42da19403864245f8977f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Jan 2021 11:36:48 -0600 Subject: [PATCH 07/21] kvm/i386: Use a per-VM check for SMM capability SMM is not currently supported for an SEV-ES guest by KVM. Change the SMM capability check from a KVM-wide check to a per-VM check in order to have a finer-grained SMM capability check. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Suggested-by: Sean Christopherson Signed-off-by: Tom Lendacky Reviewed-by: Venu Busireddy Message-Id: Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index d10667b21b..0b5755e42b 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -137,7 +137,7 @@ int kvm_has_pit_state2(void) bool kvm_has_smm(void) { - return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); } bool kvm_has_adjust_clock_stable(void) From 027b524d6a427d7c89f4e8af44c49d96796adab5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Jan 2021 11:36:49 -0600 Subject: [PATCH 08/21] sev/i386: Enable an SEV-ES guest based on SEV policy Update the sev_es_enabled() function return value to be based on the SEV policy that has been specified. SEV-ES is enabled if SEV is enabled and the SEV-ES policy bit is set in the policy object. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Tom Lendacky Reviewed-by: Venu Busireddy Message-Id: Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 4b70d4284f..0f414df02f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -356,7 +356,7 @@ sev_enabled(void) bool sev_es_enabled(void) { - return false; + return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); } uint64_t From f6a2c6eee77458a1f2cf6632b2d9f2fd97bf595e Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Wed, 27 Jan 2021 00:00:34 +0100 Subject: [PATCH 09/21] libqos/qgraph: add qos_node_create_driver_named() So far the qos subsystem of the qtest framework had the limitation that only one instance of the same official QEMU (QMP) driver name could be created for qtests. That's because a) the created qos node names must always be unique, b) the node name must match the official QEMU driver name being instantiated and c) all nodes are in a global space shared by all tests. This patch removes this limitation by introducing a new function qos_node_create_driver_named() which allows test case authors to specify a node name being different from the actual associated QEMU driver name. It fills the new 'qemu_name' field of QOSGraphNode for that purpose. Adjust build_driver_cmd_line() and qos_graph_node_set_availability() to correctly deal with either accessing node name vs. node's qemu_name correctly. Signed-off-by: Christian Schoenebeck Message-Id: <3be962ff38f3396f8040deaa5ffdab525c4e0b16.1611704181.git.qemu_oss@crudebyte.com> Signed-off-by: Paolo Bonzini --- tests/qtest/libqos/qgraph.c | 54 ++++++++++++++++++++++++++-- tests/qtest/libqos/qgraph.h | 16 +++++++++ tests/qtest/libqos/qgraph_internal.h | 1 + 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/tests/qtest/libqos/qgraph.c b/tests/qtest/libqos/qgraph.c index fc49cfa879..61faf6b27d 100644 --- a/tests/qtest/libqos/qgraph.c +++ b/tests/qtest/libqos/qgraph.c @@ -153,6 +153,7 @@ static QOSGraphNode *create_node(const char *name, QOSNodeType type) static void destroy_node(void *val) { QOSGraphNode *node = val; + g_free(node->qemu_name); g_free(node->command_line); g_free(node); } @@ -286,7 +287,8 @@ static void build_machine_cmd_line(QOSGraphNode *node, const char *args) */ static void build_driver_cmd_line(QOSGraphNode *node) { - node->command_line = g_strconcat(" -device ", node->name, NULL); + const char *name = node->qemu_name ?: node->name; + node->command_line = g_strconcat(" -device ", name, NULL); } /* qos_print_cb(): callback prints all path found by the DFS algorithm. */ @@ -631,6 +633,15 @@ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function) node->u.driver.constructor = function; } +void qos_node_create_driver_named(const char *name, const char *qemu_name, + QOSCreateDriverFunc function) +{ + QOSGraphNode *node = create_node(name, QNODE_DRIVER); + node->qemu_name = g_strdup(qemu_name); + build_driver_cmd_line(node); + node->u.driver.constructor = function; +} + void qos_node_contains(const char *container, const char *contained, QOSGraphEdgeOptions *opts, ...) { @@ -663,7 +674,7 @@ void qos_node_consumes(const char *consumer, const char *interface, add_edge(interface, consumer, QEDGE_CONSUMED_BY, opts); } -void qos_graph_node_set_availability(const char *node, bool av) +static void qos_graph_node_set_availability_explicit(const char *node, bool av) { QOSGraphEdgeList *elist; QOSGraphNode *n = search_node(node); @@ -678,11 +689,48 @@ void qos_graph_node_set_availability(const char *node, bool av) } QSLIST_FOREACH_SAFE(e, elist, edge_list, next) { if (e->type == QEDGE_CONTAINS || e->type == QEDGE_PRODUCES) { - qos_graph_node_set_availability(e->dest, av); + qos_graph_node_set_availability_explicit(e->dest, av); } } } +/* + * Behaves as qos_graph_node_set_availability_explicit(), except that the + * former always matches by node name only, whereas this function matches both + * by node name and node's optional 'qemu_name' field. + */ +void qos_graph_node_set_availability(const char *node, bool av) +{ + GList *l; + QOSGraphEdgeList *elist; + QOSGraphEdge *e, *next; + QOSGraphNode *n; + GList *keys = g_hash_table_get_keys(node_table); + + for (l = keys; l != NULL; l = l->next) { + const gchar *key = l->data; + n = g_hash_table_lookup(node_table, key); + /* + * node's 'qemu_name' is set if there is more than one device with + * the same QEMU (QMP) device name + */ + const char *node_name = n->qemu_name ?: n->name; + if (g_strcmp0(node_name, node) == 0) { + n->available = av; + elist = get_edgelist(n->name); + if (elist) { + QSLIST_FOREACH_SAFE(e, elist, edge_list, next) { + if (e->type == QEDGE_CONTAINS || e->type == QEDGE_PRODUCES) + { + qos_graph_node_set_availability_explicit(e->dest, av); + } + } + } + } + } + g_list_free(keys); +} + void qos_graph_foreach_test_path(QOSTestCallback fn) { QOSGraphNode *root = qos_graph_get_node(QOS_ROOT); diff --git a/tests/qtest/libqos/qgraph.h b/tests/qtest/libqos/qgraph.h index 5f63d352ca..f472949f68 100644 --- a/tests/qtest/libqos/qgraph.h +++ b/tests/qtest/libqos/qgraph.h @@ -452,6 +452,22 @@ void qos_node_create_machine_args(const char *name, */ void qos_node_create_driver(const char *name, QOSCreateDriverFunc function); +/** + * Behaves as qos_node_create_driver() with the extension of allowing to + * specify a different node name vs. associated QEMU device name. + * + * Use this function instead of qos_node_create_driver() if you need to create + * several instances of the same QEMU device. You are free to choose a custom + * node name, however the chosen node name must always be unique. + * + * @param name: custom, unique name of the node to be created + * @param qemu_name: actual (official) QEMU driver name the node shall be + * associated with + * @param function: driver constructor + */ +void qos_node_create_driver_named(const char *name, const char *qemu_name, + QOSCreateDriverFunc function); + /** * qos_node_contains(): creates one or more edges of type QEDGE_CONTAINS * and adds them to the edge list mapped to @container in the diff --git a/tests/qtest/libqos/qgraph_internal.h b/tests/qtest/libqos/qgraph_internal.h index 968fa69450..974985dce9 100644 --- a/tests/qtest/libqos/qgraph_internal.h +++ b/tests/qtest/libqos/qgraph_internal.h @@ -56,6 +56,7 @@ struct QOSGraphNode { bool available; /* set by QEMU via QMP, used during graph walk */ bool visited; /* used during graph walk */ char *name; /* used to identify the node */ + char *qemu_name; /* optional: see qos_node_create_driver_named() */ char *command_line; /* used to start QEMU at test execution */ union { struct { From 23820025af6b356cd4061a8b029c1126e1ee915e Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Wed, 27 Jan 2021 00:04:22 +0100 Subject: [PATCH 10/21] libqos/qgraph_internal: add qos_printf() and qos_printf_literal() These two are macros wrapping regular printf() call. They are intended to be used instead of calling printf() directly in order to avoid breaking TAP output format. TAP output format is enabled by using --tap command line argument. Starting with glib 2.62 it is enabled by default. Unfortunately there is currently no public glib API available to check whether TAP output format is enabled. For that reason qos_printf() simply always prepends a '#' character for now. Signed-off-by: Christian Schoenebeck Reviewed-by: Thomas Huth Message-Id: <653a5ef61c5e7d160e4d6294e542c57ea324cee4.1611704181.git.qemu_oss@crudebyte.com> Signed-off-by: Paolo Bonzini --- tests/qtest/libqos/qgraph_internal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/qtest/libqos/qgraph_internal.h b/tests/qtest/libqos/qgraph_internal.h index 974985dce9..c0025f5ab9 100644 --- a/tests/qtest/libqos/qgraph_internal.h +++ b/tests/qtest/libqos/qgraph_internal.h @@ -255,4 +255,15 @@ void qos_delete_cmd_line(const char *name); */ void qos_graph_node_set_availability(const char *node, bool av); +/* + * Prepends a '#' character in front for not breaking TAP output format. + */ +#define qos_printf(...) printf("# " __VA_ARGS__) + +/* + * Intended for printing something literally, i.e. for appending text as is + * to a line already been started by qos_printf() before. + */ +#define qos_printf_literal printf + #endif From 83ff78e5674ccf01a2092c230c893cb2ef41a1a6 Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Wed, 27 Jan 2021 00:08:03 +0100 Subject: [PATCH 11/21] tests/qtest/qos-test: dump qos graph if verbose If qtests were run in verbose mode (i.e. if --verbose CL argument was provided) then dump the generated qos graph (all nodes and edges, along with their current individual availability status) to stdout, which allows to identify problems in the created qos graph e.g. when writing new qos tests. See API doc comment on function qos_dump_graph() for details. Signed-off-by: Christian Schoenebeck Message-Id: <6bffb6e38589fb2c06a2c1b5deed33f3e710fed1.1611704181.git.qemu_oss@crudebyte.com> Signed-off-by: Paolo Bonzini --- tests/qtest/libqos/qgraph.c | 45 +++++++++++++++++++++++++++++++++++++ tests/qtest/libqos/qgraph.h | 20 +++++++++++++++++ tests/qtest/qos-test.c | 3 +++ 3 files changed, 68 insertions(+) diff --git a/tests/qtest/libqos/qgraph.c b/tests/qtest/libqos/qgraph.c index 61faf6b27d..b3b1a31f81 100644 --- a/tests/qtest/libqos/qgraph.c +++ b/tests/qtest/libqos/qgraph.c @@ -805,3 +805,48 @@ void qos_delete_cmd_line(const char *name) node->command_line = NULL; } } + +void qos_dump_graph(void) +{ + GList *keys; + GList *l; + QOSGraphEdgeList *list; + QOSGraphEdge *e, *next; + QOSGraphNode *dest_node, *node; + + qos_printf("ALL QGRAPH EDGES: {\n"); + keys = g_hash_table_get_keys(edge_table); + for (l = keys; l != NULL; l = l->next) { + const gchar *key = l->data; + qos_printf("\t src='%s'\n", key); + list = get_edgelist(key); + QSLIST_FOREACH_SAFE(e, list, edge_list, next) { + dest_node = g_hash_table_lookup(node_table, e->dest); + qos_printf("\t\t|-> dest='%s' type=%d (node=%p)", + e->dest, e->type, dest_node); + if (!dest_node) { + qos_printf_literal(" <------- ERROR !"); + } + qos_printf_literal("\n"); + } + } + g_list_free(keys); + qos_printf("}\n"); + + qos_printf("ALL QGRAPH NODES: {\n"); + keys = g_hash_table_get_keys(node_table); + for (l = keys; l != NULL; l = l->next) { + const gchar *key = l->data; + node = g_hash_table_lookup(node_table, key); + qos_printf("\t name='%s' ", key); + if (node->qemu_name) { + qos_printf_literal("qemu_name='%s' ", node->qemu_name); + } + qos_printf_literal("type=%d cmd_line='%s' [%s]\n", + node->type, node->command_line, + node->available ? "available" : "UNAVAILBLE" + ); + } + g_list_free(keys); + qos_printf("}\n"); +} diff --git a/tests/qtest/libqos/qgraph.h b/tests/qtest/libqos/qgraph.h index f472949f68..07a32535f1 100644 --- a/tests/qtest/libqos/qgraph.h +++ b/tests/qtest/libqos/qgraph.h @@ -586,5 +586,25 @@ QOSGraphObject *qos_machine_new(QOSGraphNode *node, QTestState *qts); QOSGraphObject *qos_driver_new(QOSGraphNode *node, QOSGraphObject *parent, QGuestAllocator *alloc, void *arg); +/** + * Just for debugging purpose: prints all currently existing nodes and + * edges to stdout. + * + * All qtests add themselves to the overall qos graph by calling qgraph + * functions that add device nodes and edges between the individual graph + * nodes for tests. As the actual graph is assmbled at runtime by the qos + * subsystem, it is sometimes not obvious how the overall graph looks like. + * E.g. when writing new tests it may happen that those new tests are simply + * ignored by the qtest framework. + * + * This function allows to identify problems in the created qgraph. Keep in + * mind: only tests with a path down from the actual test case node (leaf) up + * to the graph's root node are actually executed by the qtest framework. And + * the qtest framework uses QMP to automatically check which QEMU drivers are + * actually currently available, and accordingly qos marks certain pathes as + * 'unavailable' in such cases (e.g. when QEMU was compiled without support for + * a certain feature). + */ +void qos_dump_graph(void); #endif diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c index 8fdf87b183..d98ef78613 100644 --- a/tests/qtest/qos-test.c +++ b/tests/qtest/qos-test.c @@ -322,6 +322,9 @@ int main(int argc, char **argv) qos_set_machines_devices_available(); qos_graph_foreach_test_path(walk_path); + if (g_test_verbose()) { + qos_dump_graph(); + } g_test_run(); qtest_end(); qos_graph_destroy(); From 093360dc32cf70d3651496b58dc16b22f4971dcc Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Wed, 27 Jan 2021 00:17:36 +0100 Subject: [PATCH 12/21] tests/qtest/qos-test: dump environment variables if verbose If qtests are run in verbose mode (i.e. if --verbose CL argument was provided) then print all environment variables to stdout before running the individual tests. It is common nowadays, at least being able to output all config vectors in a build chain, especially if it is required to investigate build- and test-issues on foreign/remote machines, which includes environment variables. In the context of writing new test cases this is also useful for finding out whether there are already some existing options for common questions like is there a preferred location for writing test files to? Is there a maximum size for test data? Is there a deadline for running tests? Use qos_printf() instead of g_test_message() to avoid the latter cluttering the output. Signed-off-by: Christian Schoenebeck Message-Id: <21d77b33c578d80b5bba1068e61fd3562958b3c2.1611704181.git.qemu_oss@crudebyte.com> Signed-off-by: Paolo Bonzini --- tests/qtest/qos-test.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c index d98ef78613..b279b6f816 100644 --- a/tests/qtest/qos-test.c +++ b/tests/qtest/qos-test.c @@ -313,9 +313,16 @@ static void walk_path(QOSGraphNode *orig_path, int len) * machine/drivers/test objects * - Cleans up everything */ -int main(int argc, char **argv) +int main(int argc, char **argv, char** envp) { g_test_init(&argc, &argv, NULL); + if (g_test_verbose()) { + qos_printf("ENVIRONMENT VARIABLES: {\n"); + for (char **env = envp; *env != 0; env++) { + qos_printf("\t%s\n", *env); + } + qos_printf("}\n"); + } qos_graph_init(); module_call_init(MODULE_INIT_QOM); module_call_init(MODULE_INIT_LIBQOS); From b0019c995e0397092d5db5caa8262b67036c2a89 Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Wed, 27 Jan 2021 00:26:16 +0100 Subject: [PATCH 13/21] tests/qtest/qos-test: dump QEMU command if verbose If qtests are run in verbose mode (i.e. if --verbose CL argument was provided) then print the assembled qemu command line for each test. Use qos_printf() instead of g_test_message() to avoid the latter cluttering the output. Signed-off-by: Christian Schoenebeck Message-Id: <110bef3595cb841dfa1b86733c174ac9774eb37e.1611704181.git.qemu_oss@crudebyte.com> Signed-off-by: Paolo Bonzini --- tests/qtest/qos-test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c index b279b6f816..f97d0a08fd 100644 --- a/tests/qtest/qos-test.c +++ b/tests/qtest/qos-test.c @@ -89,6 +89,9 @@ static void qos_set_machines_devices_available(void) static void restart_qemu_or_continue(char *path) { + if (g_test_verbose()) { + qos_printf("Run QEMU with: '%s'\n", path); + } /* compares the current command line with the * one previously executed: if they are the same, * don't restart QEMU, if they differ, stop previous From 342e3a4f20653c2d419cc0e8fdc0b99dfea32fed Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 8 Feb 2021 21:57:52 +0100 Subject: [PATCH 14/21] util/cutils: Skip "." when looking for next directory component When looking for the next directory component, a "." component is now skipped. This fixes the path(s) used for firmware lookup for the prefix == bindir case which is standard for QEMU on Windows and where the internally used bindir value ends with "/.". Signed-off-by: Stefan Weil Message-Id: <20210208205752.2488774-1-sw@weilnetz.de> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- util/cutils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/cutils.c b/util/cutils.c index 0b5073b330..70c7d6efbd 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -916,7 +916,8 @@ static inline bool starts_with_prefix(const char *dir) static inline const char *next_component(const char *dir, int *p_len) { int len; - while (*dir && G_IS_DIR_SEPARATOR(*dir)) { + while ((*dir && G_IS_DIR_SEPARATOR(*dir)) || + (*dir == '.' && (G_IS_DIR_SEPARATOR(dir[1]) || dir[1] == '\0'))) { dir++; } len = 0; From 118f2aadbc66aaae4e8d52259288e18f2aa4544a Mon Sep 17 00:00:00 2001 From: Hill Ma Date: Tue, 12 Jan 2021 22:07:35 -0800 Subject: [PATCH 15/21] hvf: Guard xgetbv call This prevents illegal instruction on cpus that do not support xgetbv. Buglink: https://bugs.launchpad.net/qemu/+bug/1758819 Reviewed-by: Cameron Esfahani Signed-off-by: Hill Ma Message-Id: Signed-off-by: Roman Bolshakov Signed-off-by: Paolo Bonzini --- target/i386/hvf/x86_cpuid.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index a6842912f5..32b0d131df 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -27,15 +27,22 @@ #include "vmx.h" #include "sysemu/hvf.h" -static uint64_t xgetbv(uint32_t xcr) +static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr) { - uint32_t eax, edx; + uint32_t xcrl, xcrh; - __asm__ volatile ("xgetbv" - : "=a" (eax), "=d" (edx) - : "c" (xcr)); + if (cpuid_ecx & CPUID_EXT_OSXSAVE) { + /* + * The xgetbv instruction is not available to older versions of + * the assembler, so we encode the instruction manually. + */ + asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx)); - return (((uint64_t)edx) << 32) | eax; + *xcr = (((uint64_t)xcrh) << 32) | xcrl; + return true; + } + + return false; } uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, @@ -100,12 +107,15 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, break; case 0xD: if (idx == 0) { - uint64_t host_xcr0 = xgetbv(0); - uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK | - XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | - XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | - XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK); - eax &= supp_xcr0; + uint64_t host_xcr0; + if (xgetbv(ecx, 0, &host_xcr0)) { + uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | + XSTATE_SSE_MASK | XSTATE_YMM_MASK | + XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | + XSTATE_Hi16_ZMM_MASK); + eax &= supp_xcr0; + } } else if (idx == 1) { hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap); eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1; From 3b502b0e470867369ba6e0a94e9ba6d91bb176c2 Mon Sep 17 00:00:00 2001 From: Vladislav Yaroshchuk Date: Fri, 22 Jan 2021 18:05:18 +0300 Subject: [PATCH 16/21] target/i386/hvf: add vmware-cpuid-freq cpu feature For `-accel hvf` cpu_x86_cpuid() is wrapped with hvf_cpu_x86_cpuid() to add paravirtualization cpuid leaf 0x40000010 https://lkml.org/lkml/2008/10/1/246 Leaf 0x40000010, Timing Information: EAX: (Virtual) TSC frequency in kHz. EBX: (Virtual) Bus (local apic timer) frequency in kHz. ECX, EDX: RESERVED (Per above, reserved fields are set to zero). On macOS TSC and APIC Bus frequencies can be readed by sysctl call with names `machdep.tsc.frequency` and `hw.busfrequency` This options is required for Darwin-XNU guest to be synchronized with host Leaf 0x40000000 not exposes HVF leaving hypervisor signature empty Signed-off-by: Vladislav Yaroshchuk Message-Id: <20210122150518.3551-1-yaroshchuk2000@gmail.com> Signed-off-by: Roman Bolshakov Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 96 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 5b90dcdf88..10a06c3c79 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -65,6 +65,7 @@ #include #include +#include #include "exec/address-spaces.h" #include "hw/i386/apic_internal.h" @@ -456,6 +457,48 @@ static void dummy_signal(int sig) { } +static void init_tsc_freq(CPUX86State *env) +{ + size_t length; + uint64_t tsc_freq; + + if (env->tsc_khz != 0) { + return; + } + + length = sizeof(uint64_t); + if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) { + return; + } + env->tsc_khz = tsc_freq / 1000; /* Hz to KHz */ +} + +static void init_apic_bus_freq(CPUX86State *env) +{ + size_t length; + uint64_t bus_freq; + + if (env->apic_bus_freq != 0) { + return; + } + + length = sizeof(uint64_t); + if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) { + return; + } + env->apic_bus_freq = bus_freq; +} + +static inline bool tsc_is_known(CPUX86State *env) +{ + return env->tsc_khz != 0; +} + +static inline bool apic_bus_freq_is_known(CPUX86State *env) +{ + return env->apic_bus_freq != 0; +} + int hvf_init_vcpu(CPUState *cpu) { @@ -480,6 +523,15 @@ int hvf_init_vcpu(CPUState *cpu) hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); env->hvf_mmio_buf = g_new(char, 4096); + if (x86cpu->vmware_cpuid_freq) { + init_tsc_freq(env); + init_apic_bus_freq(env); + + if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + error_report("vmware-cpuid-freq: feature couldn't be enabled"); + } + } + r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); cpu->vcpu_dirty = 1; assert_hvf_ok(r); @@ -597,6 +649,48 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in } } +static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + /* + * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs, + * leafs 0x40000001-0x4000000F are filled with zeros + * Provides vmware-cpuid-freq support to hvf + * + * Note: leaf 0x40000000 not exposes HVF, + * leaving hypervisor signature empty + */ + + if (index < 0x40000000 || index > 0x40000010 || + !tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + + cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx); + return; + } + + switch (index) { + case 0x40000000: + *eax = 0x40000010; /* Max available cpuid leaf */ + *ebx = 0; /* Leave signature empty */ + *ecx = 0; + *edx = 0; + break; + case 0x40000010: + *eax = env->tsc_khz; + *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */ + *ecx = 0; + *edx = 0; + break; + default: + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + break; + } +} + int hvf_vcpu_exec(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -734,7 +828,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); - cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); + hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); wreg(cpu->hvf_fd, HV_X86_RAX, rax); wreg(cpu->hvf_fd, HV_X86_RBX, rbx); From 45f918ccf6c35ee1912a8847873b7ba5b6927b46 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 20 Jan 2021 23:44:35 +0100 Subject: [PATCH 17/21] hvf: x86: Remove unused definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hvf i386 has a few struct and cpp definitions that are never used. Remove them. Suggested-by: Roman Bolshakov Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Alexander Graf Message-Id: <20210120224444.71840-3-agraf@csgraf.de> Signed-off-by: Roman Bolshakov Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf-i386.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 50b914fd67..59cfca8875 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -21,21 +21,6 @@ #include "cpu.h" #include "x86.h" -#define HVF_MAX_VCPU 0x10 - -extern struct hvf_state hvf_global; - -struct hvf_vm { - int id; - struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU]; -}; - -struct hvf_state { - uint32_t version; - struct hvf_vm *vm; - uint64_t mem_quota; -}; - /* hvf_slot flags */ #define HVF_SLOT_LOG (1 << 0) @@ -75,7 +60,6 @@ hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); /* Host specific functions */ int hvf_inject_interrupt(CPUArchState *env, int vector); -int hvf_vcpu_run(struct hvf_vcpu_state *vcpu); #endif #endif From 027ac0cb516cca4ce8a88dcca2f759c77e0e374b Mon Sep 17 00:00:00 2001 From: Vladislav Yaroshchuk Date: Wed, 13 Jan 2021 23:53:23 +0300 Subject: [PATCH 18/21] target/i386/hvf: add rdmsr 35H MSR_CORE_THREAD_COUNT Some guests (ex. Darwin-XNU) can attemp to read this MSR to retrieve and validate CPU topology comparing it to ACPI MADT content MSR description from Intel Manual: 35H: MSR_CORE_THREAD_COUNT: Configured State of Enabled Processor Core Count and Logical Processor Count Bits 15:0 THREAD_COUNT The number of logical processors that are currently enabled in the physical package Bits 31:16 Core_COUNT The number of processor cores that are currently enabled in the physical package Bits 63:32 Reserved Signed-off-by: Vladislav Yaroshchuk Message-Id: <20210113205323.33310-1-yaroshchuk2000@gmail.com> [RB: reordered MSR definition and dropped u suffix from shift offset] Signed-off-by: Roman Bolshakov Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 1 + target/i386/hvf/x86_emu.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8d599bb5b8..82c1ac00ef 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -368,6 +368,7 @@ typedef enum X86Seg { #define MSR_IA32_SMBASE 0x9e #define MSR_SMI_COUNT 0x34 +#define MSR_CORE_THREAD_COUNT 0x35 #define MSR_MTRRcap 0xfe #define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index da570e352b..e52c39ddb1 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -668,6 +668,7 @@ void simulate_rdmsr(struct CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; + CPUState *cs = env_cpu(env); uint32_t msr = ECX(env); uint64_t val = 0; @@ -745,6 +746,10 @@ void simulate_rdmsr(struct CPUState *cpu) case MSR_MTRRdefType: val = env->mtrr_deftype; break; + case MSR_CORE_THREAD_COUNT: + val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */ + val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */ + break; default: /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */ val = 0; From 106f91d59c373b63f227b8827ff18ac9c9068d2f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 23 Jan 2021 01:41:29 +0100 Subject: [PATCH 19/21] hvf: Fetch cr4 before evaluating CPUID(1) The CPUID function 1 has a bit called OSXSAVE which tells user space the status of the CR4.OSXSAVE bit. Our generic CPUID function injects that bit based on the status of CR4. With Hypervisor.framework, we do not synchronize full CPU state often enough for this function to see the CR4 update before guest user space asks for it. To be on the save side, let's just always synchronize it when we receive a CPUID(1) request. That way we can set the bit with real confidence. Reported-by: Asad Ali Signed-off-by: Alexander Graf Message-Id: <20210123004129.6364-1-agraf@csgraf.de> [RB: resolved conflict with another CPUID change] Signed-off-by: Roman Bolshakov Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 10a06c3c79..15f14ac69e 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -828,6 +828,10 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); + if (rax == 1) { + /* CPUID1.ecx.OSXSAVE needs to know CR4 */ + env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); + } hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); wreg(cpu->hvf_fd, HV_X86_RAX, rax); From 82e2756897810b6e17e0c352101878b97b1e2688 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 16 Feb 2021 13:02:47 +0100 Subject: [PATCH 20/21] event_notifier: Set ->initialized earlier in event_notifier_init() Otherwise the call to event_notifier_set() is a nop, which causes the SLOF firmware on POWER to hang when booting from a virtio-scsi device: virtio_scsi_dataplane_start() virtio_scsi_vring_init() virtio_bus_set_host_notifier() <- assign == true event_notifier_init() <- active == 1 event_notifier_set() <- fails right away if !e->initialized Fixes: e34e47eb28c0 ("event_notifier: handle initialization failure better") Cc: mlevitsk@redhat.com Signed-off-by: Greg Kurz Message-Id: <20210216120247.1293569-1-groug@kaod.org> Signed-off-by: Paolo Bonzini --- util/event_notifier-posix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c index 5b2110e861..8307013c5d 100644 --- a/util/event_notifier-posix.c +++ b/util/event_notifier-posix.c @@ -66,10 +66,10 @@ int event_notifier_init(EventNotifier *e, int active) e->rfd = fds[0]; e->wfd = fds[1]; } + e->initialized = true; if (active) { event_notifier_set(e); } - e->initialized = true; return 0; fail: From 366a85e4bb748794b1ae0ca0ccc2d95f316679a0 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Tue, 16 Feb 2021 15:51:44 +0300 Subject: [PATCH 21/21] replay: fix icount request when replaying clock access Record/replay provides REPLAY_CLOCK_LOCKED macro to access the clock when vm_clock_seqlock is locked. This macro is needed because replay internals operate icount. In locked case replay use icount_get_raw_locked for icount request, which prevents excess locking which leads to deadlock. But previously only record code used *_locked function and replay did not. Therefore sometimes clock access lead to deadlocks. This patch fixes clock access for replay too and uses *_locked icount access function. Signed-off-by: Pavel Dovgalyuk Message-Id: <161347990483.1313189.8371838968343494161.stgit@pasha-ThinkPad-X280> Signed-off-by: Paolo Bonzini --- include/sysemu/replay.h | 14 ++++++++------ replay/replay-internal.c | 29 +++++++++++++++++++++++++---- replay/replay-time.c | 4 ++-- replay/replay.c | 23 +---------------------- stubs/replay-tools.c | 2 +- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 56c0c17c30..0f3b0f7eac 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -128,18 +128,20 @@ bool replay_has_interrupt(void); int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icount); /*! Read the specified clock from the log or return cached data */ -int64_t replay_read_clock(ReplayClockKind kind); +int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount); /*! Saves or reads the clock depending on the current replay mode. */ #define REPLAY_CLOCK(clock, value) \ - (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw()) \ : replay_mode == REPLAY_MODE_RECORD \ - ? replay_save_clock((clock), (value), icount_get_raw()) \ - : (value)) + ? replay_save_clock((clock), (value), icount_get_raw()) \ + : (value)) #define REPLAY_CLOCK_LOCKED(clock, value) \ - (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + (replay_mode == REPLAY_MODE_PLAY \ + ? replay_read_clock((clock), icount_get_raw_locked()) \ : replay_mode == REPLAY_MODE_RECORD \ ? replay_save_clock((clock), (value), icount_get_raw_locked()) \ - : (value)) + : (value)) /* Processing data from random generators */ diff --git a/replay/replay-internal.c b/replay/replay-internal.c index 2e8a3e947a..77d0c82327 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -247,10 +247,31 @@ void replay_advance_current_icount(uint64_t current_icount) /* Time can only go forward */ assert(diff >= 0); - if (diff > 0) { - replay_put_event(EVENT_INSTRUCTION); - replay_put_dword(diff); - replay_state.current_icount += diff; + if (replay_mode == REPLAY_MODE_RECORD) { + if (diff > 0) { + replay_put_event(EVENT_INSTRUCTION); + replay_put_dword(diff); + replay_state.current_icount += diff; + } + } else if (replay_mode == REPLAY_MODE_PLAY) { + if (diff > 0) { + replay_state.instruction_count -= diff; + replay_state.current_icount += diff; + if (replay_state.instruction_count == 0) { + assert(replay_state.data_kind == EVENT_INSTRUCTION); + replay_finish_event(); + /* Wake up iothread. This is required because + timers will not expire until clock counters + will be read from the log. */ + qemu_notify_event(); + } + } + /* Execution reached the break step */ + if (replay_break_icount == replay_state.current_icount) { + /* Cannot make callback directly from the vCPU thread */ + timer_mod_ns(replay_break_timer, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME)); + } } } diff --git a/replay/replay-time.c b/replay/replay-time.c index 43357c9f24..00ebcb7a49 100644 --- a/replay/replay-time.c +++ b/replay/replay-time.c @@ -46,12 +46,12 @@ void replay_read_next_clock(ReplayClockKind kind) } /*! Reads next clock event from the input. */ -int64_t replay_read_clock(ReplayClockKind kind) +int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount) { int64_t ret; g_assert(replay_file && replay_mutex_locked()); - replay_account_executed_instructions(); + replay_advance_current_icount(raw_icount); if (replay_next_event_is(EVENT_CLOCK + kind)) { replay_read_next_clock(kind); diff --git a/replay/replay.c b/replay/replay.c index d4c228ab28..c806fec69a 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -94,28 +94,7 @@ void replay_account_executed_instructions(void) if (replay_mode == REPLAY_MODE_PLAY) { g_assert(replay_mutex_locked()); if (replay_state.instruction_count > 0) { - int count = (int)(replay_get_current_icount() - - replay_state.current_icount); - - /* Time can only go forward */ - assert(count >= 0); - - replay_state.instruction_count -= count; - replay_state.current_icount += count; - if (replay_state.instruction_count == 0) { - assert(replay_state.data_kind == EVENT_INSTRUCTION); - replay_finish_event(); - /* Wake up iothread. This is required because - timers will not expire until clock counters - will be read from the log. */ - qemu_notify_event(); - } - /* Execution reached the break step */ - if (replay_break_icount == replay_state.current_icount) { - /* Cannot make callback directly from the vCPU thread */ - timer_mod_ns(replay_break_timer, - qemu_clock_get_ns(QEMU_CLOCK_REALTIME)); - } + replay_advance_current_icount(replay_get_current_icount()); } } } diff --git a/stubs/replay-tools.c b/stubs/replay-tools.c index c06b360e22..43296b3d4e 100644 --- a/stubs/replay-tools.c +++ b/stubs/replay-tools.c @@ -13,7 +13,7 @@ int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount) return 0; } -int64_t replay_read_clock(unsigned int kind) +int64_t replay_read_clock(unsigned int kind, int64_t raw_icount) { abort(); return 0;