From 60d09b8dc7dd4256d664ad680795cb1327805b2b Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Thu, 23 Feb 2023 13:57:47 +0100 Subject: [PATCH 01/53] hw/smbios: fix field corruption in type 4 table Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the strings which follow immediately after the struct fields have been overwritten by unconditional filling of later fields such as core_count2. Make these fields dependent on the SMBIOS version. Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 Signed-off-by: Julia Suvorova Message-Id: <20230223125747.254914-1-jusual@redhat.com> Reviewed-by: Igor Mammedov Reviewed-by: Ani Sinha Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 4869566cf5..d2007e70fb 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -750,14 +750,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; t->core_enabled = t->core_count; - t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; - t->thread_count2 = cpu_to_le16(ms->smp.threads); t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ + if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { + t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); + t->thread_count2 = cpu_to_le16(ms->smp.threads); + } + SMBIOS_BUILD_TABLE_POST; smbios_type4_count++; } From b34f2fd17e4276ac0a75f8d72485a0236a740954 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 15:55:36 -0500 Subject: [PATCH 02/53] Revert "x86: don't let decompressed kernel image clobber setup_data" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit eac7a7791bb6d719233deed750034042318ffd56. Fixes: eac7a7791b ("x86: don't let decompressed kernel image clobber setup_data") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/microvm.c | 15 ++++------- hw/i386/x86.c | 52 ++++++++++++++++++--------------------- hw/nvram/fw_cfg.c | 9 ------- include/hw/i386/microvm.h | 5 ++-- include/hw/nvram/fw_cfg.h | 9 ------- 5 files changed, 31 insertions(+), 59 deletions(-) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 68c22016d2..6680530555 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -376,8 +376,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) MicrovmMachineState *mms = MICROVM_MACHINE(machine); BusState *bus; BusChild *kid; - char *cmdline, *existing_cmdline; - size_t len; + char *cmdline; /* * Find MMIO transports with attached devices, and add them to the kernel @@ -386,8 +385,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) * Yes, this is a hack, but one that heavily improves the UX without * introducing any significant issues. */ - existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA); - cmdline = g_strdup(existing_cmdline); + cmdline = g_strdup(machine->kernel_cmdline); bus = sysbus_get_default(); QTAILQ_FOREACH(kid, &bus->children, sibling) { DeviceState *dev = kid->child; @@ -411,12 +409,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) } } - len = strlen(cmdline); - if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) { - fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n"); - } else { - memcpy(existing_cmdline, cmdline, len + 1); - } + fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1); + fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline); + g_free(cmdline); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index c44846f47b..9b7476158c 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -49,7 +49,6 @@ #include "hw/intc/i8259.h" #include "hw/rtc/mc146818rtc.h" #include "target/i386/sev.h" -#include "hw/i386/microvm.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/irq.h" @@ -815,18 +814,12 @@ void x86_load_linux(X86MachineState *x86ms, const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; - char *kernel_cmdline; + const char *kernel_cmdline = machine->kernel_cmdline; SevKernelLoaderContext sev_load_ctx = {}; enum { RNG_SEED_LENGTH = 32 }; - /* - * Add the NUL terminator, some padding for the microvm cmdline fiddling - * hack, and then align to 16 bytes as a paranoia measure - */ - cmdline_size = (strlen(machine->kernel_cmdline) + 1 + - VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15; - /* Make a copy, since we might append arbitrary bytes to it later. */ - kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size); + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; /* load the kernel header */ f = fopen(kernel_filename, "rb"); @@ -967,6 +960,12 @@ void x86_load_linux(X86MachineState *x86ms, initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; } + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; + if (protocol >= 0x202) { stl_p(header + 0x228, cmdline_addr); } else { @@ -1093,24 +1092,27 @@ void x86_load_linux(X86MachineState *x86ms, exit(1); } - setup_data_offset = cmdline_size; - cmdline_size += sizeof(SetupData) + dtb_size; - kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size); - setup_data = (void *)kernel_cmdline + setup_data_offset; + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(SetupData) + dtb_size; + kernel = g_realloc(kernel, kernel_size); + + + setup_data = (SetupData *)(kernel + setup_data_offset); setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = cmdline_addr + setup_data_offset; + first_setup_data = prot_addr + setup_data_offset; setup_data->type = cpu_to_le32(SETUP_DTB); setup_data->len = cpu_to_le32(dtb_size); + load_image_size(dtb_filename, setup_data->data, dtb_size); } - if (!legacy_no_rng_seed && protocol >= 0x209) { - setup_data_offset = cmdline_size; - cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH; - kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size); - setup_data = (void *)kernel_cmdline + setup_data_offset; + if (!legacy_no_rng_seed) { + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(SetupData) + RNG_SEED_LENGTH; + kernel = g_realloc(kernel, kernel_size); + setup_data = (SetupData *)(kernel + setup_data_offset); setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = cmdline_addr + setup_data_offset; + first_setup_data = prot_addr + setup_data_offset; setup_data->type = cpu_to_le32(SETUP_RNG_SEED); setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); @@ -1121,12 +1123,6 @@ void x86_load_linux(X86MachineState *x86ms, fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); } - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size); - sev_load_ctx.cmdline_data = (char *)kernel_cmdline; - sev_load_ctx.cmdline_size = cmdline_size; - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); sev_load_ctx.kernel_data = (char *)kernel; @@ -1139,7 +1135,7 @@ void x86_load_linux(X86MachineState *x86ms, * kernel on the other side of the fw_cfg interface matches the hash of the * file the user passed in. */ - if (!sev_enabled() && first_setup_data) { + if (!sev_enabled()) { SetupDataFixup *fixup = g_malloc(sizeof(*fixup)); memcpy(setup, header, MIN(sizeof(header), setup_size)); diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 432754eda4..a00881bc64 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -741,15 +741,6 @@ void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len) fw_cfg_add_bytes_callback(s, key, NULL, NULL, NULL, data, len, true); } -void *fw_cfg_read_bytes_ptr(FWCfgState *s, uint16_t key) -{ - int arch = !!(key & FW_CFG_ARCH_LOCAL); - - key &= FW_CFG_ENTRY_MASK; - assert(key < fw_cfg_max_entry(s)); - return s->entries[arch][key].data; -} - void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value) { size_t sz = strlen(value) + 1; diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index e8af61f194..fad97a891d 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -50,9 +50,8 @@ */ /* Platform virtio definitions */ -#define VIRTIO_MMIO_BASE 0xfeb00000 -#define VIRTIO_CMDLINE_MAXLEN 64 -#define VIRTIO_CMDLINE_TOTAL_MAX_LEN ((VIRTIO_CMDLINE_MAXLEN + 1) * 16) +#define VIRTIO_MMIO_BASE 0xfeb00000 +#define VIRTIO_CMDLINE_MAXLEN 64 #define GED_MMIO_BASE 0xfea00000 #define GED_MMIO_BASE_MEMHP (GED_MMIO_BASE + 0x100) diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 990dcdbb2e..2e503904dc 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -139,15 +139,6 @@ void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, void *data, size_t len, bool read_only); -/** - * fw_cfg_read_bytes_ptr: - * @s: fw_cfg device being modified - * @key: selector key value for new fw_cfg item - * - * Reads an existing fw_cfg data pointer. - */ -void *fw_cfg_read_bytes_ptr(FWCfgState *s, uint16_t key); - /** * fw_cfg_add_string: * @s: fw_cfg device being modified From ef82d893de6d5bc0023026e636eae0f9a3e319dd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 15:55:38 -0500 Subject: [PATCH 03/53] Revert "x86: do not re-randomize RNG seed on snapshot load" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 14b29fea742034186403914b4d013d0e83f19e78. Signed-off-by: Michael S. Tsirkin Fixes: 14b29fea74 ("x86: do not re-randomize RNG seed on snapshot load") Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 9b7476158c..7a128a2899 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1116,7 +1116,7 @@ void x86_load_linux(X86MachineState *x86ms, setup_data->type = cpu_to_le32(SETUP_RNG_SEED); setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data); + qemu_register_reset(reset_rng_seed, setup_data); fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL, setup_data, kernel, kernel_size, true); } else { From b4bfa0a31d86caf89223e10e701c5b00df369b37 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 15:55:39 -0500 Subject: [PATCH 04/53] Revert "x86: re-initialize RNG seed when selecting kernel" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cc63374a5a7c240b7d3be734ef589dabbefc7527. Fixes: cc63374a5a ("x86: re-initialize RNG seed when selecting kernel") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/x86.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 7a128a2899..ec9c343cdb 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1117,14 +1117,11 @@ void x86_load_linux(X86MachineState *x86ms, setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); qemu_register_reset(reset_rng_seed, setup_data); - fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL, - setup_data, kernel, kernel_size, true); - } else { - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); } fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); sev_load_ctx.kernel_data = (char *)kernel; sev_load_ctx.kernel_size = kernel_size; From fdc27ced04160904af1f290b561eded73abb8f1d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 15:55:40 -0500 Subject: [PATCH 05/53] Revert "x86: reinitialize RNG seed on system reboot" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 763a2828bf313ed55878b09759dc435355035f2e. Fixes: 763a2828bf ("x86: reinitialize RNG seed on system reboot") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/x86.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ec9c343cdb..278dd54830 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -788,12 +788,6 @@ static void reset_setup_data(void *opaque) stq_p(fixup->pos, fixup->orig_val); } -static void reset_rng_seed(void *opaque) -{ - SetupData *setup_data = opaque; - qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len)); -} - void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, @@ -1116,7 +1110,6 @@ void x86_load_linux(X86MachineState *x86ms, setup_data->type = cpu_to_le32(SETUP_RNG_SEED); setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - qemu_register_reset(reset_rng_seed, setup_data); } fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); From ea96a784773259d469f3f2465f09e04eabb80a66 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 15:55:41 -0500 Subject: [PATCH 06/53] Revert "x86: use typedef for SetupData struct" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit eebb38a5633a77f5fa79d6486d5b2fcf8fbe3c07. Fixes: eebb38a563 ("x86: use typedef for SetupData struct") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/x86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 278dd54830..66cf171ace 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -658,12 +658,12 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) return dev; } -typedef struct SetupData { +struct setup_data { uint64_t next; uint32_t type; uint32_t len; uint8_t data[]; -} __attribute__((packed)) SetupData; +} __attribute__((packed)); /* @@ -804,7 +804,7 @@ void x86_load_linux(X86MachineState *x86ms, FILE *f; char *vmode; MachineState *machine = MACHINE(x86ms); - SetupData *setup_data; + struct setup_data *setup_data; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; @@ -1087,11 +1087,11 @@ void x86_load_linux(X86MachineState *x86ms, } setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(SetupData) + dtb_size; + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; kernel = g_realloc(kernel, kernel_size); - setup_data = (SetupData *)(kernel + setup_data_offset); + setup_data = (struct setup_data *)(kernel + setup_data_offset); setup_data->next = cpu_to_le64(first_setup_data); first_setup_data = prot_addr + setup_data_offset; setup_data->type = cpu_to_le32(SETUP_DTB); @@ -1102,9 +1102,9 @@ void x86_load_linux(X86MachineState *x86ms, if (!legacy_no_rng_seed) { setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(SetupData) + RNG_SEED_LENGTH; + kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH; kernel = g_realloc(kernel, kernel_size); - setup_data = (SetupData *)(kernel + setup_data_offset); + setup_data = (struct setup_data *)(kernel + setup_data_offset); setup_data->next = cpu_to_le64(first_setup_data); first_setup_data = prot_addr + setup_data_offset; setup_data->type = cpu_to_le32(SETUP_RNG_SEED); From ae80d81cfa865cbe443543679e013e7fa5fcd12c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 16:04:40 -0500 Subject: [PATCH 07/53] Revert "x86: return modified setup_data only if read as memory, not as file" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e935b735085dfa61d8e6d276b6f9e7687796a3c7. Fixes: e935b73508 ("x86: return modified setup_data only if read as memory, not as file") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/x86.c | 46 +++++++++------------------------------ hw/nvram/fw_cfg.c | 12 +++++----- include/hw/nvram/fw_cfg.h | 22 ------------------- 3 files changed, 16 insertions(+), 64 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 66cf171ace..ed161a3409 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -36,7 +36,6 @@ #include "sysemu/whpx.h" #include "sysemu/numa.h" #include "sysemu/replay.h" -#include "sysemu/reset.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" #include "sysemu/xen.h" @@ -770,24 +769,6 @@ static bool load_elfboot(const char *kernel_filename, return true; } -typedef struct SetupDataFixup { - void *pos; - hwaddr orig_val, new_val; - uint32_t addr; -} SetupDataFixup; - -static void fixup_setup_data(void *opaque) -{ - SetupDataFixup *fixup = opaque; - stq_p(fixup->pos, fixup->new_val); -} - -static void reset_setup_data(void *opaque) -{ - SetupDataFixup *fixup = opaque; - stq_p(fixup->pos, fixup->orig_val); -} - void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, @@ -1112,11 +1093,8 @@ void x86_load_linux(X86MachineState *x86ms, qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); } - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); - sev_load_ctx.kernel_data = (char *)kernel; - sev_load_ctx.kernel_size = kernel_size; + /* Offset 0x250 is a pointer to the first setup_data link. */ + stq_p(header + 0x250, first_setup_data); /* * If we're starting an encrypted VM, it will be OVMF based, which uses the @@ -1126,20 +1104,16 @@ void x86_load_linux(X86MachineState *x86ms, * file the user passed in. */ if (!sev_enabled()) { - SetupDataFixup *fixup = g_malloc(sizeof(*fixup)); - memcpy(setup, header, MIN(sizeof(header), setup_size)); - /* Offset 0x250 is a pointer to the first setup_data link. */ - fixup->pos = setup + 0x250; - fixup->orig_val = ldq_p(fixup->pos); - fixup->new_val = first_setup_data; - fixup->addr = cpu_to_le32(real_addr); - fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL, - fixup, &fixup->addr, sizeof(fixup->addr), true); - qemu_register_reset(reset_setup_data, fixup); - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); } + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); sev_load_ctx.setup_data = (char *)setup; diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index a00881bc64..29a5bef1d5 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -693,12 +693,12 @@ static const VMStateDescription vmstate_fw_cfg = { } }; -void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, - FWCfgCallback select_cb, - FWCfgWriteCallback write_cb, - void *callback_opaque, - void *data, size_t len, - bool read_only) +static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, + FWCfgCallback select_cb, + FWCfgWriteCallback write_cb, + void *callback_opaque, + void *data, size_t len, + bool read_only) { int arch = !!(key & FW_CFG_ARCH_LOCAL); diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 2e503904dc..c1f81a5f13 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -117,28 +117,6 @@ struct FWCfgMemState { */ void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len); -/** - * fw_cfg_add_bytes_callback: - * @s: fw_cfg device being modified - * @key: selector key value for new fw_cfg item - * @select_cb: callback function when selecting - * @write_cb: callback function after a write - * @callback_opaque: argument to be passed into callback function - * @data: pointer to start of item data - * @len: size of item data - * @read_only: is file read only - * - * Add a new fw_cfg item, available by selecting the given key, as a raw - * "blob" of the given size. The data referenced by the starting pointer - * is only linked, NOT copied, into the data structure of the fw_cfg device. - */ -void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, - FWCfgCallback select_cb, - FWCfgWriteCallback write_cb, - void *callback_opaque, - void *data, size_t len, - bool read_only); - /** * fw_cfg_add_string: * @s: fw_cfg device being modified From 167f4873580d3729565044cda73c3e20997950f2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 8 Feb 2023 16:05:35 -0500 Subject: [PATCH 08/53] Revert "hw/i386: pass RNG seed via setup_data entry" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 67f7e426e53833a5db75b0d813e8d537b8a75bd2. Additionally to the automatic revert, I went over the code and dropped all mentions of legacy_no_rng_seed manually, effectively reverting a combination of 2 additional commits: commit ffe2d2382e5f1aae1abc4081af407905ef380311 Author: Jason A. Donenfeld Date: Wed Sep 21 11:31:34 2022 +0200 x86: re-enable rng seeding via SetupData commit 3824e25db1a84fadc50b88dfbe27047aa2f7f85d Author: Gerd Hoffmann Date: Wed Aug 17 10:39:40 2022 +0200 x86: disable rng seeding via setup_data Fixes: 67f7e426e5 ("hw/i386: pass RNG seed via setup_data entry") Signed-off-by: Michael S. Tsirkin Tested-by: Nathan Chancellor Tested-by: Dov Murik Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Daniel P. Berrangé --- hw/i386/microvm.c | 2 +- hw/i386/pc.c | 4 ++-- hw/i386/pc_piix.c | 2 -- hw/i386/pc_q35.c | 2 -- hw/i386/x86.c | 26 ++++---------------------- include/hw/i386/pc.h | 3 --- include/hw/i386/x86.h | 3 +-- 7 files changed, 8 insertions(+), 34 deletions(-) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 6680530555..3d606a20b4 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -328,7 +328,7 @@ static void microvm_memory_init(MicrovmMachineState *mms) rom_set_fw(fw_cfg); if (machine->kernel_filename != NULL) { - x86_load_linux(x86ms, fw_cfg, 0, true, false); + x86_load_linux(x86ms, fw_cfg, 0, true); } if (mms->option_roms) { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 992951c107..8b1ddc8d99 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -806,7 +806,7 @@ void xen_load_linux(PCMachineState *pcms) rom_set_fw(fw_cfg); x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->legacy_no_rng_seed); + pcmc->pvh_enabled); for (i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || @@ -1126,7 +1126,7 @@ void pc_memory_init(PCMachineState *pcms, if (linux_boot) { x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->legacy_no_rng_seed); + pcmc->pvh_enabled); } for (i = 0; i < nb_option_roms; i++) { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 126b6c11df..2f16011bab 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -477,9 +477,7 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL, static void pc_i440fx_7_1_machine_options(MachineClass *m) { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_7_2_machine_options(m); - pcmc->legacy_no_rng_seed = true; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 09004f3f1f..797ba347fd 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -395,9 +395,7 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL, static void pc_q35_7_1_machine_options(MachineClass *m) { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_7_2_machine_options(m); - pcmc->legacy_no_rng_seed = true; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ed161a3409..a33c879598 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -26,7 +26,6 @@ #include "qemu/cutils.h" #include "qemu/units.h" #include "qemu/datadir.h" -#include "qemu/guest-random.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" #include "qapi/clone-visitor.h" @@ -772,8 +771,7 @@ static bool load_elfboot(const char *kernel_filename, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool legacy_no_rng_seed) + bool pvh_enabled) { bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; uint16_t protocol; @@ -781,7 +779,7 @@ void x86_load_linux(X86MachineState *x86ms, int dtb_size, setup_data_offset; uint32_t initrd_max; uint8_t header[8192], *setup, *kernel; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; FILE *f; char *vmode; MachineState *machine = MACHINE(x86ms); @@ -791,7 +789,6 @@ void x86_load_linux(X86MachineState *x86ms, const char *dtb_filename = machine->dtb; const char *kernel_cmdline = machine->kernel_cmdline; SevKernelLoaderContext sev_load_ctx = {}; - enum { RNG_SEED_LENGTH = 32 }; /* Align to 16 bytes as a paranoia measure */ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; @@ -1071,31 +1068,16 @@ void x86_load_linux(X86MachineState *x86ms, kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; kernel = g_realloc(kernel, kernel_size); + stq_p(header + 0x250, prot_addr + setup_data_offset); setup_data = (struct setup_data *)(kernel + setup_data_offset); - setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = prot_addr + setup_data_offset; + setup_data->next = 0; setup_data->type = cpu_to_le32(SETUP_DTB); setup_data->len = cpu_to_le32(dtb_size); load_image_size(dtb_filename, setup_data->data, dtb_size); } - if (!legacy_no_rng_seed) { - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH; - kernel = g_realloc(kernel, kernel_size); - setup_data = (struct setup_data *)(kernel + setup_data_offset); - setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = prot_addr + setup_data_offset; - setup_data->type = cpu_to_le32(SETUP_RNG_SEED); - setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); - qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - } - - /* Offset 0x250 is a pointer to the first setup_data link. */ - stq_p(header + 0x250, first_setup_data); - /* * If we're starting an encrypted VM, it will be OVMF based, which uses the * efi stub for booting and doesn't require any values to be placed in the diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 66e3d059ef..44b08554fa 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -127,9 +127,6 @@ struct PCMachineClass { /* create kvmclock device even when KVM PV features are not exposed */ bool kvmclock_create_always; - - /* skip passing an rng seed for legacy machines */ - bool legacy_no_rng_seed; }; #define TYPE_PC_MACHINE "generic-pc-machine" diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 890dfad23e..0b337a036c 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -123,8 +123,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool legacy_no_rng_seed); + bool pvh_enabled); bool x86_machine_is_smm_enabled(const X86MachineState *x86ms); bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms); From cd69d47cddabc9adf4562ac968437bb547f46630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 24 Jan 2023 17:11:59 +0100 Subject: [PATCH 09/53] virtio-net: clear guest_announce feature if no cvq backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since GUEST_ANNOUNCE is emulated the feature bit could be set without backend support. This happens in the vDPA case. However, backend vDPA parent may not have CVQ support. This causes an incoherent feature set, and the driver may refuse to start. This happens in virtio-net Linux driver. This may be solved differently in the future. Qemu is able to emulate a CVQ just for guest_announce purposes, helping guest to notify the new location with vDPA devices that does not support it. However, this is left as a TODO as it is way more complex to backport. Tested with vdpa_net_sim, toggling manually VIRTIO_NET_F_CTRL_VQ in the driver and migrating it with x-svq=on. Fixes: 980003debddd ("vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in vhost-vdpa") Reported-by: Dawar, Gautam Signed-off-by: Eugenio Pérez Message-Id: <20230124161159.2182117-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: David Edmondson Reviewed-by: Gautam Dawar Tested-by: Gautam Dawar Tested-by: Lei Yang --- hw/net/virtio-net.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3ae909041a..09d5c7a664 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -820,6 +820,21 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, features |= (1ULL << VIRTIO_NET_F_MTU); } + /* + * Since GUEST_ANNOUNCE is emulated the feature bit could be set without + * enabled. This happens in the vDPA case. + * + * Make sure the feature set is not incoherent, as the driver could refuse + * to start. + * + * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, + * helping guest to notify the new location with vDPA devices that does not + * support it. + */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } + return features; } From e1a0e635c91e4e1255c16a178afde30aa32b7efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 30 Jan 2023 12:47:28 +0000 Subject: [PATCH 10/53] backends/vhost-user: remove the ioeventfd check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While ioeventfds are needed for good performance with KVM guests it should not be a gating requirement. We can run vhost-user backends using simulated ioeventfds or inband signalling. With this change I can run: $QEMU $OPTS \ -display gtk,gl=on \ -device vhost-user-gpu-pci,chardev=vhgpu \ -chardev socket,id=vhgpu,path=vhgpu.sock with: ./contrib/vhost-user-gpu/vhost-user-gpu \ -s vhgpu.sock \ -v and at least see things start-up - although the display gets rotated by 180 degrees. Once lightdm takes over we never make it to the login prompt and just get a blank screen. Signed-off-by: Alex Bennée Cc: Gerd Hoffmann Message-Id: <20221202132231.1048669-1-alex.bennee@linaro.org> Message-Id: <20230130124728.175610-1-alex.bennee@linaro.org> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- backends/vhost-user.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/backends/vhost-user.c b/backends/vhost-user.c index 0596223ac4..94c6a82d52 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -20,12 +20,6 @@ #include "io/channel-command.h" #include "hw/virtio/virtio-bus.h" -static bool -ioeventfd_enabled(void) -{ - return kvm_enabled() && kvm_eventfds_enabled(); -} - int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, unsigned nvqs, Error **errp) @@ -34,11 +28,6 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, assert(!b->vdev && vdev); - if (!ioeventfd_enabled()) { - error_setg(errp, "vhost initialization failed: requires kvm"); - return -1; - } - if (!vhost_user_init(&b->vhost_user, &b->chr, errp)) { return -1; } From daae36c13abc73cf1055abc2d33cb71cc5d34310 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 30 Jan 2023 23:03:20 +0900 Subject: [PATCH 11/53] vhost-user-gpio: Configure vhost_dev when connecting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost_dev_cleanup(), called from vu_gpio_disconnect(), clears vhost_dev so vhost-user-gpio must set the members of vhost_dev each time connecting. do_vhost_user_cleanup() should also acquire the pointer to vqs directly from VHostUserGPIO instead of referring to vhost_dev as it can be called after vhost_dev_cleanup(). Fixes: 27ba7b027f ("hw/virtio: add boilerplate for vhost-user-gpio device") Signed-off-by: Akihiko Odaki Message-Id: <20230130140320.77999-1-akihiko.odaki@daynix.com> Reviewed-by: Viresh Kumar Reviewed-by: Alex Bennée Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-gpio.c | 10 ++++++---- include/hw/virtio/vhost-user-gpio.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index fe3da32c74..d6927b610a 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -16,6 +16,7 @@ #include "trace.h" #define REALIZE_CONNECTION_RETRIES 3 +#define VHOST_NVQS 2 /* Features required from VirtIO */ static const int feature_bits[] = { @@ -208,8 +209,7 @@ static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) { virtio_delete_queue(gpio->command_vq); virtio_delete_queue(gpio->interrupt_vq); - g_free(gpio->vhost_dev.vqs); - gpio->vhost_dev.vqs = NULL; + g_free(gpio->vhost_vqs); virtio_cleanup(vdev); vhost_user_cleanup(&gpio->vhost_user); } @@ -229,6 +229,9 @@ static int vu_gpio_connect(DeviceState *dev, Error **errp) vhost_dev_set_config_notifier(vhost_dev, &gpio_ops); gpio->vhost_user.supports_config = true; + gpio->vhost_dev.nvqs = VHOST_NVQS; + gpio->vhost_dev.vqs = gpio->vhost_vqs; + ret = vhost_dev_init(vhost_dev, &gpio->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { @@ -347,10 +350,9 @@ static void vu_gpio_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, VIRTIO_ID_GPIO, sizeof(gpio->config)); - gpio->vhost_dev.nvqs = 2; gpio->command_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); gpio->interrupt_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); - gpio->vhost_dev.vqs = g_new0(struct vhost_virtqueue, gpio->vhost_dev.nvqs); + gpio->vhost_vqs = g_new0(struct vhost_virtqueue, VHOST_NVQS); gpio->connected = false; diff --git a/include/hw/virtio/vhost-user-gpio.h b/include/hw/virtio/vhost-user-gpio.h index a9305c5e6c..a9d3f9b049 100644 --- a/include/hw/virtio/vhost-user-gpio.h +++ b/include/hw/virtio/vhost-user-gpio.h @@ -23,7 +23,7 @@ struct VHostUserGPIO { VirtIODevice parent_obj; CharBackend chardev; struct virtio_gpio_config config; - struct vhost_virtqueue *vhost_vq; + struct vhost_virtqueue *vhost_vqs; struct vhost_dev vhost_dev; VhostUserState vhost_user; VirtQueue *command_vq; From 0126793bee853e7c134627f51d2de5428a612e99 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 30 Jan 2023 23:04:35 +0900 Subject: [PATCH 12/53] vhost-user-i2c: Back up vqs before cleaning up vhost_dev vhost_dev_cleanup() clears vhost_dev so back up its vqs member to free the memory pointed by the member. Fixes: 7221d3b634 ("hw/virtio: add boilerplate for vhost-user-i2c device") Signed-off-by: Akihiko Odaki Message-Id: <20230130140435.78049-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-i2c.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index dc5c828ba6..60eaf0d95b 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -143,8 +143,6 @@ static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) vhost_user_cleanup(&i2c->vhost_user); virtio_delete_queue(i2c->vq); virtio_cleanup(vdev); - g_free(i2c->vhost_dev.vqs); - i2c->vhost_dev.vqs = NULL; } static int vu_i2c_connect(DeviceState *dev) @@ -228,6 +226,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp) ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { + g_free(i2c->vhost_dev.vqs); do_vhost_user_cleanup(vdev, i2c); } @@ -239,10 +238,12 @@ static void vu_i2c_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserI2C *i2c = VHOST_USER_I2C(dev); + struct vhost_virtqueue *vhost_vqs = i2c->vhost_dev.vqs; /* This will stop vhost backend if appropriate. */ vu_i2c_set_status(vdev, 0); vhost_dev_cleanup(&i2c->vhost_dev); + g_free(vhost_vqs); do_vhost_user_cleanup(vdev, i2c); } From f0dac71596d4b87a1a77d1f4efb6a6adb4730d7b Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 30 Jan 2023 23:05:16 +0900 Subject: [PATCH 13/53] vhost-user-rng: Back up vqs before cleaning up vhost_dev vhost_dev_cleanup() clears vhost_dev so back up its vqs member to free the memory pointed by the member. Fixes: 821d28b88f ("vhost-user-rng: Add vhost-user-rng implementation") Signed-off-by: Akihiko Odaki Message-Id: <20230130140516.78078-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-rng.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 201a39e220..efc54cd3fb 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -229,6 +229,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp) return; vhost_dev_init_failed: + g_free(rng->vhost_dev.vqs); virtio_delete_queue(rng->req_vq); virtio_add_queue_failed: virtio_cleanup(vdev); @@ -239,12 +240,12 @@ static void vu_rng_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserRNG *rng = VHOST_USER_RNG(dev); + struct vhost_virtqueue *vhost_vqs = rng->vhost_dev.vqs; vu_rng_set_status(vdev, 0); vhost_dev_cleanup(&rng->vhost_dev); - g_free(rng->vhost_dev.vqs); - rng->vhost_dev.vqs = NULL; + g_free(vhost_vqs); virtio_delete_queue(rng->req_vq); virtio_cleanup(vdev); vhost_user_cleanup(&rng->vhost_user); From 62bdb8871512076841f4464f7e26efdc7783f78d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 7 Feb 2023 17:49:44 +0000 Subject: [PATCH 14/53] virtio-rng-pci: fix transitional migration compat for vectors In bad9c5a516 ("virtio-rng-pci: fix migration compat for vectors") I fixed the virtio-rng-pci migration compatibility, but it was discovered that we also need to fix the other aliases of the device for the transitional cases. Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') bz: https://bugzilla.redhat.com/show_bug.cgi?id=2162569 Signed-off-by: Dr. David Alan Gilbert Message-Id: <20230207174944.138255-1-dgilbert@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c index f29e700ee4..1cf6822e06 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -47,6 +47,8 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, { "virtio-rng-pci", "vectors", "0" }, + { "virtio-rng-pci-transitional", "vectors", "0" }, + { "virtio-rng-pci-non-transitional", "vectors", "0" }, }; const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1); From 37d2bcbc2a4e9c2e9061bec72a32c7e49b9f81ec Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 31 Jan 2023 12:00:37 +0900 Subject: [PATCH 15/53] hw/timer/hpet: Fix expiration time overflow The expiration time provided for timer_mod() can overflow if a ridiculously large value is set to the comparator register. The resulting value can represent a past time after rounded, forcing the timer to fire immediately. If the timer is configured as periodic, it will rearm the timer again, and form an endless loop. Check if the expiration value will overflow, and if it will, stop the timer instead of rearming the timer with the overflowed time. This bug was found by Alexander Bulekov when fuzzing igb, a new network device emulation: https://patchew.org/QEMU/20230129053316.1071513-1-alxndr@bu.edu/ The fixed test case is: fuzz/crash_2d7036941dcda1ad4380bb8a9174ed0c949bcefd Fixes: 16b29ae180 ("Add HPET emulation to qemu (Beth Kon)") Signed-off-by: Akihiko Odaki Acked-by: Michael S. Tsirkin Message-Id: <20230131030037.18856-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/timer/hpet.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index 214d6a0501..6998094233 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -353,6 +353,16 @@ static const VMStateDescription vmstate_hpet = { } }; +static void hpet_arm(HPETTimer *t, uint64_t ticks) +{ + if (ticks < ns_to_ticks(INT64_MAX / 2)) { + timer_mod(t->qemu_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ticks_to_ns(ticks)); + } else { + timer_del(t->qemu_timer); + } +} + /* * timer expiration callback */ @@ -375,13 +385,11 @@ static void hpet_timer(void *opaque) } } diff = hpet_calculate_diff(t, cur_tick); - timer_mod(t->qemu_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) { if (t->wrap_flag) { diff = hpet_calculate_diff(t, cur_tick); - timer_mod(t->qemu_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); t->wrap_flag = 0; } } @@ -408,8 +416,7 @@ static void hpet_set_timer(HPETTimer *t) t->wrap_flag = 1; } } - timer_mod(t->qemu_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); } static void hpet_del_timer(HPETTimer *t) From e9ca9f33f51faf775badbbb5752eaa928011607a Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 8 Feb 2023 21:32:57 +0100 Subject: [PATCH 16/53] docs: vhost-user: replace _SLAVE_ with _BACKEND_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend's message and protocol features names were still using "_SLAVE_" naming. For consistency with the new naming convention, replace it with _BACKEND_. Signed-off-by: Maxime Coquelin Message-Id: <20230208203259.381326-2-maxime.coquelin@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.rst | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 3f18ab424e..8a5924ea75 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -315,7 +315,7 @@ in the ancillary data: * ``VHOST_USER_SET_VRING_KICK`` * ``VHOST_USER_SET_VRING_CALL`` * ``VHOST_USER_SET_VRING_ERR`` -* ``VHOST_USER_SET_SLAVE_REQ_FD`` +* ``VHOST_USER_SET_BACKEND_REQ_FD`` (previous name ``VHOST_USER_SET_SLAVE_REQ_FD``) * ``VHOST_USER_SET_INFLIGHT_FD`` (if ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``) If *front-end* is unable to send the full message or receives a wrong @@ -516,7 +516,7 @@ expected to reply with a zero payload, non-zero otherwise. The back-end relies on the back-end communication channel (see :ref:`Back-end communication ` section below) to send IOTLB miss -and access failure events, by sending ``VHOST_USER_SLAVE_IOTLB_MSG`` +and access failure events, by sending ``VHOST_USER_BACKEND_IOTLB_MSG`` requests to the front-end with a ``struct vhost_iotlb_msg`` as payload. For miss events, the iotlb payload has to be filled with the miss message type (1), the I/O virtual address and the permissions @@ -540,15 +540,15 @@ Back-end communication ---------------------- An optional communication channel is provided if the back-end declares -``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` protocol feature, to allow the +``VHOST_USER_PROTOCOL_F_BACKEND_REQ`` protocol feature, to allow the back-end to make requests to the front-end. -The fd is provided via ``VHOST_USER_SET_SLAVE_REQ_FD`` ancillary data. +The fd is provided via ``VHOST_USER_SET_BACKEND_REQ_FD`` ancillary data. -A back-end may then send ``VHOST_USER_SLAVE_*`` messages to the front-end +A back-end may then send ``VHOST_USER_BACKEND_*`` messages to the front-end using this fd communication channel. -If ``VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD`` protocol feature is +If ``VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD`` protocol feature is negotiated, back-end can send file descriptors (at most 8 descriptors in each message) to front-end via ancillary data using this fd communication channel. @@ -835,7 +835,7 @@ Note that due to the fact that too many messages on the sockets can cause the sending application(s) to block, it is not advised to use this feature unless absolutely necessary. It is also considered an error to negotiate this feature without also negotiating -``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``, +``VHOST_USER_PROTOCOL_F_BACKEND_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``, the former is necessary for getting a message channel from the back-end to the front-end, while the latter needs to be used with the in-band notification messages to block until they are processed, both to avoid @@ -855,12 +855,12 @@ Protocol features #define VHOST_USER_PROTOCOL_F_RARP 2 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_MTU 4 - #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 + #define VHOST_USER_PROTOCOL_F_BACKEND_REQ 5 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 #define VHOST_USER_PROTOCOL_F_CONFIG 9 - #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 + #define VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD 10 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13 @@ -1059,8 +1059,8 @@ Front-end message types in the ancillary data. This signals that polling will be used instead of waiting for the call. Note that if the protocol features ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and - ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message - isn't necessary as the ``VHOST_USER_SLAVE_VRING_CALL`` message can be + ``VHOST_USER_PROTOCOL_F_BACKEND_REQ`` have been negotiated this message + isn't necessary as the ``VHOST_USER_BACKEND_VRING_CALL`` message can be used, it may however still be used to set an event file descriptor or to enable polling. @@ -1077,8 +1077,8 @@ Front-end message types invalid FD flag. This flag is set when there is no file descriptor in the ancillary data. Note that if the protocol features ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and - ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message - isn't necessary as the ``VHOST_USER_SLAVE_VRING_ERR`` message can be + ``VHOST_USER_PROTOCOL_F_BACKEND_REQ`` have been negotiated this message + isn't necessary as the ``VHOST_USER_BACKEND_VRING_ERR`` message can be used, it may however still be used to set an event file descriptor (which will be preferred over the message). @@ -1139,7 +1139,7 @@ Front-end message types respond with zero in case the specified MTU is valid, or non-zero otherwise. -``VHOST_USER_SET_SLAVE_REQ_FD`` +``VHOST_USER_SET_BACKEND_REQ_FD`` (previous name ``VHOST_USER_SET_SLAVE_REQ_FD``) :id: 21 :equivalent ioctl: N/A :request payload: N/A @@ -1150,7 +1150,7 @@ Front-end message types This request should be sent only when ``VHOST_USER_F_PROTOCOL_FEATURES`` has been negotiated, and protocol - feature bit ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` bit is present in + feature bit ``VHOST_USER_PROTOCOL_F_BACKEND_REQ`` bit is present in ``VHOST_USER_GET_PROTOCOL_FEATURES``. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, the back-end must respond with zero for success, non-zero otherwise. @@ -1429,7 +1429,7 @@ Back-end message types For this type of message, the request is sent by the back-end and the reply is sent by the front-end. -``VHOST_USER_SLAVE_IOTLB_MSG`` +``VHOST_USER_BACKEND_IOTLB_MSG`` (previous name ``VHOST_USER_SLAVE_IOTLB_MSG``) :id: 1 :equivalent ioctl: N/A (equivalent to ``VHOST_IOTLB_MSG`` message type) :request payload: ``struct vhost_iotlb_msg`` @@ -1444,7 +1444,7 @@ is sent by the front-end. ``VIRTIO_F_IOMMU_PLATFORM`` feature has been successfully negotiated. -``VHOST_USER_SLAVE_CONFIG_CHANGE_MSG`` +``VHOST_USER_BACKEND_CONFIG_CHANGE_MSG`` (previous name ``VHOST_USER_SLAVE_CONFIG_CHANGE_MSG``) :id: 2 :equivalent ioctl: N/A :request payload: N/A @@ -1459,7 +1459,7 @@ is sent by the front-end. ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond with zero when operation is successfully completed, or non-zero otherwise. -``VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG`` +``VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG`` (previous name ``VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG``) :id: 3 :equivalent ioctl: N/A :request payload: vring area description @@ -1482,7 +1482,7 @@ is sent by the front-end. ``VHOST_USER_PROTOCOL_F_HOST_NOTIFIER`` protocol feature has been successfully negotiated. -``VHOST_USER_SLAVE_VRING_CALL`` +``VHOST_USER_BACKEND_VRING_CALL`` (previous name ``VHOST_USER_SLAVE_VRING_CALL``) :id: 4 :equivalent ioctl: N/A :request payload: vring state description @@ -1496,7 +1496,7 @@ is sent by the front-end. The state.num field is currently reserved and must be set to 0. -``VHOST_USER_SLAVE_VRING_ERR`` +``VHOST_USER_BACKEND_VRING_ERR`` (previous name ``VHOST_USER_SLAVE_VRING_ERR``) :id: 5 :equivalent ioctl: N/A :request payload: vring state description From e608feed51d5981a587cc0743ee57030a88a9265 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 8 Feb 2023 21:32:58 +0100 Subject: [PATCH 17/53] libvhost-user: Adopt new backend naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Vhost-user specification changed feature and request naming from _SLAVE_ to _BACKEND_. This patch adopts the new naming convention. Signed-off-by: Maxime Coquelin Message-Id: <20230208203259.381326-3-maxime.coquelin@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 20 ++++++++++---------- subprojects/libvhost-user/libvhost-user.h | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index fc69783d2b..f661af7c85 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -140,7 +140,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_VRING_ENABLE), REQ(VHOST_USER_SEND_RARP), REQ(VHOST_USER_NET_SET_MTU), - REQ(VHOST_USER_SET_SLAVE_REQ_FD), + REQ(VHOST_USER_SET_BACKEND_REQ_FD), REQ(VHOST_USER_IOTLB_MSG), REQ(VHOST_USER_SET_VRING_ENDIAN), REQ(VHOST_USER_GET_CONFIG), @@ -1365,7 +1365,7 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, int qidx = vq - dev->vq; int fd_num = 0; VhostUserMsg vmsg = { - .request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, + .request = VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG, .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, .size = sizeof(vmsg.payload.area), .payload.area = { @@ -1383,7 +1383,7 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, vmsg.fd_num = fd_num; - if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { + if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD)) { return false; } @@ -1461,9 +1461,9 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) */ uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ | 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | + 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ | 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | + 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD | 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | 1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS; @@ -1494,7 +1494,7 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) if (vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && - (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ) || + (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) || !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { /* * The use case for using messages for kick/call is simulation, to make @@ -1507,7 +1507,7 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) * that actually enables the simulation case. */ vu_panic(dev, - "F_IN_BAND_NOTIFICATIONS requires F_SLAVE_REQ && F_REPLY_ACK"); + "F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK"); return false; } @@ -1910,7 +1910,7 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_get_queue_num_exec(dev, vmsg); case VHOST_USER_SET_VRING_ENABLE: return vu_set_vring_enable_exec(dev, vmsg); - case VHOST_USER_SET_SLAVE_REQ_FD: + case VHOST_USER_SET_BACKEND_REQ_FD: return vu_set_slave_req_fd(dev, vmsg); case VHOST_USER_GET_CONFIG: return vu_get_config(dev, vmsg); @@ -2416,9 +2416,9 @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync) if (vq->call_fd < 0 && vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && - vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { VhostUserMsg vmsg = { - .request = VHOST_USER_SLAVE_VRING_CALL, + .request = VHOST_USER_BACKEND_VRING_CALL, .flags = VHOST_USER_VERSION, .size = sizeof(vmsg.payload.state), .payload.state = { diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 8cda9b8f57..8c5a2719e3 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -54,12 +54,12 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, @@ -92,7 +92,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_SET_BACKEND_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, @@ -113,13 +113,13 @@ typedef enum VhostUserRequest { } VhostUserRequest; typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_VRING_CALL = 4, - VHOST_USER_SLAVE_VRING_ERR = 5, - VHOST_USER_SLAVE_MAX + VHOST_USER_BACKEND_NONE = 0, + VHOST_USER_BACKEND_IOTLB_MSG = 1, + VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, + VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_VRING_CALL = 4, + VHOST_USER_BACKEND_VRING_ERR = 5, + VHOST_USER_BACKEND_MAX } VhostUserSlaveRequest; typedef struct VhostUserMemoryRegion { From a84ec9935f2082a56737859f8009cd5fa75aef41 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 8 Feb 2023 21:32:59 +0100 Subject: [PATCH 18/53] vhost-user: Adopt new backend naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Vhost-user specification changed feature and request naming from _SLAVE_ to _BACKEND_. This patch adopts the new naming convention. Signed-off-by: Maxime Coquelin Message-Id: <20230208203259.381326-4-maxime.coquelin@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 30 +++++++++++++++--------------- hw/virtio/virtio-qmp.c | 12 ++++++------ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index e68daa35d4..8968541514 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -40,7 +40,7 @@ #define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_SLAVE_MAX_FDS 8 +#define VHOST_USER_BACKEND_MAX_FDS 8 /* * Set maximum number of RAM slots supported to @@ -71,12 +71,12 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, @@ -110,7 +110,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_SET_BACKEND_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, @@ -134,11 +134,11 @@ typedef enum VhostUserRequest { } VhostUserRequest; typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_MAX + VHOST_USER_BACKEND_NONE = 0, + VHOST_USER_BACKEND_IOTLB_MSG = 1, + VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, + VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_MAX } VhostUserSlaveRequest; typedef struct VhostUserMemoryRegion { @@ -1638,13 +1638,13 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, } switch (hdr.request) { - case VHOST_USER_SLAVE_IOTLB_MSG: + case VHOST_USER_BACKEND_IOTLB_MSG: ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; - case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : + case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: ret = vhost_user_slave_handle_config_change(dev); break; - case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; @@ -1696,7 +1696,7 @@ fdcleanup: static int vhost_setup_slave_channel(struct vhost_dev *dev) { VhostUserMsg msg = { - .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, + .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, .hdr.flags = VHOST_USER_VERSION, }; struct vhost_user *u = dev->opaque; @@ -1707,7 +1707,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) QIOChannel *ioc; if (!virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { return 0; } @@ -2065,7 +2065,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && !(virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + VHOST_USER_PROTOCOL_F_BACKEND_REQ) && virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index e4d4bece2d..b70148aba9 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -42,12 +42,12 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, @@ -101,8 +101,8 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { "supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \ "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"), - FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \ - "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated " + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_REQ, \ + "VHOST_USER_PROTOCOL_F_BACKEND_REQ: Socket fd for back-end initiated " "requests supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \ "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy " @@ -116,8 +116,8 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \ "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " "device configuration space supported"), - FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \ - "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication " + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD, \ + "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Slave fd communication " "channel supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " From 2e1a9de96b487cf818a22d681cad8d3f5d18dcca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 9 Feb 2023 18:00:04 +0100 Subject: [PATCH 19/53] vdpa: stop all svq on device deletion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not stopping them leave the device in a bad state when virtio-net fronted device is unplugged with device_del monitor command. This is not triggable in regular poweroff or qemu forces shutdown because cleanup is called right after vhost_vdpa_dev_start(false). But devices hot unplug does not call vdpa device cleanups. This lead to all the vhost_vdpa devices without stop the SVQ but the last. Fix it and clean the code, making it symmetric with vhost_vdpa_svqs_start. Fixes: dff4426fa656 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") Reported-by: Lei Yang Signed-off-by: Eugenio Pérez Message-Id: <20230209170004.899472-1-eperezma@redhat.com> Tested-by: Laurent Vivier Acked-by: Jason Wang --- hw/virtio/vhost-vdpa.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 542e003101..df3a1e92ac 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -689,26 +689,11 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, return ret; } -static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) -{ - if (!v->shadow_vqs_enabled) { - return; - } - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_svq_stop(svq); - } -} - static int vhost_vdpa_reset_device(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; int ret; uint8_t status = 0; - vhost_vdpa_reset_svq(v); - ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev, status); return ret; @@ -1100,6 +1085,8 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + + vhost_svq_stop(svq); vhost_vdpa_svq_unmap_rings(dev, svq); event_notifier_cleanup(&svq->hdev_kick); From 93af1274ea6220ef5eae6058b2fe74ae2c8b842b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:39 +0300 Subject: [PATCH 20/53] pci/shpc: set attention led to OFF on reset 0 is not a valid state for the led. Let's start with OFF. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-2-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index fca7f6691a..1b3f619dc9 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -223,6 +223,7 @@ void shpc_reset(PCIDevice *d) SHPC_SLOT_STATUS_PRSNT_MASK); shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_PWR_LED_MASK); } + shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_ATTN_LED_MASK); shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_66); } shpc_set_sec_bus_speed(shpc, SHPC_SEC_BUS_33); From 94c84780cebc3b3107c85c71e3ae4467b51b3bbe Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:40 +0300 Subject: [PATCH 21/53] pci/shpc: change shpc_get_status() return type to uint8_t The result of the function is always one byte. The result is always assigned to uint8_t variable. Also, shpc_get_status() should be symmetric to shpc_set_status() which has uint8_t value argument. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-3-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 1b3f619dc9..5d71569b13 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -123,10 +123,13 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) +static uint8_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); - return (pci_get_word(status) & msk) >> ctz32(msk); + uint16_t result = (pci_get_word(status) & msk) >> ctz32(msk); + + assert(result <= UINT8_MAX); + return result; } static void shpc_set_status(SHPCDevice *shpc, From 025e2088db8765e1b0db3703fe27073808d12c1b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:41 +0300 Subject: [PATCH 22/53] pci/shpc: shpc_slot_command(): handle PWRONLY -> ENABLED transition ENABLED -> PWRONLY transition is not allowed and we handle it by shpc_invalid_command(). But PWRONLY -> ENABLED transition is silently ignored, which seems wrong. Let's handle it as correct. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-4-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 5d71569b13..25e4172382 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -273,28 +273,22 @@ static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, return; } - switch (power) { - case SHPC_LED_NO: - break; - default: + if (power != SHPC_LED_NO) { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK); } - switch (attn) { - case SHPC_LED_NO: - break; - default: + if (attn != SHPC_LED_NO) { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK); } + if (state != SHPC_STATE_NO) { + shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); + } - if ((current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_PWRONLY) || - (current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_ENABLED)) { - shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); - } else if ((current_state == SHPC_STATE_ENABLED || - current_state == SHPC_STATE_PWRONLY) && - state == SHPC_STATE_DISABLED) { - shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); + if ((current_state == SHPC_STATE_ENABLED || + current_state == SHPC_STATE_PWRONLY) && + state == SHPC_STATE_DISABLED) + { power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); /* TODO: track what monitor requested. */ /* Look at LED to figure out whether it's ok to remove the device. */ From dedf052a254187c333b7d1c9b8c95a0c325e6a18 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:42 +0300 Subject: [PATCH 23/53] pci/shpc: more generic handle hot-unplug in shpc_slot_command() Free slot if both conditions (power-led = OFF and state = DISABLED) becomes true regardless of the sequence. It is similar to how PCIe hotplug works. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-5-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 52 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 25e4172382..959dc470f3 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -258,49 +258,59 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) } } +static bool shpc_slot_is_off(uint8_t state, uint8_t power, uint8_t attn) +{ + return state == SHPC_STATE_DISABLED && power == SHPC_LED_OFF; +} + static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, uint8_t state, uint8_t power, uint8_t attn) { - uint8_t current_state; int slot = SHPC_LOGICAL_TO_IDX(target); + uint8_t old_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); + uint8_t old_power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + uint8_t old_attn = shpc_get_status(shpc, slot, SHPC_SLOT_ATTN_LED_MASK); + if (target < SHPC_CMD_TRGT_MIN || slot >= shpc->nslots) { shpc_invalid_command(shpc); return; } - current_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); - if (current_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { + + if (old_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { shpc_invalid_command(shpc); return; } - if (power != SHPC_LED_NO) { + if (power == SHPC_LED_NO) { + power = old_power; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK); } - if (attn != SHPC_LED_NO) { + + if (attn == SHPC_LED_NO) { + attn = old_attn; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK); } - if (state != SHPC_STATE_NO) { + + if (state == SHPC_STATE_NO) { + state = old_state; + } else { shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); } - if ((current_state == SHPC_STATE_ENABLED || - current_state == SHPC_STATE_PWRONLY) && - state == SHPC_STATE_DISABLED) + if (!shpc_slot_is_off(old_state, old_power, old_attn) && + shpc_slot_is_off(state, power, attn)) { - power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); - /* TODO: track what monitor requested. */ - /* Look at LED to figure out whether it's ok to remove the device. */ - if (power == SHPC_LED_OFF) { - shpc_free_devices_in_slot(shpc, slot); - shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); - shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, - SHPC_SLOT_STATUS_PRSNT_MASK); - shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= - SHPC_SLOT_EVENT_MRL | - SHPC_SLOT_EVENT_PRESENCE; - } + shpc_free_devices_in_slot(shpc, slot); + shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; } } From 0adc05f480d7b8b3fe849345279beaee05df9b2d Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:43 +0300 Subject: [PATCH 24/53] pci/shpc: pass PCIDevice pointer to shpc_slot_command() We'll need it in further patch to report bridge in QAPI event. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-6-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 959dc470f3..9f964b1d70 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -263,9 +263,10 @@ static bool shpc_slot_is_off(uint8_t state, uint8_t power, uint8_t attn) return state == SHPC_STATE_DISABLED && power == SHPC_LED_OFF; } -static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, +static void shpc_slot_command(PCIDevice *d, uint8_t target, uint8_t state, uint8_t power, uint8_t attn) { + SHPCDevice *shpc = d->shpc; int slot = SHPC_LOGICAL_TO_IDX(target); uint8_t old_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); uint8_t old_power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); @@ -314,8 +315,9 @@ static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, } } -static void shpc_command(SHPCDevice *shpc) +static void shpc_command(PCIDevice *d) { + SHPCDevice *shpc = d->shpc; uint8_t code = pci_get_byte(shpc->config + SHPC_CMD_CODE); uint8_t speed; uint8_t target; @@ -336,7 +338,7 @@ static void shpc_command(SHPCDevice *shpc) state = (code & SHPC_SLOT_STATE_MASK) >> SHPC_SLOT_STATE_SHIFT; power = (code & SHPC_SLOT_PWR_LED_MASK) >> SHPC_SLOT_PWR_LED_SHIFT; attn = (code & SHPC_SLOT_ATTN_LED_MASK) >> SHPC_SLOT_ATTN_LED_SHIFT; - shpc_slot_command(shpc, target, state, power, attn); + shpc_slot_command(d, target, state, power, attn); break; case 0x40 ... 0x47: speed = code & SHPC_SEC_BUS_MASK; @@ -354,10 +356,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_PWRONLY, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -375,10 +377,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_ENABLED, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -410,7 +412,7 @@ static void shpc_write(PCIDevice *d, unsigned addr, uint64_t val, int l) shpc->config[a] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ } if (ranges_overlap(addr, l, SHPC_CMD_CODE, 2)) { - shpc_command(shpc); + shpc_command(d); } shpc_interrupt_update(d); } From 05d8a107dba8f94ed6c29fef965a73dca7e549ce Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:44 +0300 Subject: [PATCH 25/53] pci/shpc: refactor shpc_device_plug_common() Rename it to shpc_device_get_slot(), to mention what it does rather than how it is used. It also helps to reuse it in further commit. Also, add a return value and get rid of local_err. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-7-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/shpc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 9f964b1d70..e7bc7192f1 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -496,8 +496,9 @@ static const MemoryRegionOps shpc_mmio_ops = { .max_access_size = 4, }, }; -static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, - SHPCDevice *shpc, Error **errp) + +static bool shpc_device_get_slot(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) { int pci_slot = PCI_SLOT(affected_dev->devfn); *slot = SHPC_PCI_TO_IDX(pci_slot); @@ -507,21 +508,20 @@ static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, "controller. Valid slots are between %d and %d.", pci_slot, SHPC_IDX_TO_PCI(0), SHPC_IDX_TO_PCI(shpc->nslots) - 1); - return; + return false; } + + return true; } void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } @@ -563,16 +563,13 @@ void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; uint8_t state; uint8_t led; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } From cd6992c6b5d508fea5ce6b351e5f528dbe9766d9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:45 +0300 Subject: [PATCH 26/53] pcie: pcie_cap_slot_write_config(): use correct macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI_EXP_SLTCTL_PIC_OFF is a value, and PCI_EXP_SLTCTL_PIC is a mask. Happily PCI_EXP_SLTCTL_PIC_OFF is a maximum value for this mask and is equal to the mask itself. Still the code looks like a bug. Let's make it more reader-friendly. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-8-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 924fdabd15..82ef723983 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -770,9 +770,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, * control of powered off slots before powering them on. */ if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && + (val & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || - (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { + (old_slt_ctl & PCI_EXP_SLTCTL_PIC) != PCI_EXP_SLTCTL_PIC_OFF)) { pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); From 0a80f1cd0675838c0a68b00622ec689ebbdb1302 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:46 +0300 Subject: [PATCH 27/53] pcie_regs: drop duplicated indicator value macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have indicator values in include/standard-headers/linux/pci_regs.h , no reason to reinvent them in include/hw/pci/pcie_regs.h. (and we already have usage of PCI_EXP_SLTCTL_PWR_IND_BLINK and PCI_EXP_SLTCTL_PWR_IND_OFF in hw/pci/pcie.c, so let's be consistent) Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-9-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 13 +++++++------ include/hw/pci/pcie_regs.h | 9 --------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 82ef723983..ccdb2377e1 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -634,8 +634,8 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC); pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PIC_OFF | - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_PWR_IND_OFF | + PCI_EXP_SLTCTL_ATTN_IND_OFF); pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC | @@ -679,7 +679,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE); pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_ATTN_IND_OFF); if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { /* Downstream ports enforce device number 0. */ @@ -694,7 +694,8 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PCC); } - pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; + pic = populated ? + PCI_EXP_SLTCTL_PWR_IND_ON : PCI_EXP_SLTCTL_PWR_IND_OFF; pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); } @@ -770,9 +771,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, * control of powered off slots before powering them on. */ if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - (val & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PIC_OFF && + (val & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || - (old_slt_ctl & PCI_EXP_SLTCTL_PIC) != PCI_EXP_SLTCTL_PIC_OFF)) { + (old_slt_ctl & PCI_EXP_SLTCTL_PIC) != PCI_EXP_SLTCTL_PWR_IND_OFF)) { pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 963dc2e170..00b595a82e 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -70,15 +70,6 @@ typedef enum PCIExpLinkWidth { #define PCI_EXP_SLTCTL_IND_ON 0x1 #define PCI_EXP_SLTCTL_IND_BLINK 0x2 #define PCI_EXP_SLTCTL_IND_OFF 0x3 -#define PCI_EXP_SLTCTL_AIC_SHIFT ctz32(PCI_EXP_SLTCTL_AIC) -#define PCI_EXP_SLTCTL_AIC_OFF \ - (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT) - -#define PCI_EXP_SLTCTL_PIC_SHIFT ctz32(PCI_EXP_SLTCTL_PIC) -#define PCI_EXP_SLTCTL_PIC_OFF \ - (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT) -#define PCI_EXP_SLTCTL_PIC_ON \ - (PCI_EXP_SLTCTL_IND_ON << PCI_EXP_SLTCTL_PIC_SHIFT) #define PCI_EXP_SLTCTL_SUPPORTED \ (PCI_EXP_SLTCTL_ABPE | \ From 6b72b84d089331f4c49b68fac0f7c5d25748ae46 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:47 +0300 Subject: [PATCH 28/53] pcie: drop unused PCIExpressIndicator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The structure type is unused. Also, it's the only user of corresponding macros, so drop them too. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-10-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/pci/pcie.h | 8 -------- include/hw/pci/pcie_regs.h | 5 ----- 2 files changed, 13 deletions(-) diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 798a262a0a..3cc2b15957 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -27,14 +27,6 @@ #include "hw/pci/pcie_sriov.h" #include "hw/hotplug.h" -typedef enum { - /* for attention and power indicator */ - PCI_EXP_HP_IND_RESERVED = PCI_EXP_SLTCTL_IND_RESERVED, - PCI_EXP_HP_IND_ON = PCI_EXP_SLTCTL_IND_ON, - PCI_EXP_HP_IND_BLINK = PCI_EXP_SLTCTL_IND_BLINK, - PCI_EXP_HP_IND_OFF = PCI_EXP_SLTCTL_IND_OFF, -} PCIExpressIndicator; - typedef enum { /* these bits must match the bits in Slot Control/Status registers. * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 00b595a82e..1fe0bdd25b 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -66,11 +66,6 @@ typedef enum PCIExpLinkWidth { #define PCI_EXP_SLTCAP_PSN_SHIFT ctz32(PCI_EXP_SLTCAP_PSN) -#define PCI_EXP_SLTCTL_IND_RESERVED 0x0 -#define PCI_EXP_SLTCTL_IND_ON 0x1 -#define PCI_EXP_SLTCTL_IND_BLINK 0x2 -#define PCI_EXP_SLTCTL_IND_OFF 0x3 - #define PCI_EXP_SLTCTL_SUPPORTED \ (PCI_EXP_SLTCTL_ABPE | \ PCI_EXP_SLTCTL_PDCE | \ From f90d932094c03cb2ebb856a6da4ad0a5b05d9c27 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:48 +0300 Subject: [PATCH 29/53] pcie: pcie_cap_slot_enable_power() use correct helper *_by_mask() helpers shouldn't be used here (and that's the only one). *_by_mask() helpers do shift their value argument, but in pcie.c code we use values that are already shifted appropriately. Happily, PCI_EXP_SLTCTL_PWR_ON is zero, so shift doesn't matter. But if we apply same helper for PCI_EXP_SLTCTL_PWR_OFF constant it will do wrong thing. So, let's use instead pci_word_test_and_clear_mask() which is already used in the file to clear PCI_EXP_SLTCTL_PWR_OFF bit in pcie_cap_slot_init() and pcie_cap_slot_reset(). Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-11-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index ccdb2377e1..db8360226f 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -373,8 +373,8 @@ void pcie_cap_slot_enable_power(PCIDevice *dev) uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); if (sltcap & PCI_EXP_SLTCAP_PCP) { - pci_set_word_by_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); } } From 5aaed9caf150390bd702a7da86a287a579b20d73 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:49 +0300 Subject: [PATCH 30/53] pcie: introduce pcie_sltctl_powered_off() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pcie_cap_slot_write_config() we check for PCI_EXP_SLTCTL_PWR_OFF in a bad form. We should distinguish PCI_EXP_SLTCTL_PWR which is a "mask" and PCI_EXP_SLTCTL_PWR_OFF which is value for that mask. Better code is in pcie_cap_slot_unplug_request_cb() and in pcie_cap_update_power(). Let's use same pattern everywhere. To simplify things add also a helper. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-12-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index db8360226f..90faf0710a 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -39,6 +39,11 @@ #define PCIE_DEV_PRINTF(dev, fmt, ...) \ PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) +static bool pcie_sltctl_powered_off(uint16_t sltctl) +{ + return (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF + && (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF; +} /*************************************************************************** * pci express capability helper functions @@ -395,6 +400,7 @@ static void pcie_cap_update_power(PCIDevice *hotplug_dev) if (sltcap & PCI_EXP_SLTCAP_PCP) { power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + /* Don't we need to check also (sltctl & PCI_EXP_SLTCTL_PIC) ? */ } pci_for_each_device(sec_bus, pci_bus_num(sec_bus), @@ -579,8 +585,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } - if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && - ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + if (pcie_sltctl_powered_off(sltctl)) { /* slot is powered off -> unplug without round-trip to the guest */ pcie_cap_slot_do_unplug(hotplug_pdev); hotplug_event_notify(hotplug_pdev); @@ -770,10 +775,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, * this is a work around for guests that overwrite * control of powered off slots before powering them on. */ - if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - (val & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF && - (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || - (old_slt_ctl & PCI_EXP_SLTCTL_PIC) != PCI_EXP_SLTCTL_PWR_IND_OFF)) { + if ((sltsta & PCI_EXP_SLTSTA_PDS) && pcie_sltctl_powered_off(val) && + !pcie_sltctl_powered_off(old_slt_ctl)) + { pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); From 1768e97b9186b8ea416cb81952773837bf1df87a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 16 Feb 2023 21:03:50 +0300 Subject: [PATCH 31/53] pcie: set power indicator to off on reset by default It should not be zero, the only valid values are ON, OFF and BLINK. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Anton Kuchin Message-Id: <20230216180356.156832-13-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 90faf0710a..b8c24cf45f 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -684,6 +684,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE); pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PWR_IND_OFF | PCI_EXP_SLTCTL_ATTN_IND_OFF); if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { From e4dd39c699b7d63a06f686ec06ded8adbee989c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Mon, 13 Feb 2023 09:57:47 +0100 Subject: [PATCH 32/53] vhost: avoid a potential use of an uninitialized variable in vhost_svq_poll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In vhost_svq_poll(), if vhost_svq_get_buf() fails due to a device providing invalid descriptors, len is left uninitialized and returned to the caller, potentally leaking stack data or causing undefined behavior. Fix this by initializing len to 0. Found with GCC 13 and -fanalyzer (abridged): ../hw/virtio/vhost-shadow-virtqueue.c: In function ‘vhost_svq_poll’: ../hw/virtio/vhost-shadow-virtqueue.c:538:12: warning: use of uninitialized value ‘len’ [CWE-457] [-Wanalyzer-use-of-uninitialized-value] 538 | return len; | ^~~ ‘vhost_svq_poll’: events 1-4 | | 522 | size_t vhost_svq_poll(VhostShadowVirtqueue *svq) | | ^~~~~~~~~~~~~~ | | | | | (1) entry to ‘vhost_svq_poll’ |...... | 525 | uint32_t len; | | ~~~ | | | | | (2) region created on stack here | | (3) capacity: 4 bytes |...... | 528 | if (vhost_svq_more_used(svq)) { | | ~ | | | | | (4) inlined call to ‘vhost_svq_more_used’ from ‘vhost_svq_poll’ (...) | 528 | if (vhost_svq_more_used(svq)) { | | ^~~~~~~~~~~~~~~~~~~~~~~~~ | | || | | |(8) ...to here | | (7) following ‘true’ branch... |...... | 537 | vhost_svq_get_buf(svq, &len); | | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | | | (9) calling ‘vhost_svq_get_buf’ from ‘vhost_svq_poll’ | +--> ‘vhost_svq_get_buf’: events 10-11 | | 416 | static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, | | ^~~~~~~~~~~~~~~~~ | | | | | (10) entry to ‘vhost_svq_get_buf’ |...... | 423 | if (!vhost_svq_more_used(svq)) { | | ~ | | | | | (11) inlined call to ‘vhost_svq_more_used’ from ‘vhost_svq_get_buf’ | (...) | ‘vhost_svq_get_buf’: event 14 | | 423 | if (!vhost_svq_more_used(svq)) { | | ^ | | | | | (14) following ‘false’ branch... | ‘vhost_svq_get_buf’: event 15 | |cc1: | (15): ...to here | <------+ | ‘vhost_svq_poll’: events 16-17 | | 537 | vhost_svq_get_buf(svq, &len); | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | | | (16) returning to ‘vhost_svq_poll’ from ‘vhost_svq_get_buf’ | 538 | return len; | | ~~~ | | | | | (17) use of uninitialized value ‘len’ here Note by Laurent Vivier : The return value is only used to detect an error: vhost_svq_poll vhost_vdpa_net_cvq_add vhost_vdpa_net_load_cmd vhost_vdpa_net_load_mac -> a negative return is only used to detect error vhost_vdpa_net_load_mq -> a negative return is only used to detect error vhost_vdpa_net_handle_ctrl_avail -> a negative return is only used to detect error Fixes: d368c0b052ad ("vhost: Do not depend on !NULL VirtQueueElement on vhost_svq_flush") Signed-off-by: Carlos López Message-Id: <20230213085747.19956-1-clopez@suse.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-shadow-virtqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 4307296358..515ccf870d 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -522,7 +522,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, size_t vhost_svq_poll(VhostShadowVirtqueue *svq) { int64_t start_us = g_get_monotonic_time(); - uint32_t len; + uint32_t len = 0; do { if (vhost_svq_more_used(svq)) { From 9c1916057a8b14411116106e5a5c0c33d551cfeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Fri, 10 Feb 2023 12:25:15 +0100 Subject: [PATCH 33/53] libvhost-user: check for NULL when allocating a virtqueue element MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check the return value for malloc(), avoiding a NULL pointer dereference, and propagate error in function callers. Found with GCC 13 and -fanalyzer: ../subprojects/libvhost-user/libvhost-user.c: In function ‘virtqueue_alloc_element’: ../subprojects/libvhost-user/libvhost-user.c:2556:19: error: dereference of possibly-NULL ‘elem’ [CWE-690] [-Werror=analyzer-possible-null-dereference] 2556 | elem->out_num = out_num; | ~~~~~~~~~~~~~~^~~~~~~~~ ‘virtqueue_alloc_element’: event 1 | | 2554 | assert(sz >= sizeof(VuVirtqElement)); | | ^~~~~~ | | | | | (1) following ‘true’ branch (when ‘sz > 31’)... | ‘virtqueue_alloc_element’: events 2-4 | | 2555 | elem = malloc(out_sg_end); | | ^~~~ ~~~~~~~~~~~~~~~~~~ | | | | | | | (3) this call could return NULL | | (2) ...to here | 2556 | elem->out_num = out_num; | | ~~~~~~~~~~~~~~~~~~~~~~~ | | | | | (4) ‘elem’ could be NULL: unchecked value from (3) | Signed-off-by: Carlos López Message-Id: <20230210112514.16858-1-clopez@suse.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- subprojects/libvhost-user/libvhost-user.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index f661af7c85..0200b78e8e 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -2553,6 +2553,10 @@ virtqueue_alloc_element(size_t sz, assert(sz >= sizeof(VuVirtqElement)); elem = malloc(out_sg_end); + if (!elem) { + DPRINT("%s: failed to malloc virtqueue element\n", __func__); + return NULL; + } elem->out_num = out_num; elem->in_num = in_num; elem->in_sg = (void *)elem + in_sg_ofs; @@ -2639,6 +2643,9 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) /* Now copy what we have collected and mapped */ elem = virtqueue_alloc_element(sz, out_num, in_num); + if (!elem) { + return NULL; + } elem->index = idx; for (i = 0; i < out_num; i++) { elem->out_sg[i] = iov[i]; From 28566eab2dd147f93735bc986513fff5fa5a236d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 11 Feb 2023 16:22:39 +0100 Subject: [PATCH 34/53] hw/pci: Trace IRQ routing on PCI topology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trace how IRQ are rooted from EP to RC. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230211152239.88106-3-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 8 ++++++++ hw/pci/trace-events | 1 + 2 files changed, 9 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bad8e63db3..08060b3e88 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -279,9 +279,13 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) { PCIBus *bus; for (;;) { + int dev_irq = irq_num; bus = pci_get_bus(pci_dev); assert(bus->map_irq); irq_num = bus->map_irq(pci_dev, irq_num); + trace_pci_route_irq(dev_irq, DEVICE(pci_dev)->canonical_path, irq_num, + pci_bus_is_root(bus) ? "root-complex" + : DEVICE(bus->parent_dev)->canonical_path); if (bus->set_irq) break; pci_dev = bus->parent_dev; @@ -1600,8 +1604,12 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) PCIBus *bus; do { + int dev_irq = pin; bus = pci_get_bus(dev); pin = bus->map_irq(dev, pin); + trace_pci_route_irq(dev_irq, DEVICE(dev)->canonical_path, pin, + pci_bus_is_root(bus) ? "root-complex" + : DEVICE(bus->parent_dev)->canonical_path); dev = bus->parent_dev; } while (dev); diff --git a/hw/pci/trace-events b/hw/pci/trace-events index aaf46bc92d..42430869ce 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -3,6 +3,7 @@ # pci.c pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 +pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" From b8a7f51f59e28d5a8e0c07ed3919cc9695560ed2 Mon Sep 17 00:00:00 2001 From: Yajun Wu Date: Tue, 14 Feb 2023 10:14:30 +0800 Subject: [PATCH 35/53] chardev/char-socket: set s->listener = NULL in char_socket_finalize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After live migration with virtio block device, qemu crash at: #0 0x000055914f46f795 in object_dynamic_cast_assert (obj=0x559151b7b090, typename=0x55914f80fbc4 "qio-channel", file=0x55914f80fb90 "/images/testvfe/sw/qemu.gerrit/include/io/channel.h", line=30, func=0x55914f80fcb8 <__func__.17257> "QIO_CHANNEL") at ../qom/object.c:872 #1 0x000055914f480d68 in QIO_CHANNEL (obj=0x559151b7b090) at /images/testvfe/sw/qemu.gerrit/include/io/channel.h:29 #2 0x000055914f4812f8 in qio_net_listener_set_client_func_full (listener=0x559151b7a720, func=0x55914f580b97 , data=0x5591519f4ea0, notify=0x0, context=0x0) at ../io/net-listener.c:166 #3 0x000055914f580059 in tcp_chr_update_read_handler (chr=0x5591519f4ea0) at ../chardev/char-socket.c:637 #4 0x000055914f583dca in qemu_chr_be_update_read_handlers (s=0x5591519f4ea0, context=0x0) at ../chardev/char.c:226 #5 0x000055914f57b7c9 in qemu_chr_fe_set_handlers_full (b=0x559152bf23a0, fd_can_read=0x0, fd_read=0x0, fd_event=0x0, be_change=0x0, opaque=0x0, context=0x0, set_open=false, sync_state=true) at ../chardev/char-fe.c:279 #6 0x000055914f57b86d in qemu_chr_fe_set_handlers (b=0x559152bf23a0, fd_can_read=0x0, fd_read=0x0, fd_event=0x0, be_change=0x0, opaque=0x0, context=0x0, set_open=false) at ../chardev/char-fe.c:304 #7 0x000055914f378caf in vhost_user_async_close (d=0x559152bf21a0, chardev=0x559152bf23a0, vhost=0x559152bf2420, cb=0x55914f2fb8c1 ) at ../hw/virtio/vhost-user.c:2725 #8 0x000055914f2fba40 in vhost_user_blk_event (opaque=0x559152bf21a0, event=CHR_EVENT_CLOSED) at ../hw/block/vhost-user-blk.c:395 #9 0x000055914f58388c in chr_be_event (s=0x5591519f4ea0, event=CHR_EVENT_CLOSED) at ../chardev/char.c:61 #10 0x000055914f583905 in qemu_chr_be_event (s=0x5591519f4ea0, event=CHR_EVENT_CLOSED) at ../chardev/char.c:81 #11 0x000055914f581275 in char_socket_finalize (obj=0x5591519f4ea0) at ../chardev/char-socket.c:1083 #12 0x000055914f46f073 in object_deinit (obj=0x5591519f4ea0, type=0x5591519055c0) at ../qom/object.c:680 #13 0x000055914f46f0e5 in object_finalize (data=0x5591519f4ea0) at ../qom/object.c:694 #14 0x000055914f46ff06 in object_unref (objptr=0x5591519f4ea0) at ../qom/object.c:1202 #15 0x000055914f4715a4 in object_finalize_child_property (obj=0x559151b76c50, name=0x559151b7b250 "char3", opaque=0x5591519f4ea0) at ../qom/object.c:1747 #16 0x000055914f46ee86 in object_property_del_all (obj=0x559151b76c50) at ../qom/object.c:632 #17 0x000055914f46f0d2 in object_finalize (data=0x559151b76c50) at ../qom/object.c:693 #18 0x000055914f46ff06 in object_unref (objptr=0x559151b76c50) at ../qom/object.c:1202 #19 0x000055914f4715a4 in object_finalize_child_property (obj=0x559151b6b560, name=0x559151b76630 "chardevs", opaque=0x559151b76c50) at ../qom/object.c:1747 #20 0x000055914f46ef67 in object_property_del_child (obj=0x559151b6b560, child=0x559151b76c50) at ../qom/object.c:654 #21 0x000055914f46f042 in object_unparent (obj=0x559151b76c50) at ../qom/object.c:673 #22 0x000055914f58632a in qemu_chr_cleanup () at ../chardev/char.c:1189 #23 0x000055914f16c66c in qemu_cleanup () at ../softmmu/runstate.c:830 #24 0x000055914eee7b9e in qemu_default_main () at ../softmmu/main.c:38 #25 0x000055914eee7bcc in main (argc=86, argv=0x7ffc97cb8d88) at ../softmmu/main.c:48 In char_socket_finalize after s->listener freed, event callback function vhost_user_blk_event will be called to handle CHR_EVENT_CLOSED. vhost_user_blk_event is calling qio_net_listener_set_client_func_full which is still using s->listener. Setting s->listener = NULL after object_unref(OBJECT(s->listener)) can solve this issue. Signed-off-by: Yajun Wu Acked-by: Jiri Pirko Message-Id: <20230214021430.3638579-1-yajunw@nvidia.com> Reviewed-by: Marc-André Lureau Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- chardev/char-socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index c2265436ac..8c58532171 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -1065,6 +1065,7 @@ static void char_socket_finalize(Object *obj) qio_net_listener_set_client_func_full(s->listener, NULL, NULL, NULL, chr->gcontext); object_unref(OBJECT(s->listener)); + s->listener = NULL; } if (s->tls_creds) { object_unref(OBJECT(s->tls_creds)); From 6da24341866fa940fd7d575788a2319514941c77 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 15 Feb 2023 14:52:38 +0800 Subject: [PATCH 36/53] memory: Optimize replay of guest mapping On x86, there are two notifiers registered due to vtd-ir memory region splitting the whole address space. During replay of the address space for each notifier, the whole address space is scanned which is unnecessory. We only need to scan the space belong to notifier montiored space. Assert when notifier is used to monitor beyond iommu memory region's address space. Signed-off-by: Zhenzhong Duan Message-Id: <20230215065238.713041-1-zhenzhong.duan@intel.com> Acked-by: Peter Xu Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- softmmu/memory.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 98a5c304a7..6b1de80e85 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3831,7 +3831,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid), }; - vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid); + vtd_page_walk(s, &ce, n->start, n->end, &info, vtd_as->pasid); } } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), diff --git a/softmmu/memory.c b/softmmu/memory.c index 9d64efca26..da7d846619 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -1900,6 +1900,7 @@ int memory_region_register_iommu_notifier(MemoryRegion *mr, iommu_mr = IOMMU_MEMORY_REGION(mr); assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); assert(n->start <= n->end); + assert(n->end <= memory_region_size(mr)); assert(n->iommu_idx >= 0 && n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr)); @@ -1923,7 +1924,6 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr) void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) { - MemoryRegion *mr = MEMORY_REGION(iommu_mr); IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); hwaddr addr, granularity; IOMMUTLBEntry iotlb; @@ -1936,7 +1936,7 @@ void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) granularity = memory_region_iommu_get_min_page_size(iommu_mr); - for (addr = 0; addr < memory_region_size(mr); addr += granularity) { + for (addr = n->start; addr < n->end; addr += granularity) { iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx); if (iotlb.perm != IOMMU_NONE) { n->notify(n, &iotlb); From b8d78277c091f26fdd64f239bc8bb7e55d74cecf Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Feb 2023 14:59:20 +0800 Subject: [PATCH 37/53] intel-iommu: fail MAP notifier without caching mode Without caching mode, MAP notifier won't work correctly since guest won't send IOTLB update event when it establishes new mappings in the I/O page tables. Let's fail the IOMMU notifiers early instead of misbehaving silently. Reviewed-by: Eric Auger Tested-by: Viktor Prutyanov Signed-off-by: Jason Wang Message-Id: <20230223065924.42503-2-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6b1de80e85..b520542c47 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3186,6 +3186,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, "Snoop Control with vhost or VFIO is not supported"); return -ENOTSUP; } + if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) { + error_setg_errno(errp, ENOTSUP, + "device %02x.%02x.%x requires caching mode", + pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + return -ENOTSUP; + } /* Update per-address-space notifier flags */ vtd_as->notifier_flags = new; From 09adb0e021207b60a0c51a68939b4539d98d3ef3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Feb 2023 14:59:21 +0800 Subject: [PATCH 38/53] intel-iommu: fail DEVIOTLB_UNMAP without dt mode Without dt mode, device IOTLB notifier won't work since guest won't send device IOTLB invalidation descriptor in this case. Let's fail early instead of misbehaving silently. Reviewed-by: Laurent Vivier Tested-by: Laurent Vivier Tested-by: Viktor Prutyanov Buglink: https://bugzilla.redhat.com/2156876 Signed-off-by: Jason Wang Message-Id: <20230223065924.42503-3-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index b520542c47..a6b35b07d2 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); /* TODO: add support for VFIO and vhost users */ if (s->snoop_control) { @@ -3193,6 +3194,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, PCI_FUNC(vtd_as->devfn)); return -ENOTSUP; } + if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { + error_setg_errno(errp, ENOTSUP, + "device %02x.%02x.%x requires device IOTLB mode", + pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + return -ENOTSUP; + } /* Update per-address-space notifier flags */ vtd_as->notifier_flags = new; From 7caebbf9ea534dc11ff52ba38f982da4962e6051 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Feb 2023 14:59:22 +0800 Subject: [PATCH 39/53] memory: introduce memory_region_unmap_iommu_notifier_range() This patch introduces a new helper to unmap the range of a specific IOMMU notifier. Signed-off-by: Jason Wang Message-Id: <20230223065924.42503-4-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/exec/memory.h | 10 ++++++++++ softmmu/memory.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 2e602a2fad..6fa0b071f0 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1731,6 +1731,16 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, void memory_region_notify_iommu_one(IOMMUNotifier *notifier, IOMMUTLBEvent *event); +/** + * memory_region_unmap_iommu_notifier_range: notify a unmap for an IOMMU + * translation that covers the + * range of a notifier + * + * @notifier: the notifier to be notified + */ +void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *n); + + /** * memory_region_register_iommu_notifier: register a notifier for changes to * IOMMU translation entries. diff --git a/softmmu/memory.c b/softmmu/memory.c index da7d846619..4699ba55ec 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -1996,6 +1996,19 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, } } +void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *n) +{ + IOMMUTLBEvent event; + + event.type = IOMMU_NOTIFIER_UNMAP; + event.entry.target_as = &address_space_memory; + event.entry.iova = n->start; + event.entry.perm = IOMMU_NONE; + event.entry.addr_mask = n->end - n->start; + + memory_region_notify_iommu_one(n, &event); +} + void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, int iommu_idx, IOMMUTLBEvent event) From 98332f643ebf92523ad9128c0437d9fe964bfd09 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Feb 2023 14:59:23 +0800 Subject: [PATCH 40/53] smmu: switch to use memory_region_unmap_iommu_notifier_range() Signed-off-by: Jason Wang Message-Id: <20230223065924.42503-5-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/smmu-common.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 0a5a60ca1e..e7f1c1f219 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -467,20 +467,6 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) return NULL; } -/* Unmap the whole notifier's range */ -static void smmu_unmap_notifier_range(IOMMUNotifier *n) -{ - IOMMUTLBEvent event; - - event.type = IOMMU_NOTIFIER_UNMAP; - event.entry.target_as = &address_space_memory; - event.entry.iova = n->start; - event.entry.perm = IOMMU_NONE; - event.entry.addr_mask = n->end - n->start; - - memory_region_notify_iommu_one(n, &event); -} - /* Unmap all notifiers attached to @mr */ static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) { @@ -488,7 +474,7 @@ static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) trace_smmu_inv_notifiers_mr(mr->parent_obj.name); IOMMU_NOTIFIER_FOREACH(n, mr) { - smmu_unmap_notifier_range(n); + memory_region_unmap_iommu_notifier_range(n); } } From 3e090e3489dbae7af65465d357a4772f012f4d90 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 23 Feb 2023 14:59:24 +0800 Subject: [PATCH 41/53] intel-iommu: send UNMAP notifications for domain or global inv desc We don't send UNMAP notification upon domain or global invalidation which will lead the notifier can't work correctly. One example is to use vhost remote IOTLB without enabling device IOTLB. Fixing this by sending UNMAP notification. Signed-off-by: Peter Xu Signed-off-by: Jason Wang Message-Id: <20230223065924.42503-6-jasowang@redhat.com> Reviewed-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a6b35b07d2..faade7def8 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1530,13 +1530,17 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as, return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid); } -static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) +static int vtd_address_space_sync(VTDAddressSpace *vtd_as) { int ret; VTDContextEntry ce; IOMMUNotifier *n; - if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { + /* If no MAP notifier registered, we simply invalidate all the cache */ + if (!vtd_as_has_map_notifier(vtd_as)) { + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { + memory_region_unmap_iommu_notifier_range(n); + } return 0; } @@ -2000,7 +2004,7 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s) VTDAddressSpace *vtd_as; QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } @@ -2082,7 +2086,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, * framework will skip MAP notifications if that * happened. */ - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } } @@ -2140,7 +2144,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce) && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } } From 3456fedb77215dc42572ab7ec1d9ca0e324260ea Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 28 Feb 2023 11:39:26 +0000 Subject: [PATCH 42/53] MAINTAINERS: Add Fan Ni as Compute eXpress Link QEMU reviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fan Ni has offered to help out with QEMU CXL emulation reviewing. Add him as a designated reviewer. Thanks to Fan for stepping up after I requested help following Ben stepping down as co-maintainer. Fan base been active in testing and review recently so great to have Fan on board. Based on patch [PATCH] MAINTAINERS: Remove CXL maintainer Ben Widawsky Message-id: <20230220212437.1462314-1-armbru@redhat.com> Message-Id: <20230228113926.11485-1-Jonathan.Cameron@huawei.com> Signed-off-by: Jonathan Cameron Reviewed-by: Philippe Mathieu-Daudé Acked-by: Markus Armbruster Acked-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e96e9dbfe6..a4647dd1c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2637,6 +2637,7 @@ T: git https://gitlab.com/vsementsov/qemu.git block Compute Express Link M: Ben Widawsky M: Jonathan Cameron +R: Fan Ni S: Supported F: hw/cxl/ F: hw/mem/cxl_type3.c From de8a7394f02e3fba3002242f04816cff33537dd8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 6 Feb 2023 17:28:07 +0000 Subject: [PATCH 43/53] hw/mem/cxl_type3: Improve error handling in realize() msix_init_exclusive_bar() can fail, so if it does cleanup the address space. Reviewed-by: Ira Weiny Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-2-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index dae4fd89ca..252822bd82 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -401,7 +401,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; unsigned short msix_num = 1; - int i; + int i, rc; if (!cxl_setup_memory(ct3d, errp)) { return; @@ -438,7 +438,10 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) &ct3d->cxl_dstate.device_registers); /* MSI(-X) Initailization */ - msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + if (rc) { + goto err_address_space_free; + } for (i = 0; i < msix_num; i++) { msix_vector_use(pci_dev, i); } @@ -450,6 +453,11 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; cxl_cstate->cdat.private = ct3d; cxl_doe_cdat_init(cxl_cstate, errp); + return; + +err_address_space_free: + address_space_destroy(&ct3d->hostmem_as); + return; } static void ct3_exit(PCIDevice *pci_dev) From 9518d8bc444f63f3925f2a586adfe9010758afa7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 6 Feb 2023 17:28:08 +0000 Subject: [PATCH 44/53] hw/pci-bridge/cxl_downstream: Fix type naming mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix capitalization difference between struct name and typedef. Tested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ira Weiny Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-3-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/cxl_downstream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c index 3d4e6b59cd..54f507318f 100644 --- a/hw/pci-bridge/cxl_downstream.c +++ b/hw/pci-bridge/cxl_downstream.c @@ -15,7 +15,7 @@ #include "hw/pci/pcie_port.h" #include "qapi/error.h" -typedef struct CXLDownStreamPort { +typedef struct CXLDownstreamPort { /*< private >*/ PCIESlot parent_obj; From 2ef5063610016a75351fe3cd32879f11323d5927 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Mon, 6 Feb 2023 17:28:09 +0000 Subject: [PATCH 45/53] hw/cxl: set cxl-type3 device type to PCI_CLASS_MEMORY_CXL Current code sets to STORAGE_EXPRESS and then overrides it. Reviewed-by: Davidlohr Bueso Reviewed-by: Ira Weiny Signed-off-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-4-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 252822bd82..217a5e639b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -408,7 +408,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) } pci_config_set_prog_interface(pci_conf, 0x10); - pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); pcie_endpoint_cap_init(pci_dev, 0x80); if (ct3d->sn != UI64_NULL) { @@ -627,7 +626,7 @@ static void ct3_class_init(ObjectClass *oc, void *data) pc->realize = ct3_realize; pc->exit = ct3_exit; - pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->class_id = PCI_CLASS_MEMORY_CXL; pc->vendor_id = PCI_VENDOR_ID_INTEL; pc->device_id = 0xd93; /* LVF for now */ pc->revision = 1; From 1c580bed9ab52c5207b49e15085f069b25d282a8 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Mon, 6 Feb 2023 17:28:10 +0000 Subject: [PATCH 46/53] hw/cxl: Add CXL_CAPACITY_MULTIPLIER definition Remove usage of magic numbers when accessing capacity fields and replace with CXL_CAPACITY_MULTIPLIER, matching the kernel definition. Signed-off-by: Gregory Price Reviewed-by: Davidlohr Bueso Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-5-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index bc1bb18844..3f67b665f5 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -12,8 +12,11 @@ #include "hw/pci/pci.h" #include "qemu/cutils.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qemu/uuid.h" +#define CXL_CAPACITY_MULTIPLIER (256 * MiB) + /* * How to add a new command, example. The command set FOO, with cmd BAR. * 1. Add the command set and cmd to the enum. @@ -138,7 +141,7 @@ static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, } QEMU_PACKED *fw_info; QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); - if (cxl_dstate->pmem_size < (256 << 20)) { + if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) { return CXL_MBOX_INTERNAL_ERROR; } @@ -283,7 +286,7 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } @@ -293,8 +296,8 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, /* PMEM only */ snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); - id->total_capacity = size / (256 << 20); - id->persistent_capacity = size / (256 << 20); + id->total_capacity = size / CXL_CAPACITY_MULTIPLIER; + id->persistent_capacity = size / CXL_CAPACITY_MULTIPLIER; id->lsa_size = cvc->get_lsa_size(ct3d); *len = sizeof(*id); @@ -314,14 +317,14 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } /* PMEM only */ part_info->active_vmem = 0; part_info->next_vmem = 0; - part_info->active_pmem = size / (256 << 20); + part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER; part_info->next_pmem = 0; *len = sizeof(*part_info); From defebbdf4940a27e094c75d108b27fdce28aeb82 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 6 Feb 2023 17:28:11 +0000 Subject: [PATCH 47/53] tests/acpi: Allow update of q35/DSDT.cxl Next patch will drop duplicate _UID entry so allow update. Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-6-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..9ce0f596cc 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT.cxl", From ab99a33d6669a9fc3534cc368a944a8c218208d6 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 6 Feb 2023 17:28:12 +0000 Subject: [PATCH 48/53] hw/i386/acpi: Drop duplicate _UID entry for CXL root bridge Noticed as this prevents iASL disasembling the DSDT table. Reviewed-by: Ira Weiny Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-7-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d27921fd8f..b19fb4259e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1514,7 +1514,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(pkg, aml_eisaid("PNP0A03")); aml_append(dev, aml_name_decl("_CID", pkg)); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); build_cxl_osc_method(dev); } else if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); From 21063bcee849d2af9a65703f9c3438ad2d4dbd26 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 6 Feb 2023 17:28:13 +0000 Subject: [PATCH 49/53] tests: acpi: Update q35/DSDT.cxl for removed duplicate UID Dropping the ID effects this table in trivial fashion. Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-8-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/DSDT.cxl | Bin 9578 -> 9564 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl index 3d18b9672d124a0cf11a79e92c396a1b883d0589..4586b9a18b24acd946cd32c7e3e3a70891a246d2 100644 GIT binary patch delta 65 zcmaFmb;pa#CDSUKwt0m6-Tor}*e5CzZ*UWUScYLp@!%?rjc`U&A Date: Mon, 6 Feb 2023 17:28:14 +0000 Subject: [PATCH 50/53] qemu/bswap: Add const_le64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gcc requires constant versions of cpu_to_le* calls. Add a 64 bit version. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-9-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/qemu/bswap.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index b1650daedf..15a78c0db5 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -125,11 +125,20 @@ CPU_CONVERT(le, 32, uint32_t) CPU_CONVERT(le, 64, uint64_t) /* - * Same as cpu_to_le{16,32}, except that gcc will figure the result is + * Same as cpu_to_le{16,32,64}, except that gcc will figure the result is * a compile-time constant if you pass in a constant. So this can be * used to initialize static variables. */ #if HOST_BIG_ENDIAN +# define const_le64(_x) \ + ((((_x) & 0x00000000000000ffU) << 56) | \ + (((_x) & 0x000000000000ff00U) << 40) | \ + (((_x) & 0x0000000000ff0000U) << 24) | \ + (((_x) & 0x00000000ff000000U) << 8) | \ + (((_x) & 0x000000ff00000000U) >> 8) | \ + (((_x) & 0x0000ff0000000000U) >> 24) | \ + (((_x) & 0x00ff000000000000U) >> 40) | \ + (((_x) & 0xff00000000000000U) >> 56)) # define const_le32(_x) \ ((((_x) & 0x000000ffU) << 24) | \ (((_x) & 0x0000ff00U) << 8) | \ @@ -139,6 +148,7 @@ CPU_CONVERT(le, 64, uint64_t) ((((_x) & 0x00ff) << 8) | \ (((_x) & 0xff00) >> 8)) #else +# define const_le64(_x) (_x) # define const_le32(_x) (_x) # define const_le16(_x) (_x) #endif From 845476cb677f40cf8d1ef1a1bd9f924d75a556ef Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 6 Feb 2023 17:28:15 +0000 Subject: [PATCH 51/53] qemu/uuid: Add UUID static initializer UUID's are defined as network byte order fields. No static initializer was available for UUID's in their standard big endian format. Define a big endian initializer for UUIDs. Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-10-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/qemu/uuid.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h index 9925febfa5..dc40ee1fc9 100644 --- a/include/qemu/uuid.h +++ b/include/qemu/uuid.h @@ -61,6 +61,18 @@ typedef struct { (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\ (node3), (node4), (node5) } +/* Normal (network byte order) UUID */ +#define UUID(time_low, time_mid, time_hi_and_version, \ + clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2, \ + node3, node4, node5) \ + { ((time_low) >> 24) & 0xff, ((time_low) >> 16) & 0xff, \ + ((time_low) >> 8) & 0xff, (time_low) & 0xff, \ + ((time_mid) >> 8) & 0xff, (time_mid) & 0xff, \ + ((time_hi_and_version) >> 8) & 0xff, (time_hi_and_version) & 0xff, \ + (clock_seq_hi_and_reserved), (clock_seq_low), \ + (node0), (node1), (node2), (node3), (node4), (node5) \ + } + #define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \ "%02hhx%02hhx-%02hhx%02hhx-" \ "%02hhx%02hhx-" \ From e16add2b6b888faa2591de035b6cda5f5aef7ae7 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 6 Feb 2023 17:28:16 +0000 Subject: [PATCH 52/53] hw/cxl/mailbox: Use new UUID network order define for cel_uuid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cel_uuid was programatically generated previously because there was no static initializer for network order UUIDs. Use the new network order initializer for cel_uuid. Adjust cxl_initialize_mailbox() because it can't fail now. Update specification reference. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Gregory Price Tested-by: Gregory Price Signed-off-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230206172816.8201-11-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-device-utils.c | 2 +- hw/cxl/cxl-mailbox-utils.c | 13 ++++++------- include/hw/cxl/cxl_device.h | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 83ce7a8270..4c5e88aaf5 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -267,5 +267,5 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); memdev_reg_init_common(cxl_dstate); - assert(cxl_initialize_mailbox(cxl_dstate) == 0); + cxl_initialize_mailbox(cxl_dstate); } diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 3f67b665f5..206e04a4b8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -193,7 +193,11 @@ static ret_code cmd_timestamp_set(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -static QemuUUID cel_uuid; +/* CXL 3.0 8.2.9.5.2.1 Command Effects Log (CEL) */ +static const QemuUUID cel_uuid = { + .data = UUID(0x0da9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, + 0x96, 0xb1, 0x62, 0x3b, 0x3f, 0x17) +}; /* 8.2.9.4.1 */ static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, @@ -458,11 +462,8 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) DOORBELL, 0); } -int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) +void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) { - /* CXL 2.0: Table 169 Get Supported Logs Log Entry */ - const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17"; - for (int set = 0; set < 256; set++) { for (int cmd = 0; cmd < 256; cmd++) { if (cxl_cmd_set[set][cmd].handler) { @@ -476,6 +477,4 @@ int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) } } } - - return qemu_uuid_parse(cel_uuidstr, &cel_uuid); } diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 250adf18b2..7e5ad65c1d 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -170,7 +170,7 @@ CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE, CXL_DEVICE_CAP_HDR1_OFFSET + CXL_DEVICE_CAP_REG_SIZE * 2) -int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); +void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); void cxl_process_mailbox(CXLDeviceState *cxl_dstate); #define cxl_device_cap_init(dstate, reg, cap_id) \ From ee92a56b08d0b59016a4a9bc1bf3a3de1fbe3956 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 Mar 2023 01:45:28 -0500 Subject: [PATCH 53/53] tests/data/acpi/virt: drop (most) duplicate files. When virt ACPI files were added, lots of duplicates were created because we forgot that there's a no-prefix fallback: e.g. if tests/data/acpi/virt/APIC.memhp is not there then test will use tests/data/acpi/virt/APIC. Drop these. These were found with $find tests/data/acpi/ -type f -exec sha256sum '{}' ';'|sort -d|uniq -w 64 --all-repeated=separate (trick: -d does a dictionary sort so a no-suffix file ends up first). Note: there are still a bunch of issues with duplicates left even after this. First pc and q35 are often identical. Second, sometimes files are identical but not identical to the default fallback, e.g. tests/data/acpi/pc/SLIT.cphp and tests/data/acpi/pc/SLIT.memhp or tests/data/acpi/q35/HMAT.acpihmat-noinitiator and tests/data/acpi/virt/HMAT.acpihmatvirt Finding a way to deduplicate these is still a TODO item - softlinks maybe? We also need to make rebuild-expected-aml.sh smarter about not creating these duplicates in the 1st place. And maybe we should use softlinks instead of relying on a fallback to make it explicit what version does each test expect? Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/virt/APIC.memhp | Bin 172 -> 0 bytes tests/data/acpi/virt/APIC.numamem | Bin 172 -> 0 bytes tests/data/acpi/virt/DSDT.numamem | Bin 5196 -> 0 bytes tests/data/acpi/virt/FACP.memhp | Bin 276 -> 0 bytes tests/data/acpi/virt/FACP.numamem | Bin 276 -> 0 bytes tests/data/acpi/virt/GTDT.memhp | Bin 96 -> 0 bytes tests/data/acpi/virt/GTDT.numamem | Bin 96 -> 0 bytes tests/data/acpi/virt/IORT.memhp | Bin 128 -> 0 bytes tests/data/acpi/virt/IORT.numamem | Bin 128 -> 0 bytes tests/data/acpi/virt/IORT.pxb | Bin 128 -> 0 bytes tests/data/acpi/virt/MCFG.memhp | Bin 60 -> 0 bytes tests/data/acpi/virt/MCFG.numamem | Bin 60 -> 0 bytes tests/data/acpi/virt/SPCR.memhp | Bin 80 -> 0 bytes tests/data/acpi/virt/SPCR.numamem | Bin 80 -> 0 bytes 14 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/data/acpi/virt/APIC.memhp delete mode 100644 tests/data/acpi/virt/APIC.numamem delete mode 100644 tests/data/acpi/virt/DSDT.numamem delete mode 100644 tests/data/acpi/virt/FACP.memhp delete mode 100644 tests/data/acpi/virt/FACP.numamem delete mode 100644 tests/data/acpi/virt/GTDT.memhp delete mode 100644 tests/data/acpi/virt/GTDT.numamem delete mode 100644 tests/data/acpi/virt/IORT.memhp delete mode 100644 tests/data/acpi/virt/IORT.numamem delete mode 100644 tests/data/acpi/virt/IORT.pxb delete mode 100644 tests/data/acpi/virt/MCFG.memhp delete mode 100644 tests/data/acpi/virt/MCFG.numamem delete mode 100644 tests/data/acpi/virt/SPCR.memhp delete mode 100644 tests/data/acpi/virt/SPCR.numamem diff --git a/tests/data/acpi/virt/APIC.memhp b/tests/data/acpi/virt/APIC.memhp deleted file mode 100644 index 179d274770a23209b949c90a929525e22368568b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 172 zcmZ<^@N{0oz`(%b?&R<65v<@85#X!<1dKp25F13p0FMNW#lQh$F##Fe0Wcl|15CX* gLI}uWgsNwO(#&xED9WH5UbsC>V09of9T)-_08#k}0RR91 diff --git a/tests/data/acpi/virt/APIC.numamem b/tests/data/acpi/virt/APIC.numamem deleted file mode 100644 index 179d274770a23209b949c90a929525e22368568b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 172 zcmZ<^@N{0oz`(%b?&R<65v<@85#X!<1dKp25F13p0FMNW#lQh$F##Fe0Wcl|15CX* gLI}uWgsNwO(#&xED9WH5UbsC>V09of9T)-_08#k}0RR91 diff --git a/tests/data/acpi/virt/DSDT.numamem b/tests/data/acpi/virt/DSDT.numamem deleted file mode 100644 index c47503990715d389914fdf9c8bccb510761741ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5196 zcmZvg%WoT16o>EFlh__VVmr>uc{qhq@vO#n^Jr;H?6H%$#EJ2w4N@w(5&}`OsYHcT zDx{D_3)#^~Yza~%{tYBn?AWnj&4zz~9p>D*Gs*8LXQYhh%-r+M{l>@f@oo97-K~;R zv7eed-lo6U{J7^W(q<{8^s#=;zie6$2Yz#~e^mBd*G&#KJFRTP>vbqtQOUvmPD||{ z-ST$2(Y1be({-!W@LF=<_5DKGnR<~@8kkafrM@3kmUV@qXOz3TzUQqQ?nmwJed5+A z*WYb8X-f7QmO&JpoI%7=(_v=Ae$bDmw6)#eq12^|+n#4$+}u&I@a8Tes^;z-p>KN$ z5mOh4YKUm+S=1zi6O$M=FlxxCi;TF$7zIWh88<|REisb7xPgo%kuf4M9AGRVV_9U3 zN{kFJmXWa{G7=Ia2aFYDtcr}J#K;3<6&W{0MoMB7fpHTU)$qf?OU!X3MhO_VkRg^S z|BytbJ_(HXks;S@3L$1@j z#8?Ez8ZzWMEl7;lfYCsPT&G2eaTOR%WXN?oE-|hF!vjW5uG5mlcoP^cWXN?oAu-+t zMjIJ&olZ)OcY(2v47pCHB*t}MY#>9f(`kv}0%H>ya-Ggdj2bXH$dK!_EHQ2X;~p~P zI-Qjm%fR>$8FHP@NsLusY#~Fg(|L(;3m9Es)a5!|kQldtv5gG5PAd}QE--dN#`Z5g zuM^Irx7~9a?kY7O9<@g%s_QPMy+QkCbNjq4@pt=$iZjgv z$fQtb(u{(;*gUuO`byZ9A%!=l+t{nq10lb zCxz&lV4ex)nc$ujqGytMCYdL-WavpDdZw5swbSP51&<`fJt;&_hj}{8lUg|Rq!2yR z%rnhA)7+Cn^vp2N4D+Ox4?QVF&n)xIGS4jcq!2xG%rnP4sYOIj3ej_nd5$s9G44qr zdeYAsQ{zDu=b0z9l;}wzdKQ>xfq53VCxz%)WS&LlNi8UPQiz`8%yXQ1j&n~6(UX2C zf@g_&Qp<{-6r$$@^PFIw6Wo(R^qgd#lgyJ^T=b+6J*SxG6!Vq!2x4nddC?oaLSrqURj*oMWEUa-%1O=sC|k z=b7g`_oNU#7ntV)^Q0CXJt;)b3iGTm&kFaX(0J}2b!`1snxAj_OWAYR&%cO!v@DTx z(!o;1>%mt#eeYg6R~jAoecuXEVaLEwv`&Dis{+cLJ4fBqvkDtrhSKW=$a+IynRA>K zHoBZe>jucWCa}!8kX6bLyk33NbqkreL4fW1?NuRC^br->w z9}fT1Kg(zvUZ*QohI#MjIN$ezB-Ay^2Uc57ft+$_sgb>m6Pa#^|%@=cw`t+!0-VY^R# zDZQh~lwBbPf<BbPf<!1Bl59VJZSCW(Gzk28RDY I01|@%0RBM>0RR91 diff --git a/tests/data/acpi/virt/SPCR.numamem b/tests/data/acpi/virt/SPCR.numamem deleted file mode 100644 index 24e0a579e7d73f432a614380e29aa95113344186..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 80 zcmWFza1IJ!U|?W6?d0$55v<@85#X!<1dKp25F11@12F>!1Bl59VJZSCW(Gzk28RDY I01|@%0RBM>0RR91