From 99d88de6eb577ff70cc1b1ed59f1a50f16038c2e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:44 +0200 Subject: [PATCH 01/21] memory-device: Unify enabled vs. supported error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's unify the error messages, such that we can simply stop allocating ms->device_memory if the size would be 0 (and there are no memory devices ever). The case of "not supported by the machine" should barely pop up either way: if the machine doesn't support memory devices, it usually doesn't call the pre_plug handler ... Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-2-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/mem/memory-device.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 1636db9679..49f86ec8a8 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -104,15 +104,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, GSList *list = NULL, *item; Range as, new = range_empty; - if (!ms->device_memory) { - error_setg(errp, "memory devices (e.g. for memory hotplug) are not " - "supported by the machine"); - return 0; - } - - if (!memory_region_size(&ms->device_memory->mr)) { - error_setg(errp, "memory devices (e.g. for memory hotplug) are not " - "enabled, please specify the maxmem option"); + if (!ms->device_memory || !memory_region_size(&ms->device_memory->mr)) { + error_setg(errp, "the configuration is not prepared for memory devices" + " (e.g., for memory hotplug), consider specifying the" + " maxmem option"); return 0; } range_init_nofail(&as, ms->device_memory->base, From cc0afd8a72996352e9f3f11b1cdde49b08aa11b6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:45 +0200 Subject: [PATCH 02/21] memory-device: Introduce machine_memory_devices_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's intrduce a new helper that we will use to replace existing memory device setup code during machine initialization. We'll enforce that the size has to be > 0. Once all machines were converted, we'll only allocate ms->device_memory if the size > 0. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-3-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/mem/memory-device.c | 14 ++++++++++++++ include/hw/boards.h | 1 + 2 files changed, 15 insertions(+) diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 49f86ec8a8..bb9d7c2a20 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -17,6 +17,7 @@ #include "qemu/range.h" #include "hw/virtio/vhost.h" #include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "trace.h" static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) @@ -328,6 +329,19 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md, return memory_region_size(mr); } +void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) +{ + g_assert(size); + g_assert(!ms->device_memory); + ms->device_memory = g_new0(DeviceMemoryState, 1); + ms->device_memory->base = base; + + memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory", + size); + memory_region_add_subregion(get_system_memory(), ms->device_memory->base, + &ms->device_memory->mr); +} + static const TypeInfo memory_device_info = { .name = TYPE_MEMORY_DEVICE, .parent = TYPE_INTERFACE, diff --git a/include/hw/boards.h b/include/hw/boards.h index 12d9e9d17c..3b7c30e853 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -37,6 +37,7 @@ void machine_parse_smp_config(MachineState *ms, const SMPConfiguration *config, Error **errp); unsigned int machine_topo_get_cores_per_socket(const MachineState *ms); unsigned int machine_topo_get_threads_per_socket(const MachineState *ms); +void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size); /** * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices From 176d073029083dd52e28e92c5712d0535a83c34d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:46 +0200 Subject: [PATCH 03/21] hw/arm/virt: Use machine_memory_devices_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use our new helper. We'll add the subregion to system RAM now earlier. That shouldn't matter, because the system RAM memory region should already be alive at that point. Cc: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-4-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/arm/virt.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 8a4c663735..0546e43448 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1813,10 +1813,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) virt_set_high_memmap(vms, base, pa_bits); if (device_memory_size > 0) { - ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); - ms->device_memory->base = device_memory_base; - memory_region_init(&ms->device_memory->mr, OBJECT(vms), - "device-memory", device_memory_size); + machine_memory_devices_init(ms, device_memory_base, device_memory_size); } } @@ -2257,10 +2254,6 @@ static void machvirt_init(MachineState *machine) memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, machine->ram); - if (machine->device_memory) { - memory_region_add_subregion(sysmem, machine->device_memory->base, - &machine->device_memory->mr); - } virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); From c0ce7b4acb61c6a8376ff9e0d5cae2d58ac3bf1b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:47 +0200 Subject: [PATCH 04/21] hw/ppc/spapr: Use machine_memory_devices_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use our new helper and stop always allocating ms->device_memory. There is no difference in common memory-device code anymore between ms->device_memory being NULL or the size being 0. So we only have to teach spapr code that ms->device_memory isn't always around. We can now modify two maxram_size checks to rely on ms->device_memory for detecting whether we have memory devices. Cc: Daniel Henrique Barboza Cc: "Cédric Le Goater" Cc: David Gibson Cc: Greg Kurz Cc: Harsh Prateek Bora Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-5-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/ppc/spapr.c | 37 +++++++++++++++++++------------------ hw/ppc/spapr_hcall.c | 2 +- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 54dbfd7fe9..1c8b8d57a7 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -547,10 +547,8 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, cpu_to_be32(lmb_size & 0xffffffff)}; MemoryDeviceInfoList *dimms = NULL; - /* - * Don't create the node if there is no device memory - */ - if (machine->ram_size == machine->maxram_size) { + /* Don't create the node if there is no device memory. */ + if (!machine->device_memory) { return 0; } @@ -860,16 +858,23 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) int rtas; GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); - uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + - memory_region_size(&MACHINE(spapr)->device_memory->mr); uint32_t lrdr_capacity[] = { - cpu_to_be32(max_device_addr >> 32), - cpu_to_be32(max_device_addr & 0xffffffff), + 0, + 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; + /* Do we have device memory? */ + if (MACHINE(spapr)->device_memory) { + uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + + lrdr_capacity[0] = cpu_to_be32(max_device_addr >> 32); + lrdr_capacity[1] = cpu_to_be32(max_device_addr & 0xffffffff); + } + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); /* hypertas */ @@ -2455,6 +2460,7 @@ static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; int i; + g_assert(!nr_lmbs || machine->device_memory); for (i = 0; i < nr_lmbs; i++) { uint64_t addr; @@ -2876,12 +2882,11 @@ static void spapr_machine_init(MachineState *machine) /* map RAM */ memory_region_add_subregion(sysmem, 0, machine->ram); - /* always allocate the device memory information */ - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); - /* initialize hotplug memory address space */ if (machine->ram_size < machine->maxram_size) { ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + hwaddr device_mem_base; + /* * Limit the number of hotpluggable memory slots to half the number * slots that KVM supports, leaving the other half for PCI and other @@ -2900,12 +2905,8 @@ static void spapr_machine_init(MachineState *machine) exit(1); } - machine->device_memory->base = ROUND_UP(machine->ram_size, - SPAPR_DEVICE_MEM_ALIGN); - memory_region_init(&machine->device_memory->mr, OBJECT(spapr), - "device-memory", device_mem_size); - memory_region_add_subregion(sysmem, machine->device_memory->base, - &machine->device_memory->mr); + device_mem_base = ROUND_UP(machine->ram_size, SPAPR_DEVICE_MEM_ALIGN); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } if (smc->dr_lmb_enabled) { @@ -5119,7 +5120,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, int i; /* Do we have device memory? */ - if (MACHINE(spapr)->maxram_size > ram_top) { + if (MACHINE(spapr)->device_memory) { /* Can't just use maxram_size, because there may be an * alignment gap between normal and device memory regions */ diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 002ea0b7c1..9b1f225d4a 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -32,7 +32,7 @@ bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) if (addr < machine->ram_size) { return true; } - if ((addr >= dms->base) + if (dms && (addr >= dms->base) && ((addr - dms->base) < memory_region_size(&dms->mr))) { return true; } From b13e115ff497581c60c94d4afca33225b1f69faf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:48 +0200 Subject: [PATCH 05/21] hw/loongarch/virt: Use machine_memory_devices_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use our new helper. While at it, use VIRT_HIGHMEM_BASE. Cc: Xiaojuan Yang Cc: Song Gao Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Song Gao Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-6-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/loongarch/virt.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 51a453fa9a..e19b042ce8 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -857,8 +857,8 @@ static void loongarch_init(MachineState *machine) /* initialize device memory address space */ if (machine->ram_size < machine->maxram_size) { - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + hwaddr device_mem_base; if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { error_report("unsupported amount of memory slots: %"PRIu64, @@ -873,14 +873,8 @@ static void loongarch_init(MachineState *machine) exit(EXIT_FAILURE); } /* device memory base is the top of high memory address. */ - machine->device_memory->base = 0x90000000 + highram_size; - machine->device_memory->base = - ROUND_UP(machine->device_memory->base, 1 * GiB); - - memory_region_init(&machine->device_memory->mr, OBJECT(lams), - "device-memory", device_mem_size); - memory_region_add_subregion(address_space_mem, machine->device_memory->base, - &machine->device_memory->mr); + device_mem_base = ROUND_UP(VIRT_HIGHMEM_BASE + highram_size, 1 * GiB); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } /* Add isa io region */ From 78732a765986d5270d6b3d88afeb9e4d33092360 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:49 +0200 Subject: [PATCH 06/21] hw/i386/pc: Use machine_memory_devices_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use our new helper and stop always allocating ms->device_memory. Once allcoated, we're sure that the size > 0 and that the base was initialized. Adjust the code in pc_memory_init() to check for machine->device_memory instead of pcmc->has_reserved_memory and machine->device_memory->base. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Reviewed-by: Philippe Mathieu-Daudé Acked-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-7-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/i386/pc.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f01d7de5ad..55a49a9028 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1039,13 +1039,11 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } - /* always allocate the device memory information */ - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); - /* initialize device memory address space */ if (pcmc->has_reserved_memory && (machine->ram_size < machine->maxram_size)) { ram_addr_t device_mem_size; + hwaddr device_mem_base; if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { error_report("unsupported amount of memory slots: %"PRIu64, @@ -1060,19 +1058,14 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } - pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size); + pc_get_device_memory_range(pcms, &device_mem_base, &device_mem_size); - if ((machine->device_memory->base + device_mem_size) < - device_mem_size) { + if (device_mem_base + device_mem_size < device_mem_size) { error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, machine->maxram_size); exit(EXIT_FAILURE); } - - memory_region_init(&machine->device_memory->mr, OBJECT(pcms), - "device-memory", device_mem_size); - memory_region_add_subregion(system_memory, machine->device_memory->base, - &machine->device_memory->mr); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } if (pcms->cxl_devices_state.is_enabled) { @@ -1120,7 +1113,7 @@ void pc_memory_init(PCMachineState *pcms, rom_set_fw(fw_cfg); - if (pcmc->has_reserved_memory && machine->device_memory->base) { + if (machine->device_memory) { uint64_t *val = g_malloc(sizeof(*val)); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); uint64_t res_mem_end = machine->device_memory->base; From 75d5f34396c7f951f99a8c9800db29b4a98c8f67 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:50 +0200 Subject: [PATCH 07/21] hw/i386/acpi-build: Rely on machine->device_memory when building SRAT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're already looking at machine->device_memory when calling build_srat_memory(), so let's simply avoid going via PC_MACHINE_DEVMEM_REGION_SIZE to get the size and rely on machine->device_memory directly. Once machine->device_memory is set, we know that the size > 0. The code now looks much more similar the hw/arm/virt-acpi-build.c variant. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Reviewed-by: Philippe Mathieu-Daudé Acked-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-8-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/i386/acpi-build.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 512162003b..9c74fa17ad 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1950,12 +1950,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); X86MachineState *x86ms = X86_MACHINE(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); - PCMachineState *pcms = PC_MACHINE(machine); int nb_numa_nodes = machine->numa_state->num_nodes; NodeInfo *numa_info = machine->numa_state->nodes; - ram_addr_t hotpluggable_address_space_size = - object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, - NULL); AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; @@ -2071,9 +2067,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ - if (hotpluggable_address_space_size) { + if (machine->device_memory) { build_srat_memory(table_data, machine->device_memory->base, - hotpluggable_address_space_size, nb_numa_nodes - 1, + memory_region_size(&machine->device_memory->mr), + nb_numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } From a8e67ce35b74e8bec70d39f86886439208c7b645 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:51 +0200 Subject: [PATCH 08/21] hw/i386/pc: Remove PC_MACHINE_DEVMEM_REGION_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no remaining users in the tree. Libvirt never used that property and a quick internet search revealed no other users. Further, we renamed that property already in commit f2ffbe2b7dd0 ("pc: rename "hotplug memory" terminology to "device memory"") without anybody complaining. So let's just get rid of it. Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Reviewed-by: Philippe Mathieu-Daudé Acked-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-9-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/i386/pc.c | 19 ------------------- include/hw/i386/pc.h | 1 - 2 files changed, 20 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 55a49a9028..1bd1e5ead1 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1649,21 +1649,6 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, return NULL; } -static void -pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - MachineState *ms = MACHINE(obj); - int64_t value = 0; - - if (ms->device_memory) { - value = memory_region_size(&ms->device_memory->mr); - } - - visit_type_int(v, name, &value, errp); -} - static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -1977,10 +1962,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "Maximum ram below the 4G boundary (32bit boundary)"); - object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", - pc_machine_get_device_memory_region_size, NULL, - NULL, NULL); - object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto", pc_machine_get_vmport, pc_machine_set_vmport, NULL, NULL); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c34c698cdd..d54e8b1101 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -60,7 +60,6 @@ typedef struct PCMachineState { #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" -#define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMBUS "smbus" #define PC_MACHINE_SATA "sata" From d7f4891c85886efde88e0893ec19f89a9d14138e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:52 +0200 Subject: [PATCH 09/21] memory-device: Refactor memory_device_pre_plug() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's move memory_device_check_addable() and basic checks out of memory_device_get_free_addr() directly into memory_device_pre_plug(). Separating basic checks from address assignment is cleaner and prepares for further changes. As all memory device users now use memory_devices_init(), and that function enforces that the size is 0, we can drop the check for an empty region. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-10-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/mem/memory-device.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index bb9d7c2a20..00c7755557 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -69,9 +69,10 @@ static int memory_device_used_region_size(Object *obj, void *opaque) return 0; } -static void memory_device_check_addable(MachineState *ms, uint64_t size, +static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, Error **errp) { + const uint64_t size = memory_region_size(mr); uint64_t used_region_size = 0; /* we will need a new memory slot for kvm and vhost */ @@ -101,16 +102,9 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, uint64_t align, uint64_t size, Error **errp) { - Error *err = NULL; GSList *list = NULL, *item; Range as, new = range_empty; - if (!ms->device_memory || !memory_region_size(&ms->device_memory->mr)) { - error_setg(errp, "the configuration is not prepared for memory devices" - " (e.g., for memory hotplug), consider specifying the" - " maxmem option"); - return 0; - } range_init_nofail(&as, ms->device_memory->base, memory_region_size(&ms->device_memory->mr)); @@ -122,12 +116,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, align); } - memory_device_check_addable(ms, size, &err); - if (err) { - error_propagate(errp, err); - return 0; - } - if (hint && !QEMU_IS_ALIGNED(*hint, align)) { error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes", align); @@ -251,11 +239,23 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, uint64_t addr, align = 0; MemoryRegion *mr; + if (!ms->device_memory) { + error_setg(errp, "the configuration is not prepared for memory devices" + " (e.g., for memory hotplug), consider specifying the" + " maxmem option"); + return; + } + mr = mdc->get_memory_region(md, &local_err); if (local_err) { goto out; } + memory_device_check_addable(ms, mr, &local_err); + if (local_err) { + goto out; + } + if (legacy_align) { align = *legacy_align; } else { From ac23dd2f293c59a5cd2fb385162277b4a6aef769 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Jun 2023 14:45:53 +0200 Subject: [PATCH 10/21] memory-device: Track used region size in DeviceMemoryState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's avoid iterating over all devices and simply track it in the DeviceMemoryState. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Hildenbrand Message-Id: <20230623124553.400585-11-david@redhat.com> Signed-off-by: David Hildenbrand --- hw/mem/memory-device.c | 22 +++------------------- include/hw/boards.h | 2 ++ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 00c7755557..667d56bd29 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -52,28 +52,11 @@ static int memory_device_build_list(Object *obj, void *opaque) return 0; } -static int memory_device_used_region_size(Object *obj, void *opaque) -{ - uint64_t *size = opaque; - - if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { - const DeviceState *dev = DEVICE(obj); - const MemoryDeviceState *md = MEMORY_DEVICE(obj); - - if (dev->realized) { - *size += memory_device_get_region_size(md, &error_abort); - } - } - - object_child_foreach(obj, memory_device_used_region_size, opaque); - return 0; -} - static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, Error **errp) { + const uint64_t used_region_size = ms->device_memory->used_region_size; const uint64_t size = memory_region_size(mr); - uint64_t used_region_size = 0; /* we will need a new memory slot for kvm and vhost */ if (kvm_enabled() && !kvm_has_free_slot(ms)) { @@ -86,7 +69,6 @@ static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, } /* will we exceed the total amount of memory specified */ - memory_device_used_region_size(OBJECT(ms), &used_region_size); if (used_region_size + size < used_region_size || used_region_size + size > ms->maxram_size - ms->ram_size) { error_setg(errp, "not enough space, currently 0x%" PRIx64 @@ -292,6 +274,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) mr = mdc->get_memory_region(md, &error_abort); g_assert(ms->device_memory); + ms->device_memory->used_region_size += memory_region_size(mr); memory_region_add_subregion(&ms->device_memory->mr, addr - ms->device_memory->base, mr); trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr); @@ -310,6 +293,7 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); memory_region_del_subregion(&ms->device_memory->mr, mr); + ms->device_memory->used_region_size -= memory_region_size(mr); trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", mdc->get_addr(md)); } diff --git a/include/hw/boards.h b/include/hw/boards.h index 3b7c30e853..ed83360198 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -298,11 +298,13 @@ struct MachineClass { * address space for memory devices starts * @mr: address space container for memory devices * @dimm_size: the sum of plugged DIMMs' sizes + * @used_region_size: the part of @mr already used by memory devices */ typedef struct DeviceMemoryState { hwaddr base; MemoryRegion mr; uint64_t dimm_size; + uint64_t used_region_size; } DeviceMemoryState; /** From 1d44ff586f8a8e113379430750b5a0a2a3f64cf9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 Jul 2023 09:56:06 +0200 Subject: [PATCH 11/21] softmmu/physmem: Warn with ram_block_discard_range() on MAP_PRIVATE file mapping ram_block_discard_range() cannot possibly do the right thing in MAP_PRIVATE file mappings in the general case. To achieve the documented semantics, we also have to punch a hole into the file, possibly messing with other MAP_PRIVATE/MAP_SHARED mappings of such a file. For example, using VM templating -- see commit b17fbbe55cba ("migration: allow private destination ram with x-ignore-shared") -- in combination with any mechanism that relies on discarding of RAM is problematic. This includes: * Postcopy live migration * virtio-balloon inflation/deflation or free-page-reporting * virtio-mem So at least warn that there is something possibly dangerous is going on when using ram_block_discard_range() in these cases. Message-ID: <20230706075612.67404-2-david@redhat.com> Acked-by: Peter Xu Tested-by: Mario Casquero Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- softmmu/physmem.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/softmmu/physmem.c b/softmmu/physmem.c index bda475a719..3df73542e1 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -3456,6 +3456,24 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) * so a userfault will trigger. */ #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + /* + * We'll discard data from the actual file, even though we only + * have a MAP_PRIVATE mapping, possibly messing with other + * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to + * change that behavior whithout violating the promised + * semantics of ram_block_discard_range(). + * + * Only warn, because it works as long as nobody else uses that + * file. + */ + if (!qemu_ram_is_shared(rb)) { + warn_report_once("ram_block_discard_range: Discarding RAM" + " in private file mappings is possibly" + " dangerous, because it will modify the" + " underlying file and will affect other" + " users of the file"); + } + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, start, length); if (ret) { From 836f657b6a32baf5579c6f218c9c104363562bb6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 Jul 2023 09:56:07 +0200 Subject: [PATCH 12/21] virtio-mem: Skip most of virtio_mem_unplug_all() without plugged memory Already when starting QEMU we perform one system reset that ends up triggering virtio_mem_unplug_all() with no actual memory plugged yet. That, in turn will trigger ram_block_discard_range() and perform some other actions that are not required in that case. Let's optimize virtio_mem_unplug_all() for the case that no memory is plugged. This will be beneficial for x-ignore-shared support as well. Message-ID: <20230706075612.67404-3-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-mem.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index ec0ae32589..a922c21380 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -621,20 +621,20 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem) { RAMBlock *rb = vmem->memdev->mr.ram_block; - if (virtio_mem_is_busy()) { - return -EBUSY; - } - - if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { - return -EBUSY; - } - virtio_mem_notify_unplug_all(vmem); - - bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); if (vmem->size) { + if (virtio_mem_is_busy()) { + return -EBUSY; + } + if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { + return -EBUSY; + } + virtio_mem_notify_unplug_all(vmem); + + bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); vmem->size = 0; notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); } + trace_virtio_mem_unplugged_all(); virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); return 0; From f161c88a03c646ee308653d3ea99318901093309 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 Jul 2023 09:56:08 +0200 Subject: [PATCH 13/21] migration/ram: Expose ramblock_is_ignored() as migrate_ram_is_ignored() virtio-mem wants to know whether it should not mess with the RAMBlock content (e.g., discard RAM, preallocate memory) on incoming migration. So let's expose that function as migrate_ram_is_ignored() in migration/misc.h Message-ID: <20230706075612.67404-4-david@redhat.com> Acked-by: Peter Xu Tested-by: Mario Casquero Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- include/migration/misc.h | 1 + migration/postcopy-ram.c | 2 +- migration/ram.c | 14 +++++++------- migration/ram.h | 3 +-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/migration/misc.h b/include/migration/misc.h index 5ebe13b4b9..7dcc0b5c2c 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -40,6 +40,7 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); +bool migrate_ram_is_ignored(RAMBlock *block); /* migration/block.c */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 5615ec29eb..29aea9456d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -408,7 +408,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) /* * We don't support postcopy with some type of ramblocks. * - * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked + * NOTE: we explicitly ignored migrate_ram_is_ignored() instead we checked * all possible ramblocks. This is because this function can be called * when creating the migration object, during the phase RAM_MIGRATABLE * is not even properly set for all the ramblocks. diff --git a/migration/ram.c b/migration/ram.c index 5283a75f02..0ada6477e8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -194,7 +194,7 @@ static bool postcopy_preempt_active(void) return migrate_postcopy_preempt() && migration_in_postcopy(); } -bool ramblock_is_ignored(RAMBlock *block) +bool migrate_ram_is_ignored(RAMBlock *block) { return !qemu_ram_is_migratable(block) || (migrate_ignore_shared() && qemu_ram_is_shared(block) @@ -696,7 +696,7 @@ static void pss_find_next_dirty(PageSearchStatus *pss) unsigned long size = rb->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = rb->bmap; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { /* Points directly to the end, so we know no dirty page */ pss->page = size; return; @@ -780,7 +780,7 @@ unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, *num = 0; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { return size; } @@ -2260,7 +2260,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) unsigned long start_page = pss->page; int res; - if (ramblock_is_ignored(pss->block)) { + if (migrate_ram_is_ignored(pss->block)) { error_report("block %s should not be migrated !", pss->block->idstr); return 0; } @@ -3347,7 +3347,7 @@ static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis, return NULL; } - if (ramblock_is_ignored(block)) { + if (migrate_ram_is_ignored(block)) { error_report("block %s should not be migrated !", id); return NULL; } @@ -3958,7 +3958,7 @@ static int ram_load_precopy(QEMUFile *f) } if (migrate_ignore_shared()) { hwaddr addr = qemu_get_be64(f); - if (ramblock_is_ignored(block) && + if (migrate_ram_is_ignored(block) && block->mr->addr != addr) { error_report("Mismatched GPAs for block %s " "%" PRId64 "!= %" PRId64, @@ -4254,7 +4254,7 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); Error *err = NULL; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { return; } diff --git a/migration/ram.h b/migration/ram.h index ea1f3c25b5..145c915ca7 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -36,11 +36,10 @@ extern XBZRLECacheStats xbzrle_counters; extern CompressionStats compression_counters; -bool ramblock_is_ignored(RAMBlock *block); /* Should be holding either ram_list.mutex, or the RCU lock. */ #define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ INTERNAL_RAMBLOCK_FOREACH(block) \ - if (ramblock_is_ignored(block)) {} else + if (migrate_ram_is_ignored(block)) {} else #define RAMBLOCK_FOREACH_MIGRATABLE(block) \ INTERNAL_RAMBLOCK_FOREACH(block) \ From b01fd4b67a8fdf5a9ecf4ad5b49b70d52424f0f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 Jul 2023 09:56:09 +0200 Subject: [PATCH 14/21] virtio-mem: Support "x-ignore-shared" migration To achieve desired "x-ignore-shared" functionality, we should not discard all RAM when realizing the device and not mess with preallocation/postcopy when loading device state. In essence, we should not touch RAM content. As "x-ignore-shared" gets set after realizing the device, we cannot rely on that. Let's simply skip discarding of RAM on incoming migration. Note that virtio_mem_post_load() will call virtio_mem_restore_unplugged() -- unless "x-ignore-shared" is set. So once migration finished we'll have a consistent state. The initial system reset will also not discard any RAM, because virtio_mem_unplug_all() will not call virtio_mem_unplug_all() when no memory is plugged (which is the case before loading the device state). Note that something like VM templating -- see commit b17fbbe55cba ("migration: allow private destination ram with x-ignore-shared") -- is currently incompatible with virtio-mem and ram_block_discard_range() will warn in case a private file mapping is supplied by virtio-mem. For VM templating with virtio-mem, it makes more sense to either (a) Create the template without the virtio-mem device and hotplug a virtio-mem device to the new VM instances using proper own memory backend. (b) Use a virtio-mem device that doesn't provide any memory in the template (requested-size=0) and use private anonymous memory. Message-ID: <20230706075612.67404-5-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-mem.c | 47 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index a922c21380..3f41e00e74 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -18,6 +18,7 @@ #include "sysemu/numa.h" #include "sysemu/sysemu.h" #include "sysemu/reset.h" +#include "sysemu/runstate.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-mem.h" @@ -901,11 +902,23 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } - ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); - if (ret) { - error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); - ram_block_coordinated_discard_require(false); - return; + /* + * We don't know at this point whether shared RAM is migrated using + * QEMU or migrated using the file content. "x-ignore-shared" will be + * configured after realizing the device. So in case we have an + * incoming migration, simply always skip the discard step. + * + * Otherwise, make sure that we start with a clean slate: either the + * memory backend might get reused or the shared file might still have + * memory allocated. + */ + if (!runstate_check(RUN_STATE_INMIGRATE)) { + ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); + if (ret) { + error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + ram_block_coordinated_discard_require(false); + return; + } } virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); @@ -977,10 +990,6 @@ static int virtio_mem_post_load(void *opaque, int version_id) RamDiscardListener *rdl; int ret; - if (vmem->prealloc && !vmem->early_migration) { - warn_report("Proper preallocation with migration requires a newer QEMU machine"); - } - /* * We started out with all memory discarded and our memory region is mapped * into an address space. Replay, now that we updated the bitmap. @@ -993,6 +1002,18 @@ static int virtio_mem_post_load(void *opaque, int version_id) } } + /* + * If shared RAM is migrated using the file content and not using QEMU, + * don't mess with preallocation and postcopy. + */ + if (migrate_ram_is_ignored(vmem->memdev->mr.ram_block)) { + return 0; + } + + if (vmem->prealloc && !vmem->early_migration) { + warn_report("Proper preallocation with migration requires a newer QEMU machine"); + } + if (migration_in_incoming_postcopy()) { return 0; } @@ -1025,6 +1046,14 @@ static int virtio_mem_post_load_early(void *opaque, int version_id) return 0; } + /* + * If shared RAM is migrated using the file content and not using QEMU, + * don't mess with preallocation and postcopy. + */ + if (migrate_ram_is_ignored(rb)) { + return 0; + } + /* * We restored the bitmap and verified that the basic properties * match on source and destination, so we can go ahead and preallocate From 18129c15bcefc0064febe2dc7759b93f7c5aaab3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:39 +0200 Subject: [PATCH 15/21] virtio-md-pci: New parent type for virtio-mem-pci and virtio-pmem-pci Let's add a new abstract "virtio memory device" type, and use it as parent class of virtio-mem-pci and virtio-pmem-pci. Message-ID: <20230711153445.514112-2-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- MAINTAINERS | 6 ++++++ hw/virtio/Kconfig | 8 +++++-- hw/virtio/meson.build | 1 + hw/virtio/virtio-md-pci.c | 33 +++++++++++++++++++++++++++++ hw/virtio/virtio-mem-pci.c | 5 +---- hw/virtio/virtio-mem-pci.h | 6 +++--- hw/virtio/virtio-pmem-pci.c | 5 +---- hw/virtio/virtio-pmem-pci.h | 6 +++--- include/hw/virtio/virtio-md-pci.h | 35 +++++++++++++++++++++++++++++++ 9 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 hw/virtio/virtio-md-pci.c create mode 100644 include/hw/virtio/virtio-md-pci.h diff --git a/MAINTAINERS b/MAINTAINERS index e158a25cfe..bb4626faf7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2229,6 +2229,12 @@ F: hw/virtio/virtio-crypto.c F: hw/virtio/virtio-crypto-pci.c F: include/hw/virtio/virtio-crypto.h +virtio based memory device +M: David Hildenbrand +S: Supported +F: hw/virtio/virtio-md-pci.c +F: include/hw/virtio/virtio-md-pci.h + virtio-mem M: David Hildenbrand S: Supported diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index a9ee09062f..92c9cf6c96 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -35,6 +35,10 @@ config VIRTIO_CRYPTO default y depends on VIRTIO +config VIRTIO_MD + bool + select MEM_DEVICE + config VIRTIO_PMEM_SUPPORTED bool @@ -43,7 +47,7 @@ config VIRTIO_PMEM default y depends on VIRTIO depends on VIRTIO_PMEM_SUPPORTED - select MEM_DEVICE + select VIRTIO_MD config VIRTIO_MEM_SUPPORTED bool @@ -54,7 +58,7 @@ config VIRTIO_MEM depends on VIRTIO depends on LINUX depends on VIRTIO_MEM_SUPPORTED - select MEM_DEVICE + select VIRTIO_MD config VHOST_VSOCK_COMMON bool diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index c4f4fe05fa..13e7c6c272 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -63,6 +63,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c' virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c new file mode 100644 index 0000000000..6b02ff908e --- /dev/null +++ b/hw/virtio/virtio-md-pci.c @@ -0,0 +1,33 @@ +/* + * Abstract virtio based memory device + * + * Copyright (C) 2023 Red Hat, Inc. + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/virtio/virtio-md-pci.h" +#include "hw/mem/memory-device.h" + +static const TypeInfo virtio_md_pci_info = { + .name = TYPE_VIRTIO_MD_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VirtIOMDPCI), + .class_size = sizeof(VirtIOMDPCIClass), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void virtio_md_pci_register(void) +{ + type_register_static(&virtio_md_pci_info); +} +type_init(virtio_md_pci_register) diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index b85c12668d..2ef0f07630 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -142,14 +142,11 @@ static void virtio_mem_pci_instance_init(Object *obj) static const VirtioPCIDeviceTypeInfo virtio_mem_pci_info = { .base_name = TYPE_VIRTIO_MEM_PCI, + .parent = TYPE_VIRTIO_MD_PCI, .generic_name = "virtio-mem-pci", .instance_size = sizeof(VirtIOMEMPCI), .instance_init = virtio_mem_pci_instance_init, .class_init = virtio_mem_pci_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_MEMORY_DEVICE }, - { } - }, }; static void virtio_mem_pci_register_types(void) diff --git a/hw/virtio/virtio-mem-pci.h b/hw/virtio/virtio-mem-pci.h index e636e1a48d..c50b51d608 100644 --- a/hw/virtio/virtio-mem-pci.h +++ b/hw/virtio/virtio-mem-pci.h @@ -13,21 +13,21 @@ #ifndef QEMU_VIRTIO_MEM_PCI_H #define QEMU_VIRTIO_MEM_PCI_H -#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-mem.h" #include "qom/object.h" typedef struct VirtIOMEMPCI VirtIOMEMPCI; /* - * virtio-mem-pci: This extends VirtioPCIProxy. + * virtio-mem-pci: This extends VirtIOMDPCI. */ #define TYPE_VIRTIO_MEM_PCI "virtio-mem-pci-base" DECLARE_INSTANCE_CHECKER(VirtIOMEMPCI, VIRTIO_MEM_PCI, TYPE_VIRTIO_MEM_PCI) struct VirtIOMEMPCI { - VirtIOPCIProxy parent_obj; + VirtIOMDPCI parent_obj; VirtIOMEM vdev; Notifier size_change_notifier; }; diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c index 197d219204..cfe7f3b67c 100644 --- a/hw/virtio/virtio-pmem-pci.c +++ b/hw/virtio/virtio-pmem-pci.c @@ -110,13 +110,10 @@ static void virtio_pmem_pci_instance_init(Object *obj) static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = { .base_name = TYPE_VIRTIO_PMEM_PCI, .generic_name = "virtio-pmem-pci", + .parent = TYPE_VIRTIO_MD_PCI, .instance_size = sizeof(VirtIOPMEMPCI), .instance_init = virtio_pmem_pci_instance_init, .class_init = virtio_pmem_pci_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_MEMORY_DEVICE }, - { } - }, }; static void virtio_pmem_pci_register_types(void) diff --git a/hw/virtio/virtio-pmem-pci.h b/hw/virtio/virtio-pmem-pci.h index 63cfe727f7..88b01ce2db 100644 --- a/hw/virtio/virtio-pmem-pci.h +++ b/hw/virtio/virtio-pmem-pci.h @@ -14,21 +14,21 @@ #ifndef QEMU_VIRTIO_PMEM_PCI_H #define QEMU_VIRTIO_PMEM_PCI_H -#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-pmem.h" #include "qom/object.h" typedef struct VirtIOPMEMPCI VirtIOPMEMPCI; /* - * virtio-pmem-pci: This extends VirtioPCIProxy. + * virtio-pmem-pci: This extends VirtIOMDPCI. */ #define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base" DECLARE_INSTANCE_CHECKER(VirtIOPMEMPCI, VIRTIO_PMEM_PCI, TYPE_VIRTIO_PMEM_PCI) struct VirtIOPMEMPCI { - VirtIOPCIProxy parent_obj; + VirtIOMDPCI parent_obj; VirtIOPMEM vdev; }; diff --git a/include/hw/virtio/virtio-md-pci.h b/include/hw/virtio/virtio-md-pci.h new file mode 100644 index 0000000000..a241b54fcd --- /dev/null +++ b/include/hw/virtio/virtio-md-pci.h @@ -0,0 +1,35 @@ +/* + * Abstract virtio based memory device + * + * Copyright (C) 2023 Red Hat, Inc. + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_MD_PCI_H +#define HW_VIRTIO_MD_PCI_H + +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + +/* + * virtio-md-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_MD_PCI "virtio-md-pci" + +OBJECT_DECLARE_TYPE(VirtIOMDPCI, VirtIOMDPCIClass, VIRTIO_MD_PCI) + +struct VirtIOMDPCIClass { + /* private */ + VirtioPCIClass parent; +}; + +struct VirtIOMDPCI { + VirtIOPCIProxy parent_obj; +}; + +#endif From dbdf841b2ed8b88d30a8a1f0c26029b2ebb93e76 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:40 +0200 Subject: [PATCH 16/21] pc: Factor out (un)plug handling of virtio-md-pci devices Let's factor out (un)plug handling, to be reused from arm/virt code. Provide stubs for the case that CONFIG_VIRTIO_MD is not selected because neither virtio-mem nor virtio-pmem is enabled. While this cannot currently happen for x86, it will be possible for arm/virt. Message-ID: <20230711153445.514112-3-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- MAINTAINERS | 1 + hw/i386/pc.c | 90 ++++--------------------------- hw/virtio/virtio-md-pci.c | 63 ++++++++++++++++++++++ include/hw/virtio/virtio-md-pci.h | 6 +++ stubs/meson.build | 1 + stubs/virtio-md-pci.c | 24 +++++++++ 6 files changed, 106 insertions(+), 79 deletions(-) create mode 100644 stubs/virtio-md-pci.c diff --git a/MAINTAINERS b/MAINTAINERS index bb4626faf7..12e59b6b27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2234,6 +2234,7 @@ M: David Hildenbrand S: Supported F: hw/virtio/virtio-md-pci.c F: include/hw/virtio/virtio-md-pci.h +F: stubs/virtio-md-pci.c virtio-mem M: David Hildenbrand diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1bd1e5ead1..3109d5e0e0 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -88,13 +88,11 @@ #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" #include "hw/virtio/virtio-iommu.h" -#include "hw/virtio/virtio-pmem-pci.h" -#include "hw/virtio/virtio-mem-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/i386/kvm/xen_overlay.h" #include "hw/i386/kvm/xen_evtchn.h" #include "hw/i386/kvm/xen_gnttab.h" #include "hw/i386/kvm/xen_xenstore.h" -#include "hw/mem/memory-device.h" #include "sysemu/replay.h" #include "target/i386/cpu.h" #include "e820_memory_layout.h" @@ -1493,68 +1491,6 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, error_propagate(errp, local_err); } -static void pc_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - if (!hotplug_dev2 && dev->hotplugged) { - /* - * Without a bus hotplug handler, we cannot control the plug/unplug - * order. We should never reach this point when hotplugging on x86, - * however, better add a safety net. - */ - error_setg(errp, "hotplug of virtio based memory devices not supported" - " on this bus."); - return; - } - /* - * First, see if we can plug this memory device at all. If that - * succeeds, branch of to the actual hotplug handler. - */ - memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, - &local_err); - if (!local_err && hotplug_dev2) { - hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); - } - error_propagate(errp, local_err); -} - -static void pc_virtio_md_pci_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - /* - * Plug the memory device first and then branch off to the actual - * hotplug handler. If that one fails, we can easily undo the memory - * device bits. - */ - memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - if (hotplug_dev2) { - hotplug_handler_plug(hotplug_dev2, dev, &local_err); - if (local_err) { - memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - } - } - error_propagate(errp, local_err); -} - -static void pc_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ - error_setg(errp, "virtio based memory devices cannot be unplugged."); -} - -static void pc_virtio_md_pci_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ -} - static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -1562,9 +1498,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, pc_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_pre_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { /* Declare the APIC range as the reserved MSI region */ char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d", @@ -1596,9 +1531,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, pc_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } } @@ -1609,9 +1543,9 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, pc_memory_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_unplug_request_cb(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -1625,9 +1559,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, pc_memory_unplug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_unplug_cb(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -1639,8 +1572,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_CPU) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c index 6b02ff908e..e849c3131d 100644 --- a/hw/virtio/virtio-md-pci.c +++ b/hw/virtio/virtio-md-pci.c @@ -13,6 +13,69 @@ #include "qemu/osdep.h" #include "hw/virtio/virtio-md-pci.h" #include "hw/mem/memory-device.h" +#include "qapi/error.h" + +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + if (!bus_handler && dev->hotplugged) { + /* + * Without a bus hotplug handler, we cannot control the plug/unplug + * order. We should never reach this point when hotplugging on x86, + * however, better add a safety net. + */ + error_setg(errp, "hotplug of virtio based memory devices not supported" + " on this bus."); + return; + } + /* + * First, see if we can plug this memory device at all. If that + * succeeds, branch of to the actual hotplug handler. + */ + memory_device_pre_plug(md, ms, NULL, &local_err); + if (!local_err && bus_handler) { + hotplug_handler_pre_plug(bus_handler, dev, &local_err); + } + error_propagate(errp, local_err); +} + +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + /* + * Plug the memory device first and then branch off to the actual + * hotplug handler. If that one fails, we can easily undo the memory + * device bits. + */ + memory_device_plug(md, ms); + if (bus_handler) { + hotplug_handler_plug(bus_handler, dev, &local_err); + if (local_err) { + memory_device_unplug(md, ms); + } + } + error_propagate(errp, local_err); +} + +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp) +{ + /* We don't support hot unplug of virtio based memory devices */ + error_setg(errp, "virtio based memory devices cannot be unplugged."); +} + +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + /* We don't support hot unplug of virtio based memory devices */ +} static const TypeInfo virtio_md_pci_info = { .name = TYPE_VIRTIO_MD_PCI, diff --git a/include/hw/virtio/virtio-md-pci.h b/include/hw/virtio/virtio-md-pci.h index a241b54fcd..f9fa857aec 100644 --- a/include/hw/virtio/virtio-md-pci.h +++ b/include/hw/virtio/virtio-md-pci.h @@ -32,4 +32,10 @@ struct VirtIOMDPCI { VirtIOPCIProxy parent_obj; }; +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp); +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); + #endif diff --git a/stubs/meson.build b/stubs/meson.build index a56645e2f7..ef6e39a64d 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -60,6 +60,7 @@ if have_system stub_ss.add(files('semihost.c')) stub_ss.add(files('usb-dev-stub.c')) stub_ss.add(files('xen-hw-stub.c')) + stub_ss.add(files('virtio-md-pci.c')) else stub_ss.add(files('qdev.c')) endif diff --git a/stubs/virtio-md-pci.c b/stubs/virtio-md-pci.c new file mode 100644 index 0000000000..ce5bba0c9d --- /dev/null +++ b/stubs/virtio-md-pci.c @@ -0,0 +1,24 @@ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-md-pci.h" + +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} From 30ec5ccd3a2692464f832ec5a18b448d5ff86752 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:41 +0200 Subject: [PATCH 17/21] arm/virt: Use virtio-md-pci (un)plug functions Let's use our new helper functions. Note that virtio-pmem-pci is not enabled for arm and, therefore, not compiled in. Message-ID: <20230711153445.514112-4-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/arm/virt.c | 81 ++++++++------------------------------------------- 1 file changed, 12 insertions(+), 69 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 0546e43448..7d9dbc2663 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -73,11 +73,10 @@ #include "hw/arm/smmuv3.h" #include "hw/acpi/acpi.h" #include "target/arm/internals.h" -#include "hw/mem/memory-device.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" #include "hw/acpi/generic_event_device.h" -#include "hw/virtio/virtio-mem-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-iommu.h" #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -2733,64 +2732,6 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, dev, &error_abort); } -static void virt_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - if (!hotplug_dev2 && dev->hotplugged) { - /* - * Without a bus hotplug handler, we cannot control the plug/unplug - * order. We should never reach this point when hotplugging on ARM. - * However, it's nice to add a safety net, similar to what we have - * on x86. - */ - error_setg(errp, "hotplug of virtio based memory devices not supported" - " on this bus."); - return; - } - /* - * First, see if we can plug this memory device at all. If that - * succeeds, branch of to the actual hotplug handler. - */ - memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, - &local_err); - if (!local_err && hotplug_dev2) { - hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); - } - error_propagate(errp, local_err); -} - -static void virt_virtio_md_pci_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - /* - * Plug the memory device first and then branch off to the actual - * hotplug handler. If that one fails, we can easily undo the memory - * device bits. - */ - memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - if (hotplug_dev2) { - hotplug_handler_plug(hotplug_dev2, dev, &local_err); - if (local_err) { - memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - } - } - error_propagate(errp, local_err); -} - -static void virt_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ - error_setg(errp, "virtio based memory devices cannot be unplugged."); -} - - static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2798,8 +2739,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_memory_pre_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { hwaddr db_start = 0, db_end = 0; char *resv_prop_str; @@ -2848,12 +2789,11 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, SYS_BUS_DEVICE(dev)); } } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_memory_plug(hotplug_dev, dev, errp); - } - - if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { @@ -2908,8 +2848,9 @@ static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_dimm_unplug_request(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); } else { error_setg(errp, "device unplug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2921,6 +2862,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else { error_setg(errp, "virt: device unplug for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2934,7 +2877,7 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, if (device_is_dynamic_sysbus(mc, dev) || object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { return HOTPLUG_HANDLER(machine); } From c29dd73f74fe6020ee0755d938885919a3719194 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:42 +0200 Subject: [PATCH 18/21] virtio-md-pci: Handle unplug of virtio based memory devices While we fence unplug requests from the outside, the VM can still trigger unplug of virtio based memory devices, for example, in Linux doing on a virtio-mem-pci device: # echo 0 > /sys/bus/pci/slots/3/power While doing that is not really expected to work without harming the guest OS (e.g., removing a virtio-mem device while it still provides memory), let's make sure that we properly handle it on the QEMU side. We'll add support for unplugging of virtio-mem devices in some configurations next. Message-ID: <20230711153445.514112-5-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-md-pci.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c index e849c3131d..a22a259e2d 100644 --- a/hw/virtio/virtio-md-pci.c +++ b/hw/virtio/virtio-md-pci.c @@ -14,6 +14,7 @@ #include "hw/virtio/virtio-md-pci.h" #include "hw/mem/memory-device.h" #include "qapi/error.h" +#include "qemu/error-report.h" void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) { @@ -74,7 +75,27 @@ void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) { - /* We don't support hot unplug of virtio based memory devices */ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + /* Unplug the memory device while it is still realized. */ + memory_device_unplug(md, ms); + + if (bus_handler) { + hotplug_handler_unplug(bus_handler, dev, &local_err); + if (local_err) { + /* Not expected to fail ... but still try to recover. */ + memory_device_plug(md, ms); + error_propagate(errp, local_err); + return; + } + } else { + /* Very unexpected, but let's just try to do the right thing. */ + warn_report("Unexpected unplug of virtio based memory device"); + qdev_unrealize(dev); + } } static const TypeInfo virtio_md_pci_info = { From aac44204bc6d2a062c2e1658fe6a02a67f55b9e6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:43 +0200 Subject: [PATCH 19/21] virtio-md-pci: Support unplug requests for compatible devices Let's support unplug requests for virtio-md-pci devices that provide a unplug_request_check() callback. We'll wire that up for virtio-mem-pci next. Message-ID: <20230711153445.514112-6-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-md-pci.c | 38 +++++++++++++++++++++++++++++-- include/hw/virtio/virtio-md-pci.h | 3 +++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c index a22a259e2d..62bfb7920b 100644 --- a/hw/virtio/virtio-md-pci.c +++ b/hw/virtio/virtio-md-pci.c @@ -69,8 +69,42 @@ void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) { - /* We don't support hot unplug of virtio based memory devices */ - error_setg(errp, "virtio based memory devices cannot be unplugged."); + VirtIOMDPCIClass *vmdc = VIRTIO_MD_PCI_GET_CLASS(vmd); + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + HotplugHandlerClass *hdc; + Error *local_err = NULL; + + if (!vmdc->unplug_request_check) { + error_setg(errp, "this virtio based memory devices cannot be unplugged"); + return; + } + + if (!bus_handler) { + error_setg(errp, "hotunplug of virtio based memory devices not" + "supported on this bus"); + return; + } + + vmdc->unplug_request_check(vmd, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * Forward the async request or turn it into a sync request (handling it + * like qdev_unplug()). + */ + hdc = HOTPLUG_HANDLER_GET_CLASS(bus_handler); + if (hdc->unplug_request) { + hotplug_handler_unplug_request(bus_handler, dev, &local_err); + } else { + virtio_md_pci_unplug(vmd, ms, &local_err); + if (!local_err) { + object_unparent(OBJECT(dev)); + } + } } void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) diff --git a/include/hw/virtio/virtio-md-pci.h b/include/hw/virtio/virtio-md-pci.h index f9fa857aec..5912e16674 100644 --- a/include/hw/virtio/virtio-md-pci.h +++ b/include/hw/virtio/virtio-md-pci.h @@ -26,6 +26,9 @@ OBJECT_DECLARE_TYPE(VirtIOMDPCI, VirtIOMDPCIClass, VIRTIO_MD_PCI) struct VirtIOMDPCIClass { /* private */ VirtioPCIClass parent; + + /* public */ + void (*unplug_request_check)(VirtIOMDPCI *vmd, Error **errp); }; struct VirtIOMDPCI { From 92a8ee1b594244a82d4c955eab907034f8953f8b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:44 +0200 Subject: [PATCH 20/21] virtio-mem: Prepare for device unplug support In many cases, blindly unplugging a virtio-mem device is problematic. We can only safely remove a device once: * The guest is not expecting to be able to read unplugged memory (unplugged-inaccessible == on) * The virtio-mem device does not have memory plugged (size == 0) * The virtio-mem device does not have outstanding requests to the VM to plug memory (requested-size == 0) So let's add a callback to the virtio-mem device class to check for that. We'll wire-up virtio-mem-pci next. Message-ID: <20230711153445.514112-7-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-mem.c | 25 +++++++++++++++++++++++++ include/hw/virtio/virtio-mem.h | 1 + 2 files changed, 26 insertions(+) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 3f41e00e74..b6e781741e 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -1512,6 +1512,30 @@ static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, QLIST_REMOVE(rdl, next); } +static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp) +{ + if (vmem->unplugged_inaccessible == ON_OFF_AUTO_OFF) { + /* + * We could allow it with a usable region size of 0, but let's just + * not care about that legacy setting. + */ + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "' != 'on'"); + return; + } + + if (vmem->size) { + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_SIZE_PROP "' != '0'"); + return; + } + if (vmem->requested_size) { + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_REQUESTED_SIZE_PROP "' != '0'"); + return; + } +} + static void virtio_mem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1534,6 +1558,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->get_memory_region = virtio_mem_get_memory_region; vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; + vmc->unplug_request_check = virtio_mem_unplug_request_check; rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; rdmc->is_populated = virtio_mem_rdm_is_populated; diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index f15e561785..ab0fe2b4f2 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -98,6 +98,7 @@ struct VirtIOMEMClass { MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); + void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp); }; #endif From 339a8bbdfed910d0baa392c2071fd0e09b30aed9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 11 Jul 2023 17:34:45 +0200 Subject: [PATCH 21/21] virtio-mem-pci: Device unplug support Let's support device unplug by forwarding the unplug_request_check() callback to the virtio-mem device. Further, disallow changing the requested-size once an unplug request is pending. Disallowing requested-size changes handles corner cases such as (1) pausing the VM (2) requesting device unplug and (3) adjusting the requested size. If the VM would plug memory (due to the requested size change) before processing the unplug request, we would be in trouble. Message-ID: <20230711153445.514112-8-david@redhat.com> Tested-by: Mario Casquero Reviewed-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand --- hw/virtio/virtio-mem-pci.c | 49 +++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index 2ef0f07630..c4597e029e 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -93,12 +93,53 @@ static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data) g_free(qom_path); } +static void virtio_mem_pci_unplug_request_check(VirtIOMDPCI *vmd, Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(vmd); + VirtIOMEM *vmem = &pci_mem->vdev; + VirtIOMEMClass *vpc = VIRTIO_MEM_GET_CLASS(vmem); + + vpc->unplug_request_check(vmem, errp); +} + +static void virtio_mem_pci_get_requested_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(obj); + + object_property_get(OBJECT(&pci_mem->vdev), name, v, errp); +} + +static void virtio_mem_pci_set_requested_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(obj); + DeviceState *dev = DEVICE(obj); + + /* + * If we passed virtio_mem_pci_unplug_request_check(), making sure that + * the requested size is 0, don't allow modifying the requested size + * anymore, otherwise the VM might end up hotplugging memory before + * handling the unplug request. + */ + if (dev->pending_deleted_event) { + error_setg(errp, "'%s' cannot be changed if the device is in the" + " process of unplug", name); + return; + } + + object_property_set(OBJECT(&pci_mem->vdev), name, v, errp); +} + static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + VirtIOMDPCIClass *vmdc = VIRTIO_MD_PCI_CLASS(klass); k->realize = virtio_mem_pci_realize; set_bit(DEVICE_CATEGORY_MISC, dc->categories); @@ -111,6 +152,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) mdc->get_memory_region = virtio_mem_pci_get_memory_region; mdc->fill_device_info = virtio_mem_pci_fill_device_info; mdc->get_min_alignment = virtio_mem_pci_get_min_alignment; + + vmdc->unplug_request_check = virtio_mem_pci_unplug_request_check; } static void virtio_mem_pci_instance_init(Object *obj) @@ -135,9 +178,9 @@ static void virtio_mem_pci_instance_init(Object *obj) OBJECT(&dev->vdev), VIRTIO_MEM_BLOCK_SIZE_PROP); object_property_add_alias(obj, VIRTIO_MEM_SIZE_PROP, OBJECT(&dev->vdev), VIRTIO_MEM_SIZE_PROP); - object_property_add_alias(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, - OBJECT(&dev->vdev), - VIRTIO_MEM_REQUESTED_SIZE_PROP); + object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", + virtio_mem_pci_get_requested_size, + virtio_mem_pci_set_requested_size, NULL, NULL); } static const VirtioPCIDeviceTypeInfo virtio_mem_pci_info = {