diff --git a/MAINTAINERS b/MAINTAINERS index e158a25cfe..12e59b6b27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2229,6 +2229,13 @@ F: hw/virtio/virtio-crypto.c F: hw/virtio/virtio-crypto-pci.c F: include/hw/virtio/virtio-crypto.h +virtio based memory device +M: David Hildenbrand <david@redhat.com> +S: Supported +F: hw/virtio/virtio-md-pci.c +F: include/hw/virtio/virtio-md-pci.h +F: stubs/virtio-md-pci.c + virtio-mem M: David Hildenbrand <david@redhat.com> S: Supported diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 8a4c663735..7d9dbc2663 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -73,11 +73,10 @@ #include "hw/arm/smmuv3.h" #include "hw/acpi/acpi.h" #include "target/arm/internals.h" -#include "hw/mem/memory-device.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" #include "hw/acpi/generic_event_device.h" -#include "hw/virtio/virtio-mem-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-iommu.h" #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -1813,10 +1812,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) virt_set_high_memmap(vms, base, pa_bits); if (device_memory_size > 0) { - ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); - ms->device_memory->base = device_memory_base; - memory_region_init(&ms->device_memory->mr, OBJECT(vms), - "device-memory", device_memory_size); + machine_memory_devices_init(ms, device_memory_base, device_memory_size); } } @@ -2257,10 +2253,6 @@ static void machvirt_init(MachineState *machine) memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, machine->ram); - if (machine->device_memory) { - memory_region_add_subregion(sysmem, machine->device_memory->base, - &machine->device_memory->mr); - } virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); @@ -2740,64 +2732,6 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, dev, &error_abort); } -static void virt_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - if (!hotplug_dev2 && dev->hotplugged) { - /* - * Without a bus hotplug handler, we cannot control the plug/unplug - * order. We should never reach this point when hotplugging on ARM. - * However, it's nice to add a safety net, similar to what we have - * on x86. - */ - error_setg(errp, "hotplug of virtio based memory devices not supported" - " on this bus."); - return; - } - /* - * First, see if we can plug this memory device at all. If that - * succeeds, branch of to the actual hotplug handler. - */ - memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, - &local_err); - if (!local_err && hotplug_dev2) { - hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); - } - error_propagate(errp, local_err); -} - -static void virt_virtio_md_pci_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - /* - * Plug the memory device first and then branch off to the actual - * hotplug handler. If that one fails, we can easily undo the memory - * device bits. - */ - memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - if (hotplug_dev2) { - hotplug_handler_plug(hotplug_dev2, dev, &local_err); - if (local_err) { - memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - } - } - error_propagate(errp, local_err); -} - -static void virt_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ - error_setg(errp, "virtio based memory devices cannot be unplugged."); -} - - static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2805,8 +2739,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_memory_pre_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { hwaddr db_start = 0, db_end = 0; char *resv_prop_str; @@ -2855,12 +2789,11 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, SYS_BUS_DEVICE(dev)); } } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_memory_plug(hotplug_dev, dev, errp); - } - - if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { @@ -2915,8 +2848,9 @@ static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_dimm_unplug_request(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - virt_virtio_md_pci_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); } else { error_setg(errp, "device unplug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2928,6 +2862,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else { error_setg(errp, "virt: device unplug for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2941,7 +2877,7 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, if (device_is_dynamic_sysbus(mc, dev) || object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { return HOTPLUG_HANDLER(machine); } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 512162003b..9c74fa17ad 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1950,12 +1950,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); X86MachineState *x86ms = X86_MACHINE(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); - PCMachineState *pcms = PC_MACHINE(machine); int nb_numa_nodes = machine->numa_state->num_nodes; NodeInfo *numa_info = machine->numa_state->nodes; - ram_addr_t hotpluggable_address_space_size = - object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, - NULL); AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; @@ -2071,9 +2067,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ - if (hotpluggable_address_space_size) { + if (machine->device_memory) { build_srat_memory(table_data, machine->device_memory->base, - hotpluggable_address_space_size, nb_numa_nodes - 1, + memory_region_size(&machine->device_memory->mr), + nb_numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f01d7de5ad..3109d5e0e0 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -88,13 +88,11 @@ #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" #include "hw/virtio/virtio-iommu.h" -#include "hw/virtio/virtio-pmem-pci.h" -#include "hw/virtio/virtio-mem-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/i386/kvm/xen_overlay.h" #include "hw/i386/kvm/xen_evtchn.h" #include "hw/i386/kvm/xen_gnttab.h" #include "hw/i386/kvm/xen_xenstore.h" -#include "hw/mem/memory-device.h" #include "sysemu/replay.h" #include "target/i386/cpu.h" #include "e820_memory_layout.h" @@ -1039,13 +1037,11 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } - /* always allocate the device memory information */ - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); - /* initialize device memory address space */ if (pcmc->has_reserved_memory && (machine->ram_size < machine->maxram_size)) { ram_addr_t device_mem_size; + hwaddr device_mem_base; if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { error_report("unsupported amount of memory slots: %"PRIu64, @@ -1060,19 +1056,14 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } - pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size); + pc_get_device_memory_range(pcms, &device_mem_base, &device_mem_size); - if ((machine->device_memory->base + device_mem_size) < - device_mem_size) { + if (device_mem_base + device_mem_size < device_mem_size) { error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, machine->maxram_size); exit(EXIT_FAILURE); } - - memory_region_init(&machine->device_memory->mr, OBJECT(pcms), - "device-memory", device_mem_size); - memory_region_add_subregion(system_memory, machine->device_memory->base, - &machine->device_memory->mr); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } if (pcms->cxl_devices_state.is_enabled) { @@ -1120,7 +1111,7 @@ void pc_memory_init(PCMachineState *pcms, rom_set_fw(fw_cfg); - if (pcmc->has_reserved_memory && machine->device_memory->base) { + if (machine->device_memory) { uint64_t *val = g_malloc(sizeof(*val)); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); uint64_t res_mem_end = machine->device_memory->base; @@ -1500,68 +1491,6 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, error_propagate(errp, local_err); } -static void pc_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - if (!hotplug_dev2 && dev->hotplugged) { - /* - * Without a bus hotplug handler, we cannot control the plug/unplug - * order. We should never reach this point when hotplugging on x86, - * however, better add a safety net. - */ - error_setg(errp, "hotplug of virtio based memory devices not supported" - " on this bus."); - return; - } - /* - * First, see if we can plug this memory device at all. If that - * succeeds, branch of to the actual hotplug handler. - */ - memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, - &local_err); - if (!local_err && hotplug_dev2) { - hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); - } - error_propagate(errp, local_err); -} - -static void pc_virtio_md_pci_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); - Error *local_err = NULL; - - /* - * Plug the memory device first and then branch off to the actual - * hotplug handler. If that one fails, we can easily undo the memory - * device bits. - */ - memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - if (hotplug_dev2) { - hotplug_handler_plug(hotplug_dev2, dev, &local_err); - if (local_err) { - memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); - } - } - error_propagate(errp, local_err); -} - -static void pc_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ - error_setg(errp, "virtio based memory devices cannot be unplugged."); -} - -static void pc_virtio_md_pci_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - /* We don't support hot unplug of virtio based memory devices */ -} - static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -1569,9 +1498,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, pc_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_pre_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { /* Declare the APIC range as the reserved MSI region */ char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d", @@ -1603,9 +1531,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, pc_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } } @@ -1616,9 +1543,9 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, pc_memory_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_unplug_request_cb(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_unplug_request(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -1632,9 +1559,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, pc_memory_unplug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { x86_cpu_unplug_cb(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { - pc_virtio_md_pci_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -1646,8 +1572,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_CPU) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); @@ -1656,21 +1581,6 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, return NULL; } -static void -pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - MachineState *ms = MACHINE(obj); - int64_t value = 0; - - if (ms->device_memory) { - value = memory_region_size(&ms->device_memory->mr); - } - - visit_type_int(v, name, &value, errp); -} - static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -1984,10 +1894,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "Maximum ram below the 4G boundary (32bit boundary)"); - object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", - pc_machine_get_device_memory_region_size, NULL, - NULL, NULL); - object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto", pc_machine_get_vmport, pc_machine_set_vmport, NULL, NULL); diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 51a453fa9a..e19b042ce8 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -857,8 +857,8 @@ static void loongarch_init(MachineState *machine) /* initialize device memory address space */ if (machine->ram_size < machine->maxram_size) { - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + hwaddr device_mem_base; if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { error_report("unsupported amount of memory slots: %"PRIu64, @@ -873,14 +873,8 @@ static void loongarch_init(MachineState *machine) exit(EXIT_FAILURE); } /* device memory base is the top of high memory address. */ - machine->device_memory->base = 0x90000000 + highram_size; - machine->device_memory->base = - ROUND_UP(machine->device_memory->base, 1 * GiB); - - memory_region_init(&machine->device_memory->mr, OBJECT(lams), - "device-memory", device_mem_size); - memory_region_add_subregion(address_space_mem, machine->device_memory->base, - &machine->device_memory->mr); + device_mem_base = ROUND_UP(VIRT_HIGHMEM_BASE + highram_size, 1 * GiB); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } /* Add isa io region */ diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 1636db9679..667d56bd29 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -17,6 +17,7 @@ #include "qemu/range.h" #include "hw/virtio/vhost.h" #include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "trace.h" static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) @@ -51,27 +52,11 @@ static int memory_device_build_list(Object *obj, void *opaque) return 0; } -static int memory_device_used_region_size(Object *obj, void *opaque) -{ - uint64_t *size = opaque; - - if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { - const DeviceState *dev = DEVICE(obj); - const MemoryDeviceState *md = MEMORY_DEVICE(obj); - - if (dev->realized) { - *size += memory_device_get_region_size(md, &error_abort); - } - } - - object_child_foreach(obj, memory_device_used_region_size, opaque); - return 0; -} - -static void memory_device_check_addable(MachineState *ms, uint64_t size, +static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, Error **errp) { - uint64_t used_region_size = 0; + const uint64_t used_region_size = ms->device_memory->used_region_size; + const uint64_t size = memory_region_size(mr); /* we will need a new memory slot for kvm and vhost */ if (kvm_enabled() && !kvm_has_free_slot(ms)) { @@ -84,7 +69,6 @@ static void memory_device_check_addable(MachineState *ms, uint64_t size, } /* will we exceed the total amount of memory specified */ - memory_device_used_region_size(OBJECT(ms), &used_region_size); if (used_region_size + size < used_region_size || used_region_size + size > ms->maxram_size - ms->ram_size) { error_setg(errp, "not enough space, currently 0x%" PRIx64 @@ -100,21 +84,9 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, uint64_t align, uint64_t size, Error **errp) { - Error *err = NULL; GSList *list = NULL, *item; Range as, new = range_empty; - if (!ms->device_memory) { - error_setg(errp, "memory devices (e.g. for memory hotplug) are not " - "supported by the machine"); - return 0; - } - - if (!memory_region_size(&ms->device_memory->mr)) { - error_setg(errp, "memory devices (e.g. for memory hotplug) are not " - "enabled, please specify the maxmem option"); - return 0; - } range_init_nofail(&as, ms->device_memory->base, memory_region_size(&ms->device_memory->mr)); @@ -126,12 +98,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms, align); } - memory_device_check_addable(ms, size, &err); - if (err) { - error_propagate(errp, err); - return 0; - } - if (hint && !QEMU_IS_ALIGNED(*hint, align)) { error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes", align); @@ -255,11 +221,23 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, uint64_t addr, align = 0; MemoryRegion *mr; + if (!ms->device_memory) { + error_setg(errp, "the configuration is not prepared for memory devices" + " (e.g., for memory hotplug), consider specifying the" + " maxmem option"); + return; + } + mr = mdc->get_memory_region(md, &local_err); if (local_err) { goto out; } + memory_device_check_addable(ms, mr, &local_err); + if (local_err) { + goto out; + } + if (legacy_align) { align = *legacy_align; } else { @@ -296,6 +274,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) mr = mdc->get_memory_region(md, &error_abort); g_assert(ms->device_memory); + ms->device_memory->used_region_size += memory_region_size(mr); memory_region_add_subregion(&ms->device_memory->mr, addr - ms->device_memory->base, mr); trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr); @@ -314,6 +293,7 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); memory_region_del_subregion(&ms->device_memory->mr, mr); + ms->device_memory->used_region_size -= memory_region_size(mr); trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", mdc->get_addr(md)); } @@ -333,6 +313,19 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md, return memory_region_size(mr); } +void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) +{ + g_assert(size); + g_assert(!ms->device_memory); + ms->device_memory = g_new0(DeviceMemoryState, 1); + ms->device_memory->base = base; + + memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory", + size); + memory_region_add_subregion(get_system_memory(), ms->device_memory->base, + &ms->device_memory->mr); +} + static const TypeInfo memory_device_info = { .name = TYPE_MEMORY_DEVICE, .parent = TYPE_INTERFACE, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 54dbfd7fe9..1c8b8d57a7 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -547,10 +547,8 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, cpu_to_be32(lmb_size & 0xffffffff)}; MemoryDeviceInfoList *dimms = NULL; - /* - * Don't create the node if there is no device memory - */ - if (machine->ram_size == machine->maxram_size) { + /* Don't create the node if there is no device memory. */ + if (!machine->device_memory) { return 0; } @@ -860,16 +858,23 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) int rtas; GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); - uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + - memory_region_size(&MACHINE(spapr)->device_memory->mr); uint32_t lrdr_capacity[] = { - cpu_to_be32(max_device_addr >> 32), - cpu_to_be32(max_device_addr & 0xffffffff), + 0, + 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; + /* Do we have device memory? */ + if (MACHINE(spapr)->device_memory) { + uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + + lrdr_capacity[0] = cpu_to_be32(max_device_addr >> 32); + lrdr_capacity[1] = cpu_to_be32(max_device_addr & 0xffffffff); + } + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); /* hypertas */ @@ -2455,6 +2460,7 @@ static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; int i; + g_assert(!nr_lmbs || machine->device_memory); for (i = 0; i < nr_lmbs; i++) { uint64_t addr; @@ -2876,12 +2882,11 @@ static void spapr_machine_init(MachineState *machine) /* map RAM */ memory_region_add_subregion(sysmem, 0, machine->ram); - /* always allocate the device memory information */ - machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); - /* initialize hotplug memory address space */ if (machine->ram_size < machine->maxram_size) { ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + hwaddr device_mem_base; + /* * Limit the number of hotpluggable memory slots to half the number * slots that KVM supports, leaving the other half for PCI and other @@ -2900,12 +2905,8 @@ static void spapr_machine_init(MachineState *machine) exit(1); } - machine->device_memory->base = ROUND_UP(machine->ram_size, - SPAPR_DEVICE_MEM_ALIGN); - memory_region_init(&machine->device_memory->mr, OBJECT(spapr), - "device-memory", device_mem_size); - memory_region_add_subregion(sysmem, machine->device_memory->base, - &machine->device_memory->mr); + device_mem_base = ROUND_UP(machine->ram_size, SPAPR_DEVICE_MEM_ALIGN); + machine_memory_devices_init(machine, device_mem_base, device_mem_size); } if (smc->dr_lmb_enabled) { @@ -5119,7 +5120,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, int i; /* Do we have device memory? */ - if (MACHINE(spapr)->maxram_size > ram_top) { + if (MACHINE(spapr)->device_memory) { /* Can't just use maxram_size, because there may be an * alignment gap between normal and device memory regions */ diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 002ea0b7c1..9b1f225d4a 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -32,7 +32,7 @@ bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) if (addr < machine->ram_size) { return true; } - if ((addr >= dms->base) + if (dms && (addr >= dms->base) && ((addr - dms->base) < memory_region_size(&dms->mr))) { return true; } diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index a9ee09062f..92c9cf6c96 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -35,6 +35,10 @@ config VIRTIO_CRYPTO default y depends on VIRTIO +config VIRTIO_MD + bool + select MEM_DEVICE + config VIRTIO_PMEM_SUPPORTED bool @@ -43,7 +47,7 @@ config VIRTIO_PMEM default y depends on VIRTIO depends on VIRTIO_PMEM_SUPPORTED - select MEM_DEVICE + select VIRTIO_MD config VIRTIO_MEM_SUPPORTED bool @@ -54,7 +58,7 @@ config VIRTIO_MEM depends on VIRTIO depends on LINUX depends on VIRTIO_MEM_SUPPORTED - select MEM_DEVICE + select VIRTIO_MD config VHOST_VSOCK_COMMON bool diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index c4f4fe05fa..13e7c6c272 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -63,6 +63,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c' virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c new file mode 100644 index 0000000000..62bfb7920b --- /dev/null +++ b/hw/virtio/virtio-md-pci.c @@ -0,0 +1,151 @@ +/* + * Abstract virtio based memory device + * + * Copyright (C) 2023 Red Hat, Inc. + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/virtio/virtio-md-pci.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + if (!bus_handler && dev->hotplugged) { + /* + * Without a bus hotplug handler, we cannot control the plug/unplug + * order. We should never reach this point when hotplugging on x86, + * however, better add a safety net. + */ + error_setg(errp, "hotplug of virtio based memory devices not supported" + " on this bus."); + return; + } + /* + * First, see if we can plug this memory device at all. If that + * succeeds, branch of to the actual hotplug handler. + */ + memory_device_pre_plug(md, ms, NULL, &local_err); + if (!local_err && bus_handler) { + hotplug_handler_pre_plug(bus_handler, dev, &local_err); + } + error_propagate(errp, local_err); +} + +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + /* + * Plug the memory device first and then branch off to the actual + * hotplug handler. If that one fails, we can easily undo the memory + * device bits. + */ + memory_device_plug(md, ms); + if (bus_handler) { + hotplug_handler_plug(bus_handler, dev, &local_err); + if (local_err) { + memory_device_unplug(md, ms); + } + } + error_propagate(errp, local_err); +} + +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp) +{ + VirtIOMDPCIClass *vmdc = VIRTIO_MD_PCI_GET_CLASS(vmd); + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + HotplugHandlerClass *hdc; + Error *local_err = NULL; + + if (!vmdc->unplug_request_check) { + error_setg(errp, "this virtio based memory devices cannot be unplugged"); + return; + } + + if (!bus_handler) { + error_setg(errp, "hotunplug of virtio based memory devices not" + "supported on this bus"); + return; + } + + vmdc->unplug_request_check(vmd, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * Forward the async request or turn it into a sync request (handling it + * like qdev_unplug()). + */ + hdc = HOTPLUG_HANDLER_GET_CLASS(bus_handler); + if (hdc->unplug_request) { + hotplug_handler_unplug_request(bus_handler, dev, &local_err); + } else { + virtio_md_pci_unplug(vmd, ms, &local_err); + if (!local_err) { + object_unparent(OBJECT(dev)); + } + } +} + +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + DeviceState *dev = DEVICE(vmd); + HotplugHandler *bus_handler = qdev_get_bus_hotplug_handler(dev); + MemoryDeviceState *md = MEMORY_DEVICE(vmd); + Error *local_err = NULL; + + /* Unplug the memory device while it is still realized. */ + memory_device_unplug(md, ms); + + if (bus_handler) { + hotplug_handler_unplug(bus_handler, dev, &local_err); + if (local_err) { + /* Not expected to fail ... but still try to recover. */ + memory_device_plug(md, ms); + error_propagate(errp, local_err); + return; + } + } else { + /* Very unexpected, but let's just try to do the right thing. */ + warn_report("Unexpected unplug of virtio based memory device"); + qdev_unrealize(dev); + } +} + +static const TypeInfo virtio_md_pci_info = { + .name = TYPE_VIRTIO_MD_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VirtIOMDPCI), + .class_size = sizeof(VirtIOMDPCIClass), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void virtio_md_pci_register(void) +{ + type_register_static(&virtio_md_pci_info); +} +type_init(virtio_md_pci_register) diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index b85c12668d..c4597e029e 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -93,12 +93,53 @@ static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data) g_free(qom_path); } +static void virtio_mem_pci_unplug_request_check(VirtIOMDPCI *vmd, Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(vmd); + VirtIOMEM *vmem = &pci_mem->vdev; + VirtIOMEMClass *vpc = VIRTIO_MEM_GET_CLASS(vmem); + + vpc->unplug_request_check(vmem, errp); +} + +static void virtio_mem_pci_get_requested_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(obj); + + object_property_get(OBJECT(&pci_mem->vdev), name, v, errp); +} + +static void virtio_mem_pci_set_requested_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(obj); + DeviceState *dev = DEVICE(obj); + + /* + * If we passed virtio_mem_pci_unplug_request_check(), making sure that + * the requested size is 0, don't allow modifying the requested size + * anymore, otherwise the VM might end up hotplugging memory before + * handling the unplug request. + */ + if (dev->pending_deleted_event) { + error_setg(errp, "'%s' cannot be changed if the device is in the" + " process of unplug", name); + return; + } + + object_property_set(OBJECT(&pci_mem->vdev), name, v, errp); +} + static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + VirtIOMDPCIClass *vmdc = VIRTIO_MD_PCI_CLASS(klass); k->realize = virtio_mem_pci_realize; set_bit(DEVICE_CATEGORY_MISC, dc->categories); @@ -111,6 +152,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) mdc->get_memory_region = virtio_mem_pci_get_memory_region; mdc->fill_device_info = virtio_mem_pci_fill_device_info; mdc->get_min_alignment = virtio_mem_pci_get_min_alignment; + + vmdc->unplug_request_check = virtio_mem_pci_unplug_request_check; } static void virtio_mem_pci_instance_init(Object *obj) @@ -135,21 +178,18 @@ static void virtio_mem_pci_instance_init(Object *obj) OBJECT(&dev->vdev), VIRTIO_MEM_BLOCK_SIZE_PROP); object_property_add_alias(obj, VIRTIO_MEM_SIZE_PROP, OBJECT(&dev->vdev), VIRTIO_MEM_SIZE_PROP); - object_property_add_alias(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, - OBJECT(&dev->vdev), - VIRTIO_MEM_REQUESTED_SIZE_PROP); + object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", + virtio_mem_pci_get_requested_size, + virtio_mem_pci_set_requested_size, NULL, NULL); } static const VirtioPCIDeviceTypeInfo virtio_mem_pci_info = { .base_name = TYPE_VIRTIO_MEM_PCI, + .parent = TYPE_VIRTIO_MD_PCI, .generic_name = "virtio-mem-pci", .instance_size = sizeof(VirtIOMEMPCI), .instance_init = virtio_mem_pci_instance_init, .class_init = virtio_mem_pci_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_MEMORY_DEVICE }, - { } - }, }; static void virtio_mem_pci_register_types(void) diff --git a/hw/virtio/virtio-mem-pci.h b/hw/virtio/virtio-mem-pci.h index e636e1a48d..c50b51d608 100644 --- a/hw/virtio/virtio-mem-pci.h +++ b/hw/virtio/virtio-mem-pci.h @@ -13,21 +13,21 @@ #ifndef QEMU_VIRTIO_MEM_PCI_H #define QEMU_VIRTIO_MEM_PCI_H -#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-mem.h" #include "qom/object.h" typedef struct VirtIOMEMPCI VirtIOMEMPCI; /* - * virtio-mem-pci: This extends VirtioPCIProxy. + * virtio-mem-pci: This extends VirtIOMDPCI. */ #define TYPE_VIRTIO_MEM_PCI "virtio-mem-pci-base" DECLARE_INSTANCE_CHECKER(VirtIOMEMPCI, VIRTIO_MEM_PCI, TYPE_VIRTIO_MEM_PCI) struct VirtIOMEMPCI { - VirtIOPCIProxy parent_obj; + VirtIOMDPCI parent_obj; VirtIOMEM vdev; Notifier size_change_notifier; }; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index ec0ae32589..b6e781741e 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -18,6 +18,7 @@ #include "sysemu/numa.h" #include "sysemu/sysemu.h" #include "sysemu/reset.h" +#include "sysemu/runstate.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-mem.h" @@ -621,20 +622,20 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem) { RAMBlock *rb = vmem->memdev->mr.ram_block; - if (virtio_mem_is_busy()) { - return -EBUSY; - } - - if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { - return -EBUSY; - } - virtio_mem_notify_unplug_all(vmem); - - bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); if (vmem->size) { + if (virtio_mem_is_busy()) { + return -EBUSY; + } + if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { + return -EBUSY; + } + virtio_mem_notify_unplug_all(vmem); + + bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); vmem->size = 0; notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); } + trace_virtio_mem_unplugged_all(); virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); return 0; @@ -901,11 +902,23 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } - ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); - if (ret) { - error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); - ram_block_coordinated_discard_require(false); - return; + /* + * We don't know at this point whether shared RAM is migrated using + * QEMU or migrated using the file content. "x-ignore-shared" will be + * configured after realizing the device. So in case we have an + * incoming migration, simply always skip the discard step. + * + * Otherwise, make sure that we start with a clean slate: either the + * memory backend might get reused or the shared file might still have + * memory allocated. + */ + if (!runstate_check(RUN_STATE_INMIGRATE)) { + ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); + if (ret) { + error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + ram_block_coordinated_discard_require(false); + return; + } } virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); @@ -977,10 +990,6 @@ static int virtio_mem_post_load(void *opaque, int version_id) RamDiscardListener *rdl; int ret; - if (vmem->prealloc && !vmem->early_migration) { - warn_report("Proper preallocation with migration requires a newer QEMU machine"); - } - /* * We started out with all memory discarded and our memory region is mapped * into an address space. Replay, now that we updated the bitmap. @@ -993,6 +1002,18 @@ static int virtio_mem_post_load(void *opaque, int version_id) } } + /* + * If shared RAM is migrated using the file content and not using QEMU, + * don't mess with preallocation and postcopy. + */ + if (migrate_ram_is_ignored(vmem->memdev->mr.ram_block)) { + return 0; + } + + if (vmem->prealloc && !vmem->early_migration) { + warn_report("Proper preallocation with migration requires a newer QEMU machine"); + } + if (migration_in_incoming_postcopy()) { return 0; } @@ -1025,6 +1046,14 @@ static int virtio_mem_post_load_early(void *opaque, int version_id) return 0; } + /* + * If shared RAM is migrated using the file content and not using QEMU, + * don't mess with preallocation and postcopy. + */ + if (migrate_ram_is_ignored(rb)) { + return 0; + } + /* * We restored the bitmap and verified that the basic properties * match on source and destination, so we can go ahead and preallocate @@ -1483,6 +1512,30 @@ static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, QLIST_REMOVE(rdl, next); } +static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp) +{ + if (vmem->unplugged_inaccessible == ON_OFF_AUTO_OFF) { + /* + * We could allow it with a usable region size of 0, but let's just + * not care about that legacy setting. + */ + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "' != 'on'"); + return; + } + + if (vmem->size) { + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_SIZE_PROP "' != '0'"); + return; + } + if (vmem->requested_size) { + error_setg(errp, "virtio-mem device cannot get unplugged while" + " '" VIRTIO_MEM_REQUESTED_SIZE_PROP "' != '0'"); + return; + } +} + static void virtio_mem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1505,6 +1558,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->get_memory_region = virtio_mem_get_memory_region; vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; + vmc->unplug_request_check = virtio_mem_unplug_request_check; rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; rdmc->is_populated = virtio_mem_rdm_is_populated; diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c index 197d219204..cfe7f3b67c 100644 --- a/hw/virtio/virtio-pmem-pci.c +++ b/hw/virtio/virtio-pmem-pci.c @@ -110,13 +110,10 @@ static void virtio_pmem_pci_instance_init(Object *obj) static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = { .base_name = TYPE_VIRTIO_PMEM_PCI, .generic_name = "virtio-pmem-pci", + .parent = TYPE_VIRTIO_MD_PCI, .instance_size = sizeof(VirtIOPMEMPCI), .instance_init = virtio_pmem_pci_instance_init, .class_init = virtio_pmem_pci_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_MEMORY_DEVICE }, - { } - }, }; static void virtio_pmem_pci_register_types(void) diff --git a/hw/virtio/virtio-pmem-pci.h b/hw/virtio/virtio-pmem-pci.h index 63cfe727f7..88b01ce2db 100644 --- a/hw/virtio/virtio-pmem-pci.h +++ b/hw/virtio/virtio-pmem-pci.h @@ -14,21 +14,21 @@ #ifndef QEMU_VIRTIO_PMEM_PCI_H #define QEMU_VIRTIO_PMEM_PCI_H -#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-pmem.h" #include "qom/object.h" typedef struct VirtIOPMEMPCI VirtIOPMEMPCI; /* - * virtio-pmem-pci: This extends VirtioPCIProxy. + * virtio-pmem-pci: This extends VirtIOMDPCI. */ #define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base" DECLARE_INSTANCE_CHECKER(VirtIOPMEMPCI, VIRTIO_PMEM_PCI, TYPE_VIRTIO_PMEM_PCI) struct VirtIOPMEMPCI { - VirtIOPCIProxy parent_obj; + VirtIOMDPCI parent_obj; VirtIOPMEM vdev; }; diff --git a/include/hw/boards.h b/include/hw/boards.h index 12d9e9d17c..ed83360198 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -37,6 +37,7 @@ void machine_parse_smp_config(MachineState *ms, const SMPConfiguration *config, Error **errp); unsigned int machine_topo_get_cores_per_socket(const MachineState *ms); unsigned int machine_topo_get_threads_per_socket(const MachineState *ms); +void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size); /** * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices @@ -297,11 +298,13 @@ struct MachineClass { * address space for memory devices starts * @mr: address space container for memory devices * @dimm_size: the sum of plugged DIMMs' sizes + * @used_region_size: the part of @mr already used by memory devices */ typedef struct DeviceMemoryState { hwaddr base; MemoryRegion mr; uint64_t dimm_size; + uint64_t used_region_size; } DeviceMemoryState; /** diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c34c698cdd..d54e8b1101 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -60,7 +60,6 @@ typedef struct PCMachineState { #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" -#define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMBUS "smbus" #define PC_MACHINE_SATA "sata" diff --git a/include/hw/virtio/virtio-md-pci.h b/include/hw/virtio/virtio-md-pci.h new file mode 100644 index 0000000000..5912e16674 --- /dev/null +++ b/include/hw/virtio/virtio-md-pci.h @@ -0,0 +1,44 @@ +/* + * Abstract virtio based memory device + * + * Copyright (C) 2023 Red Hat, Inc. + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_MD_PCI_H +#define HW_VIRTIO_MD_PCI_H + +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + +/* + * virtio-md-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_MD_PCI "virtio-md-pci" + +OBJECT_DECLARE_TYPE(VirtIOMDPCI, VirtIOMDPCIClass, VIRTIO_MD_PCI) + +struct VirtIOMDPCIClass { + /* private */ + VirtioPCIClass parent; + + /* public */ + void (*unplug_request_check)(VirtIOMDPCI *vmd, Error **errp); +}; + +struct VirtIOMDPCI { + VirtIOPCIProxy parent_obj; +}; + +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp); +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp); + +#endif diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index f15e561785..ab0fe2b4f2 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -98,6 +98,7 @@ struct VirtIOMEMClass { MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); + void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp); }; #endif diff --git a/include/migration/misc.h b/include/migration/misc.h index 5ebe13b4b9..7dcc0b5c2c 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -40,6 +40,7 @@ int precopy_notify(PrecopyNotifyReason reason, Error **errp); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); +bool migrate_ram_is_ignored(RAMBlock *block); /* migration/block.c */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 5615ec29eb..29aea9456d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -408,7 +408,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) /* * We don't support postcopy with some type of ramblocks. * - * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked + * NOTE: we explicitly ignored migrate_ram_is_ignored() instead we checked * all possible ramblocks. This is because this function can be called * when creating the migration object, during the phase RAM_MIGRATABLE * is not even properly set for all the ramblocks. diff --git a/migration/ram.c b/migration/ram.c index 5283a75f02..0ada6477e8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -194,7 +194,7 @@ static bool postcopy_preempt_active(void) return migrate_postcopy_preempt() && migration_in_postcopy(); } -bool ramblock_is_ignored(RAMBlock *block) +bool migrate_ram_is_ignored(RAMBlock *block) { return !qemu_ram_is_migratable(block) || (migrate_ignore_shared() && qemu_ram_is_shared(block) @@ -696,7 +696,7 @@ static void pss_find_next_dirty(PageSearchStatus *pss) unsigned long size = rb->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = rb->bmap; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { /* Points directly to the end, so we know no dirty page */ pss->page = size; return; @@ -780,7 +780,7 @@ unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, *num = 0; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { return size; } @@ -2260,7 +2260,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) unsigned long start_page = pss->page; int res; - if (ramblock_is_ignored(pss->block)) { + if (migrate_ram_is_ignored(pss->block)) { error_report("block %s should not be migrated !", pss->block->idstr); return 0; } @@ -3347,7 +3347,7 @@ static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis, return NULL; } - if (ramblock_is_ignored(block)) { + if (migrate_ram_is_ignored(block)) { error_report("block %s should not be migrated !", id); return NULL; } @@ -3958,7 +3958,7 @@ static int ram_load_precopy(QEMUFile *f) } if (migrate_ignore_shared()) { hwaddr addr = qemu_get_be64(f); - if (ramblock_is_ignored(block) && + if (migrate_ram_is_ignored(block) && block->mr->addr != addr) { error_report("Mismatched GPAs for block %s " "%" PRId64 "!= %" PRId64, @@ -4254,7 +4254,7 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); Error *err = NULL; - if (ramblock_is_ignored(rb)) { + if (migrate_ram_is_ignored(rb)) { return; } diff --git a/migration/ram.h b/migration/ram.h index ea1f3c25b5..145c915ca7 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -36,11 +36,10 @@ extern XBZRLECacheStats xbzrle_counters; extern CompressionStats compression_counters; -bool ramblock_is_ignored(RAMBlock *block); /* Should be holding either ram_list.mutex, or the RCU lock. */ #define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ INTERNAL_RAMBLOCK_FOREACH(block) \ - if (ramblock_is_ignored(block)) {} else + if (migrate_ram_is_ignored(block)) {} else #define RAMBLOCK_FOREACH_MIGRATABLE(block) \ INTERNAL_RAMBLOCK_FOREACH(block) \ diff --git a/softmmu/physmem.c b/softmmu/physmem.c index bda475a719..3df73542e1 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -3456,6 +3456,24 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) * so a userfault will trigger. */ #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + /* + * We'll discard data from the actual file, even though we only + * have a MAP_PRIVATE mapping, possibly messing with other + * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to + * change that behavior whithout violating the promised + * semantics of ram_block_discard_range(). + * + * Only warn, because it works as long as nobody else uses that + * file. + */ + if (!qemu_ram_is_shared(rb)) { + warn_report_once("ram_block_discard_range: Discarding RAM" + " in private file mappings is possibly" + " dangerous, because it will modify the" + " underlying file and will affect other" + " users of the file"); + } + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, start, length); if (ret) { diff --git a/stubs/meson.build b/stubs/meson.build index a56645e2f7..ef6e39a64d 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -60,6 +60,7 @@ if have_system stub_ss.add(files('semihost.c')) stub_ss.add(files('usb-dev-stub.c')) stub_ss.add(files('xen-hw-stub.c')) + stub_ss.add(files('virtio-md-pci.c')) else stub_ss.add(files('qdev.c')) endif diff --git a/stubs/virtio-md-pci.c b/stubs/virtio-md-pci.c new file mode 100644 index 0000000000..ce5bba0c9d --- /dev/null +++ b/stubs/virtio-md-pci.c @@ -0,0 +1,24 @@ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-md-pci.h" + +void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms, + Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +} + +void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp) +{ + error_setg(errp, "virtio based memory devices not supported"); +}