From a7d69ff10b085ba6f8236600829532984cdea714 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:22 +0530 Subject: [PATCH 1/6] pc,pc-dimm: Extract hotplug related fields in PCMachineState to a structure Move hotplug_memory_base and hotplug_memory fields of PCMachineState into a separate structure so that the same can be made use of from other architectures supporing memory hotplug. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/i386/acpi-build.c | 2 +- hw/i386/pc.c | 26 +++++++++++++------------- include/hw/i386/pc.h | 7 ++----- include/hw/mem/pc-dimm.h | 11 +++++++++++ 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 00818b925b..aed811a166 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1509,7 +1509,7 @@ build_srat(GArray *table_data, GArray *linker, PcGuestInfo *guest_info) */ if (hotplugabble_address_space_size) { numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, pcms->hotplug_memory_base, + acpi_build_srat_memory(numamem, pcms->hotplug_memory.base, hotplugabble_address_space_size, 0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7072930cfc..e51c3bed8a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1297,7 +1297,7 @@ FWCfgState *pc_memory_init(MachineState *machine, exit(EXIT_FAILURE); } - pcms->hotplug_memory_base = + pcms->hotplug_memory.base = ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30); if (pcms->enforce_aligned_dimm) { @@ -1305,17 +1305,17 @@ FWCfgState *pc_memory_init(MachineState *machine, hotplug_mem_size += (1ULL << 30) * machine->ram_slots; } - if ((pcms->hotplug_memory_base + hotplug_mem_size) < + if ((pcms->hotplug_memory.base + hotplug_mem_size) < hotplug_mem_size) { error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, machine->maxram_size); exit(EXIT_FAILURE); } - memory_region_init(&pcms->hotplug_memory, OBJECT(pcms), + memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms), "hotplug-memory", hotplug_mem_size); - memory_region_add_subregion(system_memory, pcms->hotplug_memory_base, - &pcms->hotplug_memory); + memory_region_add_subregion(system_memory, pcms->hotplug_memory.base, + &pcms->hotplug_memory.mr); } /* Initialize PC system firmware */ @@ -1333,9 +1333,9 @@ FWCfgState *pc_memory_init(MachineState *machine, fw_cfg = bochs_bios_init(); rom_set_fw(fw_cfg); - if (guest_info->has_reserved_memory && pcms->hotplug_memory_base) { + if (guest_info->has_reserved_memory && pcms->hotplug_memory.base) { uint64_t *val = g_malloc(sizeof(*val)); - *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory_base, 0x1ULL << 30)); + *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory.base, 0x1ULL << 30)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); } @@ -1575,8 +1575,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, align = memory_region_get_alignment(mr); } - addr = pc_dimm_get_free_addr(pcms->hotplug_memory_base, - memory_region_size(&pcms->hotplug_memory), + addr = pc_dimm_get_free_addr(pcms->hotplug_memory.base, + memory_region_size(&pcms->hotplug_memory.mr), !addr ? NULL : &addr, align, memory_region_size(mr), &local_err); if (local_err) { @@ -1630,8 +1630,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } - memory_region_add_subregion(&pcms->hotplug_memory, - addr - pcms->hotplug_memory_base, mr); + memory_region_add_subregion(&pcms->hotplug_memory.mr, + addr - pcms->hotplug_memory.base, mr); vmstate_register_ram(mr, dev); hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); @@ -1677,7 +1677,7 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, goto out; } - memory_region_del_subregion(&pcms->hotplug_memory, mr); + memory_region_del_subregion(&pcms->hotplug_memory.mr, mr); vmstate_unregister_ram(mr, dev); object_unparent(OBJECT(dev)); @@ -1766,7 +1766,7 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, const char *name, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); - int64_t value = memory_region_size(&pcms->hotplug_memory); + int64_t value = memory_region_size(&pcms->hotplug_memory.mr); visit_type_int(v, &value, name, errp); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 86c565147c..328c8f72e0 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -15,14 +15,12 @@ #include "hw/pci/pci.h" #include "hw/boards.h" #include "hw/compat.h" +#include "hw/mem/pc-dimm.h" #define HPET_INTCAP "hpet-intcap" /** * PCMachineState: - * @hotplug_memory_base: address in guest RAM address space where hotplug memory - * address space begins. - * @hotplug_memory: hotplug memory addess space container * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling * @enforce_aligned_dimm: check that DIMM's address/size is aligned by * backend's alignment value if provided @@ -32,8 +30,7 @@ struct PCMachineState { MachineState parent_obj; /* */ - ram_addr_t hotplug_memory_base; - MemoryRegion hotplug_memory; + MemoryHotplugState hotplug_memory; HotplugHandler *acpi_dev; ISADevice *rtc; diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index f7b80b44b7..4bace7bed3 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -70,6 +70,17 @@ typedef struct PCDIMMDeviceClass { MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm); } PCDIMMDeviceClass; +/** + * MemoryHotplugState: + * @base: address in guest RAM address space where hotplug memory + * address space begins. + * @mr: hotplug memory address space container + */ +typedef struct MemoryHotplugState { + ram_addr_t base; + MemoryRegion mr; +} MemoryHotplugState; + uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, uint64_t *hint, uint64_t align, uint64_t size, From 43bbb49ef7032a8bfdafbd02d0286512af161089 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:23 +0530 Subject: [PATCH 2/6] pc,pc-dimm: Factor out reusable parts in pc_dimm_plug to a separate routine pc_dimm_plug() has code that will be needed for memory plug handlers in other archs too. Extract code from pc_dimm_plug() into a generic routine pc_dimm_memory_plug() that resides in pc-dimm.c. Also correspondingly refactor re-usable unplug code into pc_dimm_memory_unplug(). Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/i386/pc.c | 66 ++------------------------------- hw/mem/pc-dimm.c | 80 ++++++++++++++++++++++++++++++++++++++++ include/hw/mem/pc-dimm.h | 4 ++ 3 files changed, 87 insertions(+), 63 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e51c3bed8a..8df1f3e033 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -64,7 +64,6 @@ #include "hw/pci/pci_host.h" #include "acpi-build.h" #include "hw/mem/pc-dimm.h" -#include "trace.h" #include "qapi/visitor.h" #include "qapi-visit.h" @@ -1554,86 +1553,29 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) static void pc_dimm_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - int slot; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); - MachineState *machine = MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t existing_dimms_capacity = 0; uint64_t align = TARGET_PAGE_SIZE; - uint64_t addr; - - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); - if (local_err) { - goto out; - } if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) { align = memory_region_get_alignment(mr); } - addr = pc_dimm_get_free_addr(pcms->hotplug_memory.base, - memory_region_size(&pcms->hotplug_memory.mr), - !addr ? NULL : &addr, align, - memory_region_size(mr), &local_err); - if (local_err) { - goto out; - } - - existing_dimms_capacity = pc_existing_dimms_capacity(&local_err); - if (local_err) { - goto out; - } - - if (existing_dimms_capacity + memory_region_size(mr) > - machine->maxram_size - machine->ram_size) { - error_setg(&local_err, "not enough space, currently 0x%" PRIx64 - " in use of total hot pluggable 0x" RAM_ADDR_FMT, - existing_dimms_capacity, - machine->maxram_size - machine->ram_size); - goto out; - } - - object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); - if (local_err) { - goto out; - } - trace_mhp_pc_dimm_assigned_address(addr); - - slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); - if (local_err) { - goto out; - } - - slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, - machine->ram_slots, &local_err); - if (local_err) { - goto out; - } - object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); - if (local_err) { - goto out; - } - trace_mhp_pc_dimm_assigned_slot(slot); - if (!pcms->acpi_dev) { error_setg(&local_err, "memory hotplug is not enabled: missing acpi device"); goto out; } - if (kvm_enabled() && !kvm_has_free_slot(machine)) { - error_setg(&local_err, "hypervisor has no free memory slots left"); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); + if (local_err) { goto out; } - memory_region_add_subregion(&pcms->hotplug_memory.mr, - addr - pcms->hotplug_memory.base, mr); - vmstate_register_ram(mr, dev); - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); out: @@ -1677,9 +1619,7 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, goto out; } - memory_region_del_subregion(&pcms->hotplug_memory.mr, mr); - vmstate_unregister_ram(mr, dev); - + pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr); object_unparent(OBJECT(dev)); out: diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index e70633d29f..98971b7a91 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -23,12 +23,92 @@ #include "qapi/visitor.h" #include "qemu/range.h" #include "sysemu/numa.h" +#include "sysemu/kvm.h" +#include "trace.h" typedef struct pc_dimms_capacity { uint64_t size; Error **errp; } pc_dimms_capacity; +void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr, uint64_t align, Error **errp) +{ + int slot; + MachineState *machine = MACHINE(qdev_get_machine()); + PCDIMMDevice *dimm = PC_DIMM(dev); + Error *local_err = NULL; + uint64_t existing_dimms_capacity = 0; + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + + addr = pc_dimm_get_free_addr(hpms->base, + memory_region_size(&hpms->mr), + !addr ? NULL : &addr, align, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + + existing_dimms_capacity = pc_existing_dimms_capacity(&local_err); + if (local_err) { + goto out; + } + + if (existing_dimms_capacity + memory_region_size(mr) > + machine->maxram_size - machine->ram_size) { + error_setg(&local_err, "not enough space, currently 0x%" PRIx64 + " in use of total hot pluggable 0x" RAM_ADDR_FMT, + existing_dimms_capacity, + machine->maxram_size - machine->ram_size); + goto out; + } + + object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_address(addr); + + slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + goto out; + } + object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out; + } + trace_mhp_pc_dimm_assigned_slot(slot); + + if (kvm_enabled() && !kvm_has_free_slot(machine)) { + error_setg(&local_err, "hypervisor has no free memory slots left"); + goto out; + } + + memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); + vmstate_register_ram(mr, dev); + +out: + error_propagate(errp, local_err); +} + +void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr) +{ + memory_region_del_subregion(&hpms->mr, mr); + vmstate_unregister_ram(mr, dev); +} + static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque) { pc_dimms_capacity *cap = opaque; diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 4bace7bed3..d83bf30ea9 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -90,4 +90,8 @@ int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); int qmp_pc_dimm_device_list(Object *obj, void *opaque); uint64_t pc_existing_dimms_capacity(Error **errp); +void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr, uint64_t align, Error **errp); +void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, + MemoryRegion *mr); #endif From 8e23184b6b2f8426041854b18fb56a3ff197d5a0 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:24 +0530 Subject: [PATCH 3/6] pc: Abort if HotplugHandlerClass::plug() fails HotplugHandlerClass::plug() shouldn't fail and hence use error_abort to abort if it fails. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/i386/pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8df1f3e033..a66416d188 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1577,7 +1577,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, } hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); out: error_propagate(errp, local_err); } From fa9ea81d15d459f6c4a39d77ae680af94cebd65e Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:25 +0530 Subject: [PATCH 4/6] numa,pc-dimm: Store pc-dimm memory information in numa_info Start storing the (start_addr, end_addr) of the pc-dimm memory in corresponding numa_info[node] so that this information can be used to lookup node by address. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/mem/pc-dimm.c | 4 ++++ include/sysemu/numa.h | 10 ++++++++++ numa.c | 26 ++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 98971b7a91..bb04862de8 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -97,6 +97,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr); vmstate_register_ram(mr, dev); + numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node); out: error_propagate(errp, local_err); @@ -105,6 +106,9 @@ out: void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, MemoryRegion *mr) { + PCDIMMDevice *dimm = PC_DIMM(dev); + + numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node); memory_region_del_subregion(&hpms->mr, mr); vmstate_unregister_ram(mr, dev); } diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 6523b4d7f9..7176364062 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -10,16 +10,26 @@ extern int nb_numa_nodes; /* Number of NUMA nodes */ +struct numa_addr_range { + ram_addr_t mem_start; + ram_addr_t mem_end; + QLIST_ENTRY(numa_addr_range) entry; +}; + typedef struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); struct HostMemoryBackend *node_memdev; bool present; + QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ } NodeInfo; + extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(MachineClass *mc); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); #endif diff --git a/numa.c b/numa.c index 91fc6c1d93..116d1fb57f 100644 --- a/numa.c +++ b/numa.c @@ -52,6 +52,28 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. int nb_numa_nodes; NodeInfo numa_info[MAX_NODES]; +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) +{ + struct numa_addr_range *range = g_malloc0(sizeof(*range)); + + range->mem_start = addr; + range->mem_end = addr + size - 1; + QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); +} + +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) +{ + struct numa_addr_range *range, *next; + + QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) { + if (addr == range->mem_start && (addr + size - 1) == range->mem_end) { + QLIST_REMOVE(range, entry); + g_free(range); + return; + } + } +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -273,6 +295,10 @@ void parse_numa_opts(MachineClass *mc) exit(1); } + for (i = 0; i < nb_numa_nodes; i++) { + QLIST_INIT(&numa_info[i].addr); + } + for (i = 0; i < nb_numa_nodes; i++) { if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { break; From abafabd8c982e875d60a10d37f0b91cff1003c55 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:26 +0530 Subject: [PATCH 5/6] numa: Store boot memory address range in node_info Store memory address range information of boot memory in address range list of numa_info. This helps to have a common NUMA node lookup by address function that works for both boot-time memory and hotplugged memory. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- numa.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/numa.c b/numa.c index 116d1fb57f..a73f6483ea 100644 --- a/numa.c +++ b/numa.c @@ -56,6 +56,14 @@ void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) { struct numa_addr_range *range = g_malloc0(sizeof(*range)); + /* + * Memory-less nodes can come here with 0 size in which case, + * there is nothing to do. + */ + if (!size) { + return; + } + range->mem_start = addr; range->mem_end = addr + size - 1; QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry); @@ -74,6 +82,21 @@ void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) } } +static void numa_set_mem_ranges(void) +{ + int i; + ram_addr_t mem_start = 0; + + /* + * Deduce start address of each node and use it to store + * the address range info in numa_info address range list + */ + for (i = 0; i < nb_numa_nodes; i++) { + numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i); + mem_start += numa_info[i].node_mem; + } +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; @@ -299,6 +322,8 @@ void parse_numa_opts(MachineClass *mc) QLIST_INIT(&numa_info[i].addr); } + numa_set_mem_ranges(); + for (i = 0; i < nb_numa_nodes; i++) { if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) { break; @@ -323,6 +348,8 @@ void parse_numa_opts(MachineClass *mc) } validate_numa_cpus(); + } else { + numa_set_mem_node_id(0, ram_size, 0); } } From e75e2a14d5c13ad38dcf72b69922dee2dafbb0d0 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 13:50:27 +0530 Subject: [PATCH 6/6] numa: API to lookup NUMA node by address Introduce an API numa_get_node(ram_addr_t addr, Error **errp) that returns the NUMA node to which the given address belongs to. This API works uniformly for both boot time as well as hotplugged memory. This API is needed by sPAPR PowerPC to support ibm,dynamic-reconfiguration-memory device tree node which is needed for memory hotplug. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Tested-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- include/sysemu/numa.h | 1 + numa.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 7176364062..a6392bc50f 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -31,5 +31,6 @@ void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); +uint32_t numa_get_node(ram_addr_t addr, Error **errp); #endif diff --git a/numa.c b/numa.c index a73f6483ea..3c8005913f 100644 --- a/numa.c +++ b/numa.c @@ -97,6 +97,47 @@ static void numa_set_mem_ranges(void) } } +/* + * Check if @addr falls under NUMA @node. + */ +static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node) +{ + struct numa_addr_range *range; + + QLIST_FOREACH(range, &numa_info[node].addr, entry) { + if (addr >= range->mem_start && addr <= range->mem_end) { + return true; + } + } + return false; +} + +/* + * Given an address, return the index of the NUMA node to which the + * address belongs to. + */ +uint32_t numa_get_node(ram_addr_t addr, Error **errp) +{ + uint32_t i; + + /* For non NUMA configurations, check if the addr falls under node 0 */ + if (!nb_numa_nodes) { + if (numa_addr_belongs_to_node(addr, 0)) { + return 0; + } + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_addr_belongs_to_node(addr, i)) { + return i; + } + } + + error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any " + "NUMA node", addr); + return -1; +} + static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr;