xemu/hw/core/machine-smp.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

303 lines
11 KiB
C
Raw Normal View History

/*
* QEMU Machine core (related to -smp parsing)
*
* Copyright (c) 2021 Huawei Technologies Co., Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
/*
* Report information of a machine's supported CPU topology hierarchy.
* Topology members will be ordered from the largest to the smallest
* in the string.
*/
static char *cpu_hierarchy_to_string(MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
GString *s = g_string_new(NULL);
if (mc->smp_props.drawers_supported) {
g_string_append_printf(s, "drawers (%u) * ", ms->smp.drawers);
}
if (mc->smp_props.books_supported) {
g_string_append_printf(s, "books (%u) * ", ms->smp.books);
}
g_string_append_printf(s, "sockets (%u)", ms->smp.sockets);
if (mc->smp_props.dies_supported) {
g_string_append_printf(s, " * dies (%u)", ms->smp.dies);
}
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 09:22:09 +00:00
if (mc->smp_props.clusters_supported) {
g_string_append_printf(s, " * clusters (%u)", ms->smp.clusters);
}
if (mc->smp_props.modules_supported) {
g_string_append_printf(s, " * modules (%u)", ms->smp.modules);
}
g_string_append_printf(s, " * cores (%u)", ms->smp.cores);
g_string_append_printf(s, " * threads (%u)", ms->smp.threads);
return g_string_free(s, false);
}
/*
* machine_parse_smp_config: Generic function used to parse the given
* SMP configuration
*
* Any missing parameter in "cpus/maxcpus/sockets/cores/threads" will be
* automatically computed based on the provided ones.
*
* In the calculation of omitted sockets/cores/threads: we prefer sockets
* over cores over threads before 6.2, while preferring cores over sockets
* over threads since 6.2.
*
* In the calculation of cpus/maxcpus: When both maxcpus and cpus are omitted,
* maxcpus will be computed from the given parameters and cpus will be set
* equal to maxcpus. When only one of maxcpus and cpus is given then the
* omitted one will be set to its given counterpart's value. Both maxcpus and
* cpus may be specified, but maxcpus must be equal to or greater than cpus.
*
* For compatibility, apart from the parameters that will be computed, newly
* introduced topology members which are likely to be target specific should
* be directly set as 1 if they are omitted (e.g. dies for PC since 4.1).
*/
void machine_parse_smp_config(MachineState *ms,
const SMPConfiguration *config, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
unsigned cpus = config->has_cpus ? config->cpus : 0;
unsigned drawers = config->has_drawers ? config->drawers : 0;
unsigned books = config->has_books ? config->books : 0;
unsigned sockets = config->has_sockets ? config->sockets : 0;
unsigned dies = config->has_dies ? config->dies : 0;
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 09:22:09 +00:00
unsigned clusters = config->has_clusters ? config->clusters : 0;
unsigned modules = config->has_modules ? config->modules : 0;
unsigned cores = config->has_cores ? config->cores : 0;
unsigned threads = config->has_threads ? config->threads : 0;
unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
unsigned total_cpus;
/*
* Specified CPU topology parameters must be greater than zero,
* explicit configuration like "cpus=0" is not allowed.
*/
if ((config->has_cpus && config->cpus == 0) ||
(config->has_drawers && config->drawers == 0) ||
(config->has_books && config->books == 0) ||
(config->has_sockets && config->sockets == 0) ||
(config->has_dies && config->dies == 0) ||
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 09:22:09 +00:00
(config->has_clusters && config->clusters == 0) ||
(config->has_modules && config->modules == 0) ||
(config->has_cores && config->cores == 0) ||
(config->has_threads && config->threads == 0) ||
(config->has_maxcpus && config->maxcpus == 0)) {
error_setg(errp, "Invalid CPU topology: "
"CPU topology parameters must be greater than zero");
return;
}
/*
* If not supported by the machine, a topology parameter must be
* omitted.
*/
if (!mc->smp_props.modules_supported && config->has_modules) {
if (config->modules > 1) {
error_setg(errp, "modules not supported by this "
"machine's CPU topology");
return;
} else {
/* Here modules only equals 1 since we've checked zero case. */
warn_report("Deprecated CPU topology (considered invalid): "
"Unsupported modules parameter mustn't be "
"specified as 1");
}
}
modules = modules > 0 ? modules : 1;
if (!mc->smp_props.clusters_supported && config->has_clusters) {
if (config->clusters > 1) {
error_setg(errp, "clusters not supported by this "
"machine's CPU topology");
return;
} else {
/* Here clusters only equals 1 since we've checked zero case. */
warn_report("Deprecated CPU topology (considered invalid): "
"Unsupported clusters parameter mustn't be "
"specified as 1");
}
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 09:22:09 +00:00
}
clusters = clusters > 0 ? clusters : 1;
if (!mc->smp_props.dies_supported && config->has_dies) {
if (config->dies > 1) {
error_setg(errp, "dies not supported by this "
"machine's CPU topology");
return;
} else {
/* Here dies only equals 1 since we've checked zero case. */
warn_report("Deprecated CPU topology (considered invalid): "
"Unsupported dies parameter mustn't be "
"specified as 1");
}
}
dies = dies > 0 ? dies : 1;
if (!mc->smp_props.books_supported && config->has_books) {
if (config->books > 1) {
error_setg(errp, "books not supported by this "
"machine's CPU topology");
return;
} else {
/* Here books only equals 1 since we've checked zero case. */
warn_report("Deprecated CPU topology (considered invalid): "
"Unsupported books parameter mustn't be "
"specified as 1");
}
}
books = books > 0 ? books : 1;
if (!mc->smp_props.drawers_supported && config->has_drawers) {
if (config->drawers > 1) {
error_setg(errp, "drawers not supported by this "
"machine's CPU topology");
return;
} else {
/* Here drawers only equals 1 since we've checked zero case. */
warn_report("Deprecated CPU topology (considered invalid): "
"Unsupported drawers parameter mustn't be "
"specified as 1");
}
}
drawers = drawers > 0 ? drawers : 1;
/* compute missing values based on the provided ones */
if (cpus == 0 && maxcpus == 0) {
sockets = sockets > 0 ? sockets : 1;
cores = cores > 0 ? cores : 1;
threads = threads > 0 ? threads : 1;
} else {
maxcpus = maxcpus > 0 ? maxcpus : cpus;
if (mc->smp_props.prefer_sockets) {
/* prefer sockets over cores before 6.2 */
if (sockets == 0) {
cores = cores > 0 ? cores : 1;
threads = threads > 0 ? threads : 1;
sockets = maxcpus /
(drawers * books * dies * clusters *
modules * cores * threads);
} else if (cores == 0) {
threads = threads > 0 ? threads : 1;
cores = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * threads);
}
} else {
/* prefer cores over sockets since 6.2 */
if (cores == 0) {
sockets = sockets > 0 ? sockets : 1;
threads = threads > 0 ? threads : 1;
cores = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * threads);
} else if (sockets == 0) {
threads = threads > 0 ? threads : 1;
sockets = maxcpus /
(drawers * books * dies * clusters *
modules * cores * threads);
}
}
/* try to calculate omitted threads at last */
if (threads == 0) {
threads = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * cores);
}
}
total_cpus = drawers * books * sockets * dies *
clusters * modules * cores * threads;
maxcpus = maxcpus > 0 ? maxcpus : total_cpus;
cpus = cpus > 0 ? cpus : maxcpus;
ms->smp.cpus = cpus;
ms->smp.drawers = drawers;
ms->smp.books = books;
ms->smp.sockets = sockets;
ms->smp.dies = dies;
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 09:22:09 +00:00
ms->smp.clusters = clusters;
ms->smp.modules = modules;
ms->smp.cores = cores;
ms->smp.threads = threads;
ms->smp.max_cpus = maxcpus;
mc->smp_props.has_clusters = config->has_clusters;
/* sanity-check of the computed topology */
if (total_cpus != maxcpus) {
g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
error_setg(errp, "Invalid CPU topology: "
"product of the hierarchy must match maxcpus: "
"%s != maxcpus (%u)",
topo_msg, maxcpus);
return;
}
if (maxcpus < cpus) {
g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
error_setg(errp, "Invalid CPU topology: "
"maxcpus must be equal to or greater than smp: "
"%s == maxcpus (%u) < smp_cpus (%u)",
topo_msg, maxcpus, cpus);
return;
}
if (ms->smp.cpus < mc->min_cpus) {
error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
"supported by machine '%s' is %d",
ms->smp.cpus,
mc->name, mc->min_cpus);
return;
}
if (ms->smp.max_cpus > mc->max_cpus) {
error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
"supported by machine '%s' is %d",
ms->smp.max_cpus,
mc->name, mc->max_cpus);
return;
}
}
unsigned int machine_topo_get_cores_per_socket(const MachineState *ms)
{
hw/core/machine: Introduce the module as a CPU topology level In x86, module is the topology level above core, which contains a set of cores that share certain resources (in current products, the resource usually includes L2 cache, as well as module scoped features and MSRs). Though smp.clusters could also share the L2 cache resource [1], there are following reasons that drive us to introduce the new smp.modules: * As the CPU topology abstraction in device tree [2], cluster supports nesting (though currently QEMU hasn't support that). In contrast, (x86) module does not support nesting. * Due to nesting, there is great flexibility in sharing resources on cluster, rather than narrowing cluster down to sharing L2 (and L3 tags) as the lowest topology level that contains cores. * Flexible nesting of cluster allows it to correspond to any level between the x86 package and core. * In Linux kernel, x86's cluster only represents the L2 cache domain but QEMU's smp.clusters is the CPU topology level. Linux kernel will also expose module level topology information in sysfs for x86. To avoid cluster ambiguity and keep a consistent CPU topology naming style with the Linux kernel, we introduce module level for x86. The module is, in existing hardware practice, the lowest layer that contains the core, while the cluster is able to have a higher topological scope than the module due to its nesting. Therefore, place the module between the cluster and the core: drawer/book/socket/die/cluster/module/core/thread With the above topological hierarchy order, introduce module level support in MachineState and MachineClass. [1]: https://lore.kernel.org/qemu-devel/c3d68005-54e0-b8fe-8dc1-5989fe3c7e69@huawei.com/ [2]: https://www.kernel.org/doc/Documentation/devicetree/bindings/cpu/cpu-topology.txt Suggested-by: Xiaoyao Li <xiaoyao.li@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Babu Moger <babu.moger@amd.com> Message-ID: <20240424154929.1487382-2-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-04-24 15:49:09 +00:00
return ms->smp.cores * ms->smp.modules * ms->smp.clusters * ms->smp.dies;
}
unsigned int machine_topo_get_threads_per_socket(const MachineState *ms)
{
return ms->smp.threads * machine_topo_get_cores_per_socket(ms);
}