mirror of https://github.com/xemu-project/xemu.git
ppc patch queue 2020-10-09
Here's the next set of ppc related patches for qemu-5.2. There are two main things here: * Cleanups to error handling in spapr from Greg Kurz * Improvements to NUMA handling for spapr from Daniel Barboza There are also a handful of other bugfixes. -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEdfRlhq5hpmzETofcbDjKyiDZs5IFAl9//PUACgkQbDjKyiDZ s5KdQQ/9EKl8GRuNw1CaoMRZFnD5YCDnr6Piy24HpcINHm8khvC4SWEaMm2ESOLU J5e9rQn2vXlHLWDA0qQ8pTTEMqfgAOuYllGQXTnTKF3tjePEZzsYzdg49v8O3dsb EHOAvixsHocH+8KMsiQkbV5BZYEEJukX6RoGGm6vte+MTXdRlpyxmp9Xf52tGmEB pU/Q2Y9oLR6OW7POWv3kfpmCfxklkOXstguEMTP42+ZGP17PBvpKXAXfW13gCl8t yGvvcjWr64m9uTyqTxYWK/jFxxYa8hraKPk4BY/001UCypd+T/DrD7E/xlBMZwPh eDRX7fV+YPcRqv66x47Gu40afEVm3mlQXzr0QaK5qm772f+v6C/xyLUznLNxYdLy s9lKSi7wSxjBS8M8jztRoCJEx+zVe6BclJbwdzGQMYODiY13HKVENFUzPxrC9bfN IxYAU3uAN3VL/agslEYV+aBrX0qj96c1Ek6CcFG2XXdR3k9QnYvUcQuPKcfuCBSX TVS2mYger8Ba4E47tapH++TKj5jHoVKgTciSN663+gUCGzNTw+5UEZBxEHTQaPOX a5SKh5t06PEkxpBK4ITnQfeRwvkMg4ERjJoKPXWzcqvHUWK+xaI8XbBlqCDMiC3T mBAVHMIrKEe6J9tTqeURyct3ItUioneueLWNSplBUN3BPkE+7AQ= =dbvK -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.2-20201009' into staging ppc patch queue 2020-10-09 Here's the next set of ppc related patches for qemu-5.2. There are two main things here: * Cleanups to error handling in spapr from Greg Kurz * Improvements to NUMA handling for spapr from Daniel Barboza There are also a handful of other bugfixes. 
# gpg: Signature made Fri 09 Oct 2020 07:02:29 BST # gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full] # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full] # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full] # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown] # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-5.2-20201009: specs/ppc-spapr-numa: update with new NUMA support spapr_numa: consider user input when defining associativity spapr_numa: change reference-points and maxdomain settings spapr_numa: forbid asymmetrical NUMA setups spapr: add spapr_machine_using_legacy_numa() helper ppc/pnv: Increase max firmware size spapr: Add a return value to spapr_check_pagesize() spapr: Add a return value to spapr_nvdimm_validate() spapr: Simplify error handling in spapr_cpu_core_realize() spapr: Add a return value to spapr_set_vcpu_id() spapr: Simplify error handling in prop_get_fdt() spapr: Add a return value to spapr_drc_attach() spapr: Simplify error handling in spapr_vio_busdev_realize() spapr: Simplify error handling in do_client_architecture_support() spapr: Get rid of cas_check_pvr() error reporting spapr: Simplify error handling in callers of ppc_set_compat() ppc: Fix return value in cpu_post_load() error path ppc: Add a return value to ppc_set_compat() and ppc_set_compat_all() spapr: Fix error leak in spapr_realize_vcpu() spapr: Handle HPT allocation failure in nested guest Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
4a7c0bd9dc
|
@ -158,9 +158,235 @@ kernel tree). This results in the following distances:
|
|||
* resources four NUMA levels apart: 160
|
||||
|
||||
|
||||
Consequences for QEMU NUMA tuning
|
||||
pseries NUMA mechanics
|
||||
======================
|
||||
|
||||
Starting in QEMU 5.2, the pseries machine considers user input when setting NUMA
|
||||
topology of the guest. The overall design is:
|
||||
|
||||
* ibm,associativity-reference-points is set to {0x4, 0x3, 0x2, 0x1}, allowing
|
||||
for 4 distinct NUMA distance values based on the NUMA levels
|
||||
|
||||
* ibm,max-associativity-domains supports multiple associativity domains in all
|
||||
NUMA levels, granting user flexibility
|
||||
|
||||
* ibm,associativity for all resources varies with user input
|
||||
|
||||
These changes are only effective for pseries-5.2 and newer machines that are
|
||||
created with more than one NUMA node (disregarding NUMA nodes created by
|
||||
the machine itself, e.g. NVLink 2 GPUs). The now legacy support has been
|
||||
around for such a long time, with users seeing NUMA distances 10 and 40
|
||||
(and 80 if using NVLink2 GPUs), and there is no need to disrupt the
|
||||
existing experience of those guests.
|
||||
|
||||
To bring the user experience x86 users have when tuning up NUMA, we had
|
||||
to operate under the current pseries Linux kernel logic described in
|
||||
`How the pseries Linux guest calculates NUMA distances`_. The result
|
||||
is that we needed to translate NUMA distance user input to pseries
|
||||
Linux kernel input.
|
||||
|
||||
Translating user distance to kernel distance
|
||||
--------------------------------------------
|
||||
|
||||
User input for NUMA distance can vary from 10 to 254. We need to translate
|
||||
that to the values that the Linux kernel operates on (10, 20, 40, 80, 160).
|
||||
This is how it is being done:
|
||||
|
||||
* user distance 11 to 30 will be interpreted as 20
|
||||
* user distance 31 to 60 will be interpreted as 40
|
||||
* user distance 61 to 120 will be interpreted as 80
|
||||
* user distance 121 and beyond will be interpreted as 160
|
||||
* user distance 10 stays 10
|
||||
|
||||
The reasoning behind this approximation is to avoid any round up to the local
|
||||
distance (10), keeping it exclusive to the 4th NUMA level (which is still
|
||||
exclusive to the node_id). All other ranges were chosen under the developer
|
||||
discretion of what would be (somewhat) sensible considering the user input.
|
||||
Any other strategy can be used here, but in the end the reality is that we'll
|
||||
have to accept that a large array of values will be translated to the same
|
||||
NUMA topology in the guest, e.g. this user input:
|
||||
|
||||
::
|
||||
|
||||
0 1 2
|
||||
0 10 31 120
|
||||
1 31 10 30
|
||||
2 120 30 10
|
||||
|
||||
And this other user input:
|
||||
|
||||
::
|
||||
|
||||
0 1 2
|
||||
0 10 60 61
|
||||
1 60 10 11
|
||||
2 61 11 10
|
||||
|
||||
Will both be translated to the same values internally:
|
||||
|
||||
::
|
||||
|
||||
0 1 2
|
||||
0 10 40 80
|
||||
1 40 10 20
|
||||
2 80 20 10
|
||||
|
||||
Users are encouraged to use only the kernel values in the NUMA definition to
|
||||
avoid being taken by surprise with what the guest is actually seeing in the
|
||||
topology. There are enough potential surprises that are inherent to the
|
||||
associativity domain assignment process, discussed below.
|
||||
|
||||
|
||||
How associativity domains are assigned
|
||||
--------------------------------------
|
||||
|
||||
LOPAPR allows more than one associativity array (or 'string') per allocated
|
||||
resource. This would be used to represent that the resource has multiple
|
||||
connections with the board, and then the operating system, when deciding
|
||||
NUMA distancing, should consider the associativity information that provides
|
||||
the shortest distance.
|
||||
|
||||
The spapr implementation does not support multiple associativity arrays per
|
||||
resource, neither does the pseries Linux kernel. We'll have to represent the
|
||||
NUMA topology using one associativity per resource, which means that choices
|
||||
and compromises are going to be made.
|
||||
|
||||
Consider the following NUMA topology entered by user input:
|
||||
|
||||
::
|
||||
|
||||
0 1 2 3
|
||||
0 10 40 20 40
|
||||
1 40 10 80 40
|
||||
2 20 80 10 20
|
||||
3 40 40 20 10
|
||||
|
||||
All the associativity arrays are initialized with NUMA id in all associativity
|
||||
domains:
|
||||
|
||||
* node 0: 0 0 0 0
|
||||
* node 1: 1 1 1 1
|
||||
* node 2: 2 2 2 2
|
||||
* node 3: 3 3 3 3
|
||||
|
||||
|
||||
Honoring just the relative distances of node 0 to every other node, we find the
|
||||
NUMA level matches (considering the reference points {0x4, 0x3, 0x2, 0x1}) for
|
||||
each distance:
|
||||
|
||||
* distance from 0 to 1 is 40 (no match at 0x4 and 0x3, will match
|
||||
at 0x2)
|
||||
* distance from 0 to 2 is 20 (no match at 0x4, will match at 0x3)
|
||||
* distance from 0 to 3 is 40 (no match at 0x4 and 0x3, will match
|
||||
at 0x2)
|
||||
|
||||
We'll copy the associativity domains of node 0 to all other nodes, based on
|
||||
the NUMA level matches. Between 0 and 1, a match in 0x2, we'll also copy
|
||||
the domains 0x2 and 0x1 from 0 to 1 as well. This will give us:
|
||||
|
||||
* node 0: 0 0 0 0
|
||||
* node 1: 0 0 1 1
|
||||
|
||||
Doing the same to node 2 and node 3, these are the associativity arrays
|
||||
after considering all matches with node 0:
|
||||
|
||||
* node 0: 0 0 0 0
|
||||
* node 1: 0 0 1 1
|
||||
* node 2: 0 0 0 2
|
||||
* node 3: 0 0 3 3
|
||||
|
||||
The distances related to node 0 are accounted for. For node 1, and keeping
|
||||
in mind that we don't need to revisit node 0 again, the distance from
|
||||
node 1 to 2 is 80, matching at 0x1, and distance from 1 to 3 is 40,
|
||||
match in 0x2. Repeating the same logic of copying all domains up to
|
||||
the NUMA level match:
|
||||
|
||||
* node 0: 0 0 0 0
|
||||
* node 1: 1 0 1 1
|
||||
* node 2: 1 0 0 2
|
||||
* node 3: 1 0 3 3
|
||||
|
||||
In the last step we will analyze just nodes 2 and 3. The desired distance
|
||||
between 2 and 3 is 20, i.e. a match in 0x3:
|
||||
|
||||
* node 0: 0 0 0 0
|
||||
* node 1: 1 0 1 1
|
||||
* node 2: 1 0 0 2
|
||||
* node 3: 1 0 0 3
|
||||
|
||||
|
||||
The kernel will read these arrays and will calculate the following NUMA topology for
|
||||
the guest:
|
||||
|
||||
::
|
||||
|
||||
0 1 2 3
|
||||
0 10 40 20 20
|
||||
1 40 10 40 40
|
||||
2 20 40 10 20
|
||||
3 20 40 20 10
|
||||
|
||||
Note that this is not what the user wanted - the desired distance between
|
||||
0 and 3 is 40, we calculated it as 20. This is what the current logic and
|
||||
implementation constraints of the kernel and QEMU will provide inside the
|
||||
LOPAPR specification.
|
||||
|
||||
Users are welcome to use this knowledge and experiment with the input to get
|
||||
the NUMA topology they want, or as close as they want. The important thing
|
||||
is to keep expectations up to par with what we are capable of providing at this
|
||||
moment: an approximation.
|
||||
|
||||
Limitations of the implementation
|
||||
---------------------------------
|
||||
|
||||
As mentioned above, the pSeries NUMA distance logic is, in fact, a way to approximate
|
||||
user choice. The Linux kernel, and PAPR itself, does not provide QEMU with the ways
|
||||
to fully map user input to actual NUMA distance the guest will use. These limitations
|
||||
create two notable limitations in our support:
|
||||
|
||||
* Asymmetrical topologies aren't supported. We only support NUMA topologies where
|
||||
the distance from node A to B is always the same as B to A. We do not support
|
||||
any A-B pair where the distance back and forth is asymmetric. For example, the
|
||||
following topology isn't supported and the pSeries guest will not boot with this
|
||||
user input:
|
||||
|
||||
::
|
||||
|
||||
0 1
|
||||
0 10 40
|
||||
1 20 10
|
||||
|
||||
|
||||
* 'non-transitive' topologies will be poorly translated to the guest. This is the
|
||||
kind of topology where the distance from a node A to B is X, B to C is X, but
|
||||
the distance A to C is not X. E.g.:
|
||||
|
||||
::
|
||||
|
||||
0 1 2 3
|
||||
0 10 20 20 40
|
||||
1 20 10 80 40
|
||||
2 20 80 10 20
|
||||
3 40 40 20 10
|
||||
|
||||
In the example above, distance 0 to 2 is 20, 2 to 3 is 20, but 0 to 3 is 40.
|
||||
The kernel will always match with the shortest associativity domain possible,
|
||||
and we're attempting to retain the previous established relations between the
|
||||
nodes. This means that a distance equal to 20 between nodes 0 and 2 and the
|
||||
same distance 20 between nodes 2 and 3 will cause the distance between 0 and 3
|
||||
to also be 20.
|
||||
|
||||
|
||||
Legacy (5.1 and older) pseries NUMA mechanics
|
||||
=============================================
|
||||
|
||||
In short, we can summarize the NUMA distances seen in pseries Linux guests, using
|
||||
QEMU up to 5.1, as follows:
|
||||
|
||||
* local distance, i.e. the distance of the resource to its own NUMA node: 10
|
||||
* if it's a NVLink GPU device, distance: 80
|
||||
* every other resource, distance: 40
|
||||
|
||||
The way the pseries Linux guest calculates NUMA distances has a direct effect
|
||||
on what QEMU users can expect when doing NUMA tuning. As of QEMU 5.1, this is
|
||||
the default ibm,associativity-reference-points being used in the pseries
|
||||
|
@ -180,12 +406,5 @@ as far as NUMA distance goes:
|
|||
to the same third NUMA level, having distance = 40
|
||||
* for NVLink GPUs, distance = 80 from everything else
|
||||
|
||||
In short, we can summarize the NUMA distances seen in pseries Linux guests, using
|
||||
QEMU up to 5.1, as follows:
|
||||
|
||||
* local distance, i.e. the distance of the resource to its own NUMA node: 10
|
||||
* if it's a NVLink GPU device, distance: 80
|
||||
* every other resource, distance: 40
|
||||
|
||||
This also means that user input in QEMU command line does not change the
|
||||
NUMA distancing inside the guest for the pseries machine.
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
|
||||
#define FW_FILE_NAME "skiboot.lid"
|
||||
#define FW_LOAD_ADDR 0x0
|
||||
#define FW_MAX_SIZE (4 * MiB)
|
||||
#define FW_MAX_SIZE (16 * MiB)
|
||||
|
||||
#define KERNEL_LOAD_ADDR 0x20000000
|
||||
#define KERNEL_MAX_SIZE (256 * MiB)
|
||||
|
|
|
@ -294,6 +294,15 @@ static hwaddr spapr_node0_size(MachineState *machine)
|
|||
return machine->ram_size;
|
||||
}
|
||||
|
||||
/*
 * Returns true when this machine should keep the legacy (pre-5.2) NUMA
 * associativity behaviour: either the machine class explicitly requests
 * it, or the guest is configured with at most one NUMA node.
 */
bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr)
{
    MachineState *ms = MACHINE(spapr);
    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
    bool single_node = ms->numa_state->num_nodes <= 1;

    return single_node || smc->pre_5_2_numa_associativity;
}
|
||||
|
||||
static void add_str(GString *s, const gchar *s1)
|
||||
{
|
||||
g_string_append_len(s, s1, strlen(s1) + 1);
|
||||
|
@ -1483,6 +1492,12 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
|
|||
spapr_free_hpt(spapr);
|
||||
|
||||
rc = kvmppc_reset_htab(shift);
|
||||
|
||||
if (rc == -EOPNOTSUPP) {
|
||||
error_setg(errp, "HPT not supported in nested guests");
|
||||
return;
|
||||
}
|
||||
|
||||
if (rc < 0) {
|
||||
/* kernel-side HPT needed, but couldn't allocate one */
|
||||
error_setg_errno(errp, errno,
|
||||
|
@ -3365,22 +3380,19 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
|
|||
int i;
|
||||
uint64_t addr = addr_start;
|
||||
bool hotplugged = spapr_drc_hotplugged(dev);
|
||||
Error *local_err = NULL;
|
||||
|
||||
for (i = 0; i < nr_lmbs; i++) {
|
||||
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
|
||||
addr / SPAPR_MEMORY_BLOCK_SIZE);
|
||||
g_assert(drc);
|
||||
|
||||
spapr_drc_attach(drc, dev, &local_err);
|
||||
if (local_err) {
|
||||
if (!spapr_drc_attach(drc, dev, errp)) {
|
||||
while (addr > addr_start) {
|
||||
addr -= SPAPR_MEMORY_BLOCK_SIZE;
|
||||
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
|
||||
addr / SPAPR_MEMORY_BLOCK_SIZE);
|
||||
spapr_drc_detach(drc);
|
||||
}
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
if (!hotplugged) {
|
||||
|
@ -3475,9 +3487,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
}
|
||||
|
||||
if (is_nvdimm) {
|
||||
spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, errp)) {
|
||||
return;
|
||||
}
|
||||
} else if (size % SPAPR_MEMORY_BLOCK_SIZE) {
|
||||
|
@ -3489,9 +3499,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
|
||||
&error_abort);
|
||||
pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev));
|
||||
spapr_check_pagesize(spapr, pagesize, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_check_pagesize(spapr, pagesize, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3761,7 +3769,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
CPUCore *cc = CPU_CORE(dev);
|
||||
CPUState *cs;
|
||||
SpaprDrc *drc;
|
||||
Error *local_err = NULL;
|
||||
CPUArchId *core_slot;
|
||||
int index;
|
||||
bool hotplugged = spapr_drc_hotplugged(dev);
|
||||
|
@ -3779,9 +3786,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
g_assert(drc || !mc->has_hotpluggable_cpus);
|
||||
|
||||
if (drc) {
|
||||
spapr_drc_attach(drc, dev, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_drc_attach(drc, dev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -3811,10 +3816,9 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
*/
|
||||
if (hotplugged) {
|
||||
for (i = 0; i < cc->nr_threads; i++) {
|
||||
ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr,
|
||||
&local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (ppc_set_compat(core->threads[i],
|
||||
POWERPC_CPU(first_cpu)->compat_pvr,
|
||||
errp) < 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -3934,7 +3938,6 @@ static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
|
||||
SpaprDrc *drc;
|
||||
bool hotplugged = spapr_drc_hotplugged(dev);
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (!smc->dr_phb_enabled) {
|
||||
return;
|
||||
|
@ -3944,9 +3947,7 @@ static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
/* hotplug hooks should check it's enabled before getting this far */
|
||||
assert(drc);
|
||||
|
||||
spapr_drc_attach(drc, dev, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_drc_attach(drc, dev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -4290,7 +4291,7 @@ int spapr_get_vcpu_id(PowerPCCPU *cpu)
|
|||
return cpu->vcpu_id;
|
||||
}
|
||||
|
||||
void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
|
||||
bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
|
||||
{
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
|
||||
MachineState *ms = MACHINE(spapr);
|
||||
|
@ -4303,10 +4304,11 @@ void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
|
|||
error_append_hint(errp, "Adjust the number of cpus to %d "
|
||||
"or try to raise the number of threads per core\n",
|
||||
vcpu_id * ms->smp.threads / spapr->vsmt);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
cpu->vcpu_id = vcpu_id;
|
||||
return true;
|
||||
}
|
||||
|
||||
PowerPCCPU *spapr_find_cpu(int vcpu_id)
|
||||
|
@ -4526,8 +4528,11 @@ DEFINE_SPAPR_MACHINE(5_2, "5.2", true);
|
|||
*/
|
||||
static void spapr_machine_5_1_class_options(MachineClass *mc)
|
||||
{
|
||||
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
|
||||
|
||||
spapr_machine_5_2_class_options(mc);
|
||||
compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len);
|
||||
smc->pre_5_2_numa_associativity = true;
|
||||
}
|
||||
|
||||
DEFINE_SPAPR_MACHINE(5_1, "5.1", false);
|
||||
|
|
|
@ -310,13 +310,13 @@ static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr,
|
|||
|
||||
#define VALUE_DESC_TRISTATE " (broken, workaround, fixed)"
|
||||
|
||||
void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
||||
bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
||||
Error **errp)
|
||||
{
|
||||
hwaddr maxpagesize = (1ULL << spapr->eff.caps[SPAPR_CAP_HPT_MAXPAGESIZE]);
|
||||
|
||||
if (!kvmppc_hpt_needs_host_contiguous_pages()) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (maxpagesize > pagesize) {
|
||||
|
@ -324,7 +324,10 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
|||
"Can't support %"HWADDR_PRIu" kiB guest pages with %"
|
||||
HWADDR_PRIu" kiB host pages with this KVM implementation",
|
||||
maxpagesize >> 10, pagesize >> 10);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
|
||||
|
|
|
@ -227,15 +227,14 @@ static void spapr_cpu_core_unrealize(DeviceState *dev)
|
|||
g_free(sc->threads);
|
||||
}
|
||||
|
||||
static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
||||
static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
||||
SpaprCpuCore *sc, Error **errp)
|
||||
{
|
||||
CPUPPCState *env = &cpu->env;
|
||||
CPUState *cs = CPU(cpu);
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Set time-base frequency to 512 MHz */
|
||||
|
@ -244,15 +243,16 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
|||
cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
|
||||
kvmppc_set_papr(cpu);
|
||||
|
||||
if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) {
|
||||
if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) {
|
||||
cpu_remove_sync(CPU(cpu));
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!sc->pre_3_0_migration) {
|
||||
vmstate_register(NULL, cs->cpu_index, &vmstate_spapr_cpu_state,
|
||||
cpu->machine_data);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
|
||||
|
@ -263,7 +263,6 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
|
|||
char *id;
|
||||
CPUState *cs;
|
||||
PowerPCCPU *cpu;
|
||||
Error *local_err = NULL;
|
||||
|
||||
obj = object_new(scc->cpu_type);
|
||||
|
||||
|
@ -275,8 +274,7 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
|
|||
*/
|
||||
cs->start_powered_off = true;
|
||||
cs->cpu_index = cc->core_id + i;
|
||||
spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err);
|
||||
if (local_err) {
|
||||
if (!spapr_set_vcpu_id(cpu, cs->cpu_index, errp)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -293,7 +291,6 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
|
|||
|
||||
err:
|
||||
object_unref(obj);
|
||||
error_propagate(errp, local_err);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -316,7 +313,6 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
|
|||
TYPE_SPAPR_MACHINE);
|
||||
SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
|
||||
CPUCore *cc = CPU_CORE(OBJECT(dev));
|
||||
Error *local_err = NULL;
|
||||
int i, j;
|
||||
|
||||
if (!spapr) {
|
||||
|
@ -326,15 +322,14 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
|
|||
|
||||
sc->threads = g_new(PowerPCCPU *, cc->nr_threads);
|
||||
for (i = 0; i < cc->nr_threads; i++) {
|
||||
sc->threads[i] = spapr_create_vcpu(sc, i, &local_err);
|
||||
if (local_err) {
|
||||
sc->threads[i] = spapr_create_vcpu(sc, i, errp);
|
||||
if (!sc->threads[i]) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < cc->nr_threads; j++) {
|
||||
spapr_realize_vcpu(sc->threads[j], spapr, sc, &local_err);
|
||||
if (local_err) {
|
||||
if (!spapr_realize_vcpu(sc->threads[j], spapr, sc, errp)) {
|
||||
goto err_unrealize;
|
||||
}
|
||||
}
|
||||
|
@ -351,7 +346,6 @@ err:
|
|||
spapr_delete_vcpu(sc->threads[i], sc);
|
||||
}
|
||||
g_free(sc->threads);
|
||||
error_propagate(errp, local_err);
|
||||
}
|
||||
|
||||
static Property spapr_cpu_core_properties[] = {
|
||||
|
|
|
@ -302,7 +302,6 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
|
|||
{
|
||||
SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj);
|
||||
QNull *null = NULL;
|
||||
Error *err = NULL;
|
||||
int fdt_offset_next, fdt_offset, fdt_depth;
|
||||
void *fdt;
|
||||
|
||||
|
@ -321,6 +320,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
|
|||
const struct fdt_property *prop = NULL;
|
||||
int prop_len = 0, name_len = 0;
|
||||
uint32_t tag;
|
||||
bool ok;
|
||||
|
||||
tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next);
|
||||
switch (tag) {
|
||||
|
@ -334,10 +334,9 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
|
|||
case FDT_END_NODE:
|
||||
/* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */
|
||||
g_assert(fdt_depth > 0);
|
||||
visit_check_struct(v, &err);
|
||||
ok = visit_check_struct(v, errp);
|
||||
visit_end_struct(v, NULL);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
if (!ok) {
|
||||
return;
|
||||
}
|
||||
fdt_depth--;
|
||||
|
@ -355,10 +354,9 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
|
|||
return;
|
||||
}
|
||||
}
|
||||
visit_check_list(v, &err);
|
||||
ok = visit_check_list(v, errp);
|
||||
visit_end_list(v, NULL);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
if (!ok) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
@ -371,13 +369,13 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
|
|||
} while (fdt_depth != 0);
|
||||
}
|
||||
|
||||
void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
|
||||
bool spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
|
||||
{
|
||||
trace_spapr_drc_attach(spapr_drc_index(drc));
|
||||
|
||||
if (drc->dev) {
|
||||
error_setg(errp, "an attached device is still awaiting release");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE)
|
||||
|| (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON));
|
||||
|
@ -388,6 +386,7 @@ void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
|
|||
object_get_typename(OBJECT(drc->dev)),
|
||||
(Object **)(&drc->dev),
|
||||
NULL, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void spapr_drc_release(SpaprDrc *drc)
|
||||
|
|
|
@ -1590,12 +1590,11 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu,
|
|||
}
|
||||
}
|
||||
|
||||
static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu,
|
||||
target_ulong *addr, bool *raw_mode_supported,
|
||||
Error **errp)
|
||||
/* Returns either a logical PVR or zero if none was found */
|
||||
static uint32_t cas_check_pvr(PowerPCCPU *cpu, uint32_t max_compat,
|
||||
target_ulong *addr, bool *raw_mode_supported)
|
||||
{
|
||||
bool explicit_match = false; /* Matched the CPU's real PVR */
|
||||
uint32_t max_compat = spapr->max_compat_pvr;
|
||||
uint32_t best_compat = 0;
|
||||
int i;
|
||||
|
||||
|
@ -1624,14 +1623,6 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu,
|
|||
}
|
||||
}
|
||||
|
||||
if ((best_compat == 0) && (!explicit_match || max_compat)) {
|
||||
/* We couldn't find a suitable compatibility mode, and either
|
||||
* the guest doesn't support "raw" mode for this CPU, or raw
|
||||
* mode is disabled because a maximum compat mode is set */
|
||||
error_setg(errp, "Couldn't negotiate a suitable PVR during CAS");
|
||||
return 0;
|
||||
}
|
||||
|
||||
*raw_mode_supported = explicit_match;
|
||||
|
||||
/* Parsing finished */
|
||||
|
@ -1675,11 +1666,11 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
|
|||
uint32_t cas_pvr;
|
||||
SpaprOptionVector *ov1_guest, *ov5_guest;
|
||||
bool guest_radix;
|
||||
Error *local_err = NULL;
|
||||
bool raw_mode_supported = false;
|
||||
bool guest_xive;
|
||||
CPUState *cs;
|
||||
void *fdt;
|
||||
uint32_t max_compat = spapr->max_compat_pvr;
|
||||
|
||||
/* CAS is supposed to be called early when only the boot vCPU is active. */
|
||||
CPU_FOREACH(cs) {
|
||||
|
@ -1692,16 +1683,22 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
|
|||
}
|
||||
}
|
||||
|
||||
cas_pvr = cas_check_pvr(spapr, cpu, &vec, &raw_mode_supported, &local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
cas_pvr = cas_check_pvr(cpu, max_compat, &vec, &raw_mode_supported);
|
||||
if (!cas_pvr && (!raw_mode_supported || max_compat)) {
|
||||
/*
|
||||
* We couldn't find a suitable compatibility mode, and either
|
||||
* the guest doesn't support "raw" mode for this CPU, or "raw"
|
||||
* mode is disabled because a maximum compat mode is set.
|
||||
*/
|
||||
error_report("Couldn't negotiate a suitable PVR during CAS");
|
||||
return H_HARDWARE;
|
||||
}
|
||||
|
||||
/* Update CPUs */
|
||||
if (cpu->compat_pvr != cas_pvr) {
|
||||
ppc_set_compat_all(cas_pvr, &local_err);
|
||||
if (local_err) {
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (ppc_set_compat_all(cas_pvr, &local_err) < 0) {
|
||||
/* We fail to set compat mode (likely because running with KVM PR),
|
||||
* but maybe we can fallback to raw mode if the guest supports it.
|
||||
*/
|
||||
|
@ -1710,7 +1707,6 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
|
|||
return H_HARDWARE;
|
||||
}
|
||||
error_free(local_err);
|
||||
local_err = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,12 +19,126 @@
|
|||
/* Moved from hw/ppc/spapr_pci_nvlink2.c */
|
||||
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
|
||||
|
||||
/*
 * A NUMA setup is symmetrical when, for every pair of nodes (a, b),
 * the user-supplied distance a -> b equals the distance b -> a.
 * Only the upper triangle of the distance matrix needs checking.
 */
static bool spapr_numa_is_symmetrical(MachineState *ms)
{
    NodeInfo *nodes = ms->numa_state->nodes;
    int total = ms->numa_state->num_nodes;
    int a, b;

    for (a = 0; a < total; a++) {
        for (b = a; b < total; b++) {
            if (nodes[a].distance[b] != nodes[b].distance[a]) {
                return false;
            }
        }
    }

    return true;
}
|
||||
|
||||
/*
 * This function translates the user distances into
 * what the kernel understands as possible values: 10
 * (local distance), 20, 40, 80 and 160, and returns the equivalent
 * NUMA level for each. Current heuristic is:
 *  - local distance (10) returns numa_level = 0x4, meaning there is
 *    no rounding for local distance
 *  - distances between 11 and 30 inclusive -> rounded to 20,
 *    numa_level = 0x3
 *  - distances between 31 and 60 inclusive -> rounded to 40,
 *    numa_level = 0x2
 *  - distances between 61 and 120 inclusive -> rounded to 80,
 *    numa_level = 0x1
 *  - everything above 120 returns numa_level = 0 to indicate that
 *    there is no match. This will be calculated as distance = 160
 *    by the kernel (as of v5.9)
 */
static uint8_t spapr_numa_get_numa_level(uint8_t distance)
{
    if (distance == 10) {
        return 0x4;
    } else if (distance > 10 && distance <= 30) {
        /* was "distance > 11": boundary value 11 wrongly fell through */
        return 0x3;
    } else if (distance > 30 && distance <= 60) {
        /* was "distance > 31": boundary value 31 wrongly fell through */
        return 0x2;
    } else if (distance > 60 && distance <= 120) {
        /* was "distance > 61": boundary value 61 wrongly fell through */
        return 0x1;
    }

    /* No NUMA level match; the kernel computes this as distance 160 */
    return 0;
}
|
||||
|
||||
/*
 * Populate the associativity arrays so that the device tree reflects
 * the user-supplied NUMA distances.
 *
 * For each pair of nodes (src, dst), visited in node_id ascending
 * order, the distance between them is mapped to a NUMA level and the
 * associativity domains of src are copied into dst for that level and
 * every level above it. Because node 0 is processed first, its domains
 * cascade into the later nodes, and node 1's domains are applied on
 * top of that, and so on. The PPC kernel expects node 0's associativity
 * domains to always be 0, which this ordering grants by default.
 */
static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
{
    MachineState *ms = MACHINE(spapr);
    NodeInfo *numa_info = ms->numa_state->nodes;
    int nb_numa_nodes = ms->numa_state->num_nodes;
    int src, dst, level;

    for (src = 0; src < nb_numa_nodes; src++) {
        for (dst = src; dst < nb_numa_nodes; dst++) {
            uint8_t distance = numa_info[src].distance[dst];
            uint8_t n_level = spapr_numa_get_numa_level(distance);

            /*
             * n_level == 0: the distance is beyond the last rounded
             * value (120), so src and dst share no NUMA level and
             * there is nothing to copy. The Linux kernel (v5.9,
             * arch/powerpc/mm/numa.c, __node_distance()) then assumes
             * local distance (10) doubled once per unmatched level;
             * with MAX_DISTANCE_REF_POINTS (4) levels that yields
             * 10*2*2*2*2 = 160.
             */
            if (n_level == 0) {
                continue;
            }

            /*
             * Share src's associativity domains with dst, from
             * n_level all the way up to level 0x1.
             */
            for (level = n_level; level > 0; level--) {
                spapr->numa_assoc_array[dst][level] =
                    spapr->numa_assoc_array[src][level];
            }
        }
    }
}
|
||||
|
||||
void spapr_numa_associativity_init(SpaprMachineState *spapr,
|
||||
MachineState *machine)
|
||||
{
|
||||
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
||||
int nb_numa_nodes = machine->numa_state->num_nodes;
|
||||
int i, j, max_nodes_with_gpus;
|
||||
bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr);
|
||||
|
||||
/*
|
||||
* For all associativity arrays: first position is the size,
|
||||
|
@ -38,6 +152,17 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
|
|||
for (i = 0; i < nb_numa_nodes; i++) {
|
||||
spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
|
||||
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
|
||||
|
||||
/*
|
||||
* Fill all associativity domains of non-zero NUMA nodes with
|
||||
* node_id. This is required because the default value (0) is
|
||||
* considered a match with associativity domains of node 0.
|
||||
*/
|
||||
if (!using_legacy_numa && i != 0) {
|
||||
for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
|
||||
spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -61,6 +186,23 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
|
|||
|
||||
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
|
||||
}
|
||||
|
||||
/*
|
||||
* Legacy NUMA guests (pseries-5.1 and older, or guests with only
|
||||
* 1 NUMA node) will not benefit from anything we're going to do
|
||||
* after this point.
|
||||
*/
|
||||
if (using_legacy_numa) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spapr_numa_is_symmetrical(machine)) {
|
||||
error_report("Asymmetrical NUMA topologies aren't supported "
|
||||
"in the pSeries machine");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
spapr_numa_define_associativity_domains(spapr);
|
||||
}
|
||||
|
||||
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
|
||||
|
@ -144,24 +286,51 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
|
|||
*/
|
||||
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
|
||||
{
|
||||
MachineState *ms = MACHINE(spapr);
|
||||
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
|
||||
uint32_t refpoints[] = {
|
||||
cpu_to_be32(0x4),
|
||||
cpu_to_be32(0x4),
|
||||
cpu_to_be32(0x3),
|
||||
cpu_to_be32(0x2),
|
||||
cpu_to_be32(0x1),
|
||||
};
|
||||
uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
|
||||
uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
|
||||
uint32_t maxdomain = ms->numa_state->num_nodes + spapr->gpu_numa_id;
|
||||
uint32_t maxdomains[] = {
|
||||
cpu_to_be32(4),
|
||||
maxdomain,
|
||||
maxdomain,
|
||||
maxdomain,
|
||||
cpu_to_be32(spapr->gpu_numa_id),
|
||||
cpu_to_be32(maxdomain),
|
||||
cpu_to_be32(maxdomain),
|
||||
cpu_to_be32(maxdomain),
|
||||
cpu_to_be32(maxdomain)
|
||||
};
|
||||
|
||||
if (smc->pre_5_1_assoc_refpoints) {
|
||||
nr_refpoints = 2;
|
||||
if (spapr_machine_using_legacy_numa(spapr)) {
|
||||
uint32_t legacy_refpoints[] = {
|
||||
cpu_to_be32(0x4),
|
||||
cpu_to_be32(0x4),
|
||||
cpu_to_be32(0x2),
|
||||
};
|
||||
uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0;
|
||||
uint32_t legacy_maxdomains[] = {
|
||||
cpu_to_be32(4),
|
||||
cpu_to_be32(legacy_maxdomain),
|
||||
cpu_to_be32(legacy_maxdomain),
|
||||
cpu_to_be32(legacy_maxdomain),
|
||||
cpu_to_be32(spapr->gpu_numa_id),
|
||||
};
|
||||
|
||||
G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
|
||||
G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains));
|
||||
|
||||
nr_refpoints = 3;
|
||||
|
||||
memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints));
|
||||
memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains));
|
||||
|
||||
/* pseries-5.0 and older reference-points array is {0x4, 0x4} */
|
||||
if (smc->pre_5_1_assoc_refpoints) {
|
||||
nr_refpoints = 2;
|
||||
}
|
||||
}
|
||||
|
||||
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
#include "sysemu/sysemu.h"
|
||||
#include "hw/ppc/spapr_numa.h"
|
||||
|
||||
void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
||||
bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
||||
uint64_t size, Error **errp)
|
||||
{
|
||||
const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
|
||||
|
@ -45,7 +45,7 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
|||
|
||||
if (!mc->nvdimm_supported) {
|
||||
error_setg(errp, "NVDIMM hotplug not supported for this machine");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -59,20 +59,20 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
|||
*/
|
||||
if (!ms->nvdimms_state->is_enabled && nvdimm_opt) {
|
||||
error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
|
||||
&error_abort) == 0) {
|
||||
error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
|
||||
error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
|
||||
" to be a multiple of %" PRIu64 "MB",
|
||||
SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
|
||||
|
@ -82,8 +82,10 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
|||
|
||||
if (qemu_uuid_is_null(&uuid)) {
|
||||
error_setg(errp, "NVDIMM device requires the uuid to be set");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@ -91,14 +93,11 @@ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp)
|
|||
{
|
||||
SpaprDrc *drc;
|
||||
bool hotplugged = spapr_drc_hotplugged(dev);
|
||||
Error *local_err = NULL;
|
||||
|
||||
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
|
||||
g_assert(drc);
|
||||
|
||||
spapr_drc_attach(drc, dev, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_drc_attach(drc, dev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -1539,7 +1539,6 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
|
|||
PCIDevice *pdev = PCI_DEVICE(plugged_dev);
|
||||
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
|
||||
SpaprDrc *drc = drc_from_dev(phb, pdev);
|
||||
Error *local_err = NULL;
|
||||
PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
|
||||
uint32_t slotnr = PCI_SLOT(pdev->devfn);
|
||||
|
||||
|
@ -1578,9 +1577,7 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
|
|||
return;
|
||||
}
|
||||
|
||||
spapr_drc_attach(drc, DEVICE(pdev), &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (!spapr_drc_attach(drc, DEVICE(pdev), errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -474,7 +474,6 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
|
|||
SpaprVioDevice *dev = (SpaprVioDevice *)qdev;
|
||||
SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
|
||||
char *id;
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (dev->reg != -1) {
|
||||
/*
|
||||
|
@ -510,16 +509,15 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
|
|||
dev->irq = spapr_vio_reg_to_irq(dev->reg);
|
||||
|
||||
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
|
||||
dev->irq = spapr_irq_findone(spapr, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
int irq = spapr_irq_findone(spapr, errp);
|
||||
|
||||
if (irq < 0) {
|
||||
return;
|
||||
}
|
||||
dev->irq = irq;
|
||||
}
|
||||
|
||||
spapr_irq_claim(spapr, dev->irq, false, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -138,6 +138,7 @@ struct SpaprMachineClass {
|
|||
bool smp_threads_vsmt; /* set VSMT to smp_threads by default */
|
||||
hwaddr rma_limit; /* clamp the RMA to this size */
|
||||
bool pre_5_1_assoc_refpoints;
|
||||
bool pre_5_2_numa_associativity;
|
||||
|
||||
void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
|
||||
uint64_t *buid, hwaddr *pio,
|
||||
|
@ -853,6 +854,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
|
|||
void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
|
||||
uint64_t pte0, uint64_t pte1);
|
||||
void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
|
||||
bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr);
|
||||
|
||||
/* DRC callbacks. */
|
||||
void spapr_core_release(DeviceState *dev);
|
||||
|
@ -902,7 +904,7 @@ void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg);
|
|||
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
|
||||
|
||||
int spapr_get_vcpu_id(PowerPCCPU *cpu);
|
||||
void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
|
||||
bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
|
||||
PowerPCCPU *spapr_find_cpu(int vcpu_id);
|
||||
|
||||
int spapr_caps_pre_load(void *opaque);
|
||||
|
@ -934,7 +936,7 @@ void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu);
|
|||
void spapr_caps_add_properties(SpaprMachineClass *smc);
|
||||
int spapr_caps_post_migration(SpaprMachineState *spapr);
|
||||
|
||||
void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
||||
bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
|
||||
Error **errp);
|
||||
/*
|
||||
* XIVE definitions
|
||||
|
|
|
@ -235,7 +235,7 @@ SpaprDrc *spapr_drc_by_index(uint32_t index);
|
|||
SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id);
|
||||
int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask);
|
||||
|
||||
void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp);
|
||||
bool spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp);
|
||||
void spapr_drc_detach(SpaprDrc *drc);
|
||||
|
||||
/* Returns true if a hot plug/unplug request is pending */
|
||||
|
|
|
@ -28,7 +28,7 @@ QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);
|
|||
int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
|
||||
void *fdt, int *fdt_start_offset, Error **errp);
|
||||
void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt);
|
||||
void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
||||
bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
|
||||
uint64_t size, Error **errp);
|
||||
void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp);
|
||||
void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr);
|
||||
|
|
|
@ -158,7 +158,7 @@ bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
|
|||
return pcc_compat(pcc, compat_pvr, min_compat_pvr, max_compat_pvr);
|
||||
}
|
||||
|
||||
void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp)
|
||||
int ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp)
|
||||
{
|
||||
const CompatInfo *compat = compat_by_pvr(compat_pvr);
|
||||
CPUPPCState *env = &cpu->env;
|
||||
|
@ -169,11 +169,11 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp)
|
|||
pcr = 0;
|
||||
} else if (!compat) {
|
||||
error_setg(errp, "Unknown compatibility PVR 0x%08"PRIx32, compat_pvr);
|
||||
return;
|
||||
return -EINVAL;
|
||||
} else if (!ppc_check_compat(cpu, compat_pvr, 0, 0)) {
|
||||
error_setg(errp, "Compatibility PVR 0x%08"PRIx32" not valid for CPU",
|
||||
compat_pvr);
|
||||
return;
|
||||
return -EINVAL;
|
||||
} else {
|
||||
pcr = compat->pcr;
|
||||
}
|
||||
|
@ -185,17 +185,19 @@ void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp)
|
|||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret,
|
||||
"Unable to set CPU compatibility mode in KVM");
|
||||
return;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
cpu->compat_pvr = compat_pvr;
|
||||
env->spr[SPR_PCR] = pcr & pcc->pcr_mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint32_t compat_pvr;
|
||||
Error *err;
|
||||
Error **errp;
|
||||
int ret;
|
||||
} SetCompatState;
|
||||
|
||||
static void do_set_compat(CPUState *cs, run_on_cpu_data arg)
|
||||
|
@ -203,26 +205,28 @@ static void do_set_compat(CPUState *cs, run_on_cpu_data arg)
|
|||
PowerPCCPU *cpu = POWERPC_CPU(cs);
|
||||
SetCompatState *s = arg.host_ptr;
|
||||
|
||||
ppc_set_compat(cpu, s->compat_pvr, &s->err);
|
||||
s->ret = ppc_set_compat(cpu, s->compat_pvr, s->errp);
|
||||
}
|
||||
|
||||
void ppc_set_compat_all(uint32_t compat_pvr, Error **errp)
|
||||
int ppc_set_compat_all(uint32_t compat_pvr, Error **errp)
|
||||
{
|
||||
CPUState *cs;
|
||||
|
||||
CPU_FOREACH(cs) {
|
||||
SetCompatState s = {
|
||||
.compat_pvr = compat_pvr,
|
||||
.err = NULL,
|
||||
.errp = errp,
|
||||
.ret = 0,
|
||||
};
|
||||
|
||||
run_on_cpu(cs, do_set_compat, RUN_ON_CPU_HOST_PTR(&s));
|
||||
|
||||
if (s.err) {
|
||||
error_propagate(errp, s.err);
|
||||
return;
|
||||
if (s.ret < 0) {
|
||||
return s.ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ppc_compat_max_vthreads(PowerPCCPU *cpu)
|
||||
|
|
|
@ -1352,10 +1352,10 @@ bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr,
|
|||
bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
|
||||
uint32_t min_compat_pvr, uint32_t max_compat_pvr);
|
||||
|
||||
void ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp);
|
||||
int ppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr, Error **errp);
|
||||
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
void ppc_set_compat_all(uint32_t compat_pvr, Error **errp);
|
||||
int ppc_set_compat_all(uint32_t compat_pvr, Error **errp);
|
||||
#endif
|
||||
int ppc_compat_max_vthreads(PowerPCCPU *cpu);
|
||||
void ppc_compat_add_property(Object *obj, const char *name,
|
||||
|
|
|
@ -347,18 +347,19 @@ static int cpu_post_load(void *opaque, int version_id)
|
|||
if (cpu->compat_pvr) {
|
||||
uint32_t compat_pvr = cpu->compat_pvr;
|
||||
Error *local_err = NULL;
|
||||
int ret;
|
||||
|
||||
cpu->compat_pvr = 0;
|
||||
ppc_set_compat(cpu, compat_pvr, &local_err);
|
||||
if (local_err) {
|
||||
ret = ppc_set_compat(cpu, compat_pvr, &local_err);
|
||||
if (ret < 0) {
|
||||
error_report_err(local_err);
|
||||
return -1;
|
||||
return ret;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
if (!pvr_match(cpu, env->spr[SPR_PVR])) {
|
||||
return -1;
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue