mirror of https://github.com/xemu-project/xemu.git
target-arm queue:
* Implement FEAT_EBF16 emulation * accel/tcg: Remove dead code from rr_cpu_thread_fn() * hw: add compat machines for 9.2 * virt: default to two-stage SMMU from virt-9.2 * sbsa-ref: use two-stage SMMU * hw: Various minor memory leak fixes * target/arm: Correct names of VFP VFNMA and VFNMS insns * hw/arm/xilinx_zynq: Enable Security Extensions * hw/arm/boot: Report error msg if loading elf/dtb failed -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmbZqzEZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3lJ7D/9s/ZTkiCj/z+caHotwNJVt ECgEEVinitwZxSMINZd1f6bxTY8hYVjMewj6A6RvHtMJMr7SUOmL8wi0YlbhTm44 jb8dZVf3pzPaZ399jxOeGnFipGyKmK0XM5rKc7CP6yJUS3B9RkUbLEHng8Q0ZBtl cnZqI12jJBdtHU8D4JIvBgM2N2ay4bKY8EQEPCv4S7ZTKawWcKgSR5pMd2TBIqIT 0gaDL3eOgCt2XWIrMzRjvaJK70obN/+n+vZQskJ/sIDsw+Kz8sZGlivdBXLRmQ+A OUgtdyZoD42Q8KtwM0bjoaoxz6VMNPJp5khB45EPjVgWyeyJ0L6ZcWCX7nT4hZsi 1C0NJaJU6HQbfsPiMIGxgHYJCbQue/mVBE02MPhmN8fZlsTRKWT9Miu67S0PI5Ib ZWo88Ew1coucBm25K2NWdoR3dCP8EFnxqL556L8M4iDWYQ/djf8cpFAN9QJBFrNw CaXS+vxIFUjZ6TSjf8gOYPAONmAg5DsCucgyO4MBKnvlY5h2J+GTq/FC+kWzL9jE UfhqOWSP34ol2lg319zOtKg4Ga+GOivo2DmgWQhDwZ2rmRR+xgN8rkQjpJKIT5Zj Ji+ucJrghBZ0sN622QYG0u0Ap9Jy4KCOxcFfS1b4gNhmMDWg27Tx9tIguXmjOE3M aAs4wmm4Nz4kpsf1KkB11Q== =gZuf -----END PGP SIGNATURE----- Merge tag 'pull-target-arm-20240905' of https://git.linaro.org/people/pmaydell/qemu-arm into staging target-arm queue: * Implement FEAT_EBF16 emulation * accel/tcg: Remove dead code from rr_cpu_thread_fn() * hw: add compat machines for 9.2 * virt: default to two-stage SMMU from virt-9.2 * sbsa-ref: use two-stage SMMU * hw: Various minor memory leak fixes * target/arm: Correct names of VFP VFNMA and VFNMS insns * hw/arm/xilinx_zynq: Enable Security Extensions * hw/arm/boot: Report error msg if loading elf/dtb failed # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmbZqzEZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3lJ7D/9s/ZTkiCj/z+caHotwNJVt # 
ECgEEVinitwZxSMINZd1f6bxTY8hYVjMewj6A6RvHtMJMr7SUOmL8wi0YlbhTm44 # jb8dZVf3pzPaZ399jxOeGnFipGyKmK0XM5rKc7CP6yJUS3B9RkUbLEHng8Q0ZBtl # cnZqI12jJBdtHU8D4JIvBgM2N2ay4bKY8EQEPCv4S7ZTKawWcKgSR5pMd2TBIqIT # 0gaDL3eOgCt2XWIrMzRjvaJK70obN/+n+vZQskJ/sIDsw+Kz8sZGlivdBXLRmQ+A # OUgtdyZoD42Q8KtwM0bjoaoxz6VMNPJp5khB45EPjVgWyeyJ0L6ZcWCX7nT4hZsi # 1C0NJaJU6HQbfsPiMIGxgHYJCbQue/mVBE02MPhmN8fZlsTRKWT9Miu67S0PI5Ib # ZWo88Ew1coucBm25K2NWdoR3dCP8EFnxqL556L8M4iDWYQ/djf8cpFAN9QJBFrNw # CaXS+vxIFUjZ6TSjf8gOYPAONmAg5DsCucgyO4MBKnvlY5h2J+GTq/FC+kWzL9jE # UfhqOWSP34ol2lg319zOtKg4Ga+GOivo2DmgWQhDwZ2rmRR+xgN8rkQjpJKIT5Zj # Ji+ucJrghBZ0sN622QYG0u0Ap9Jy4KCOxcFfS1b4gNhmMDWg27Tx9tIguXmjOE3M # aAs4wmm4Nz4kpsf1KkB11Q== # =gZuf # -----END PGP SIGNATURE----- # gpg: Signature made Thu 05 Sep 2024 13:59:29 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * tag 'pull-target-arm-20240905' of https://git.linaro.org/people/pmaydell/qemu-arm: (25 commits) platform-bus: fix refcount leak hw/arm/boot: Explain why load_elf_hdr() error is ignored hw/arm/boot: Report error msg if loading elf/dtb failed hw/arm/xilinx_zynq: Enable Security Extensions target/arm: Correct names of VFP VFNMA and VFNMS insns hw/arm/sbsa-ref: Don't leak string in sbsa_fdt_add_gic_node() hm/nvram/xlnx-versal-efuse-ctrl: Call register_finalize_block hw/misc/xlnx-versal-trng: Call register_finalize_block hw/nvram/xlnx-zynqmp-efuse: Call register_finalize_block hw/nvram/xlnx-bbram: Call register_finalize_block hw/misc/xlnx-versal-trng: Free s->prng in finalize, not unrealize hw/misc/xlnx-versal-cfu: destroy fifo in finalize 
hw/arm/sbsa-ref: Use two-stage SMMU hw/arm/virt: Default to two-stage SMMU from virt-9.2 hw/arm/smmuv3: Update comment documenting "stage" property hw: add compat machines for 9.2 accel/tcg: Remove dead code from rr_cpu_thread_fn() target/arm: Enable FEAT_EBF16 in the "max" CPU target/arm: Implement FPCR.EBF=1 semantics for bfdotadd() target/arm: Prepare bfdotadd() callers for FEAT_EBF support ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
ec08d9a51e
|
@ -302,9 +302,7 @@ static void *rr_cpu_thread_fn(void *arg)
|
||||||
rr_deal_with_unplugged_cpus();
|
rr_deal_with_unplugged_cpus();
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_remove_force_rcu_notifier(&force_rcu);
|
g_assert_not_reached();
|
||||||
rcu_unregister_thread();
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void rr_start_vcpu_thread(CPUState *cpu)
|
void rr_start_vcpu_thread(CPUState *cpu)
|
||||||
|
|
|
@ -45,6 +45,7 @@ the following architecture extensions:
|
||||||
- FEAT_DotProd (Advanced SIMD dot product instructions)
|
- FEAT_DotProd (Advanced SIMD dot product instructions)
|
||||||
- FEAT_DoubleFault (Double Fault Extension)
|
- FEAT_DoubleFault (Double Fault Extension)
|
||||||
- FEAT_E0PD (Preventing EL0 access to halves of address maps)
|
- FEAT_E0PD (Preventing EL0 access to halves of address maps)
|
||||||
|
- FEAT_EBF16 (AArch64 Extended BFloat16 instructions)
|
||||||
- FEAT_ECV (Enhanced Counter Virtualization)
|
- FEAT_ECV (Enhanced Counter Virtualization)
|
||||||
- FEAT_EL0 (Support for execution at EL0)
|
- FEAT_EL0 (Support for execution at EL0)
|
||||||
- FEAT_EL1 (Support for execution at EL1)
|
- FEAT_EL1 (Support for execution at EL1)
|
||||||
|
|
|
@ -799,14 +799,18 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
|
||||||
} elf_header;
|
} elf_header;
|
||||||
int data_swab = 0;
|
int data_swab = 0;
|
||||||
bool big_endian;
|
bool big_endian;
|
||||||
ssize_t ret = -1;
|
ssize_t ret;
|
||||||
Error *err = NULL;
|
Error *err = NULL;
|
||||||
|
|
||||||
|
|
||||||
load_elf_hdr(info->kernel_filename, &elf_header, &elf_is64, &err);
|
load_elf_hdr(info->kernel_filename, &elf_header, &elf_is64, &err);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
/*
|
||||||
|
* If the file is not an ELF file we silently return.
|
||||||
|
* The caller will fall back to try other formats.
|
||||||
|
*/
|
||||||
error_free(err);
|
error_free(err);
|
||||||
return ret;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (elf_is64) {
|
if (elf_is64) {
|
||||||
|
@ -839,6 +843,8 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
|
||||||
1, data_swab, as);
|
1, data_swab, as);
|
||||||
if (ret <= 0) {
|
if (ret <= 0) {
|
||||||
/* The header loaded but the image didn't */
|
/* The header loaded but the image didn't */
|
||||||
|
error_report("Couldn't load elf '%s': %s",
|
||||||
|
info->kernel_filename, load_elf_strerror(ret));
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -164,23 +164,20 @@ static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx)
|
||||||
|
|
||||||
static void sbsa_fdt_add_gic_node(SBSAMachineState *sms)
|
static void sbsa_fdt_add_gic_node(SBSAMachineState *sms)
|
||||||
{
|
{
|
||||||
char *nodename;
|
const char *intc_nodename = "/intc";
|
||||||
|
const char *its_nodename = "/intc/its";
|
||||||
|
|
||||||
nodename = g_strdup_printf("/intc");
|
qemu_fdt_add_subnode(sms->fdt, intc_nodename);
|
||||||
qemu_fdt_add_subnode(sms->fdt, nodename);
|
qemu_fdt_setprop_sized_cells(sms->fdt, intc_nodename, "reg",
|
||||||
qemu_fdt_setprop_sized_cells(sms->fdt, nodename, "reg",
|
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_DIST].base,
|
2, sbsa_ref_memmap[SBSA_GIC_DIST].base,
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_DIST].size,
|
2, sbsa_ref_memmap[SBSA_GIC_DIST].size,
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_REDIST].base,
|
2, sbsa_ref_memmap[SBSA_GIC_REDIST].base,
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_REDIST].size);
|
2, sbsa_ref_memmap[SBSA_GIC_REDIST].size);
|
||||||
|
|
||||||
nodename = g_strdup_printf("/intc/its");
|
qemu_fdt_add_subnode(sms->fdt, its_nodename);
|
||||||
qemu_fdt_add_subnode(sms->fdt, nodename);
|
qemu_fdt_setprop_sized_cells(sms->fdt, its_nodename, "reg",
|
||||||
qemu_fdt_setprop_sized_cells(sms->fdt, nodename, "reg",
|
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_ITS].base,
|
2, sbsa_ref_memmap[SBSA_GIC_ITS].base,
|
||||||
2, sbsa_ref_memmap[SBSA_GIC_ITS].size);
|
2, sbsa_ref_memmap[SBSA_GIC_ITS].size);
|
||||||
|
|
||||||
g_free(nodename);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -621,6 +618,7 @@ static void create_smmu(const SBSAMachineState *sms, PCIBus *bus)
|
||||||
|
|
||||||
dev = qdev_new(TYPE_ARM_SMMUV3);
|
dev = qdev_new(TYPE_ARM_SMMUV3);
|
||||||
|
|
||||||
|
object_property_set_str(OBJECT(dev), "stage", "nested", &error_abort);
|
||||||
object_property_set_link(OBJECT(dev), "primary-bus", OBJECT(bus),
|
object_property_set_link(OBJECT(dev), "primary-bus", OBJECT(bus),
|
||||||
&error_abort);
|
&error_abort);
|
||||||
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
|
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
|
||||||
|
|
|
@ -1981,6 +1981,7 @@ static Property smmuv3_properties[] = {
|
||||||
* Stages of translation advertised.
|
* Stages of translation advertised.
|
||||||
* "1": Stage 1
|
* "1": Stage 1
|
||||||
* "2": Stage 2
|
* "2": Stage 2
|
||||||
|
* "nested": Both stage 1 and stage 2
|
||||||
* Defaults to stage 1
|
* Defaults to stage 1
|
||||||
*/
|
*/
|
||||||
DEFINE_PROP_STRING("stage", SMMUv3State, stage),
|
DEFINE_PROP_STRING("stage", SMMUv3State, stage),
|
||||||
|
|
|
@ -1408,6 +1408,7 @@ static void create_pcie_irq_map(const MachineState *ms,
|
||||||
static void create_smmu(const VirtMachineState *vms,
|
static void create_smmu(const VirtMachineState *vms,
|
||||||
PCIBus *bus)
|
PCIBus *bus)
|
||||||
{
|
{
|
||||||
|
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
|
||||||
char *node;
|
char *node;
|
||||||
const char compat[] = "arm,smmu-v3";
|
const char compat[] = "arm,smmu-v3";
|
||||||
int irq = vms->irqmap[VIRT_SMMU];
|
int irq = vms->irqmap[VIRT_SMMU];
|
||||||
|
@ -1424,6 +1425,9 @@ static void create_smmu(const VirtMachineState *vms,
|
||||||
|
|
||||||
dev = qdev_new(TYPE_ARM_SMMUV3);
|
dev = qdev_new(TYPE_ARM_SMMUV3);
|
||||||
|
|
||||||
|
if (!vmc->no_nested_smmu) {
|
||||||
|
object_property_set_str(OBJECT(dev), "stage", "nested", &error_fatal);
|
||||||
|
}
|
||||||
object_property_set_link(OBJECT(dev), "primary-bus", OBJECT(bus),
|
object_property_set_link(OBJECT(dev), "primary-bus", OBJECT(bus),
|
||||||
&error_abort);
|
&error_abort);
|
||||||
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
|
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
|
||||||
|
@ -3301,10 +3305,21 @@ static void machvirt_machine_init(void)
|
||||||
}
|
}
|
||||||
type_init(machvirt_machine_init);
|
type_init(machvirt_machine_init);
|
||||||
|
|
||||||
static void virt_machine_9_1_options(MachineClass *mc)
|
static void virt_machine_9_2_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
DEFINE_VIRT_MACHINE_AS_LATEST(9, 1)
|
DEFINE_VIRT_MACHINE_AS_LATEST(9, 2)
|
||||||
|
|
||||||
|
static void virt_machine_9_1_options(MachineClass *mc)
|
||||||
|
{
|
||||||
|
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
|
||||||
|
|
||||||
|
virt_machine_9_2_options(mc);
|
||||||
|
compat_props_add(mc->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
|
/* 9.1 and earlier have only a stage-1 SMMU, not a nested s1+2 one */
|
||||||
|
vmc->no_nested_smmu = true;
|
||||||
|
}
|
||||||
|
DEFINE_VIRT_MACHINE(9, 1)
|
||||||
|
|
||||||
static void virt_machine_9_0_options(MachineClass *mc)
|
static void virt_machine_9_0_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
|
|
|
@ -219,14 +219,6 @@ static void zynq_init(MachineState *machine)
|
||||||
for (n = 0; n < smp_cpus; n++) {
|
for (n = 0; n < smp_cpus; n++) {
|
||||||
Object *cpuobj = object_new(machine->cpu_type);
|
Object *cpuobj = object_new(machine->cpu_type);
|
||||||
|
|
||||||
/*
|
|
||||||
* By default A9 CPUs have EL3 enabled. This board does not currently
|
|
||||||
* support EL3 so the CPU EL3 property is disabled before realization.
|
|
||||||
*/
|
|
||||||
if (object_property_find(cpuobj, "has_el3")) {
|
|
||||||
object_property_set_bool(cpuobj, "has_el3", false, &error_fatal);
|
|
||||||
}
|
|
||||||
|
|
||||||
object_property_set_int(cpuobj, "midr", ZYNQ_BOARD_MIDR,
|
object_property_set_int(cpuobj, "midr", ZYNQ_BOARD_MIDR,
|
||||||
&error_fatal);
|
&error_fatal);
|
||||||
object_property_set_int(cpuobj, "reset-cbar", MPCORE_PERIPHBASE,
|
object_property_set_int(cpuobj, "reset-cbar", MPCORE_PERIPHBASE,
|
||||||
|
|
|
@ -34,6 +34,9 @@
|
||||||
#include "hw/virtio/virtio-iommu.h"
|
#include "hw/virtio/virtio-iommu.h"
|
||||||
#include "audio/audio.h"
|
#include "audio/audio.h"
|
||||||
|
|
||||||
|
GlobalProperty hw_compat_9_1[] = {};
|
||||||
|
const size_t hw_compat_9_1_len = G_N_ELEMENTS(hw_compat_9_1);
|
||||||
|
|
||||||
GlobalProperty hw_compat_9_0[] = {
|
GlobalProperty hw_compat_9_0[] = {
|
||||||
{"arm-cpu", "backcompat-cntfrq", "true" },
|
{"arm-cpu", "backcompat-cntfrq", "true" },
|
||||||
{ "scsi-hd", "migrate-emulated-scsi-request", "false" },
|
{ "scsi-hd", "migrate-emulated-scsi-request", "false" },
|
||||||
|
|
|
@ -145,9 +145,12 @@ static void platform_bus_map_mmio(PlatformBusDevice *pbus, SysBusDevice *sbdev,
|
||||||
* the target device's memory region
|
* the target device's memory region
|
||||||
*/
|
*/
|
||||||
for (off = 0; off < pbus->mmio_size; off += alignment) {
|
for (off = 0; off < pbus->mmio_size; off += alignment) {
|
||||||
if (!memory_region_find(&pbus->mmio, off, size).mr) {
|
MemoryRegion *mr = memory_region_find(&pbus->mmio, off, size).mr;
|
||||||
|
if (!mr) {
|
||||||
found_region = true;
|
found_region = true;
|
||||||
break;
|
break;
|
||||||
|
} else {
|
||||||
|
memory_region_unref(mr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -79,6 +79,9 @@
|
||||||
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
|
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
|
||||||
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
|
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
|
||||||
|
|
||||||
|
GlobalProperty pc_compat_9_1[] = {};
|
||||||
|
const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1);
|
||||||
|
|
||||||
GlobalProperty pc_compat_9_0[] = {
|
GlobalProperty pc_compat_9_0[] = {
|
||||||
{ TYPE_X86_CPU, "x-amd-topoext-features-only", "false" },
|
{ TYPE_X86_CPU, "x-amd-topoext-features-only", "false" },
|
||||||
{ TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
|
{ TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
|
||||||
|
|
|
@ -474,13 +474,24 @@ static void pc_i440fx_machine_options(MachineClass *m)
|
||||||
"Use a different south bridge than PIIX3");
|
"Use a different south bridge than PIIX3");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pc_i440fx_machine_9_1_options(MachineClass *m)
|
static void pc_i440fx_machine_9_2_options(MachineClass *m)
|
||||||
{
|
{
|
||||||
pc_i440fx_machine_options(m);
|
pc_i440fx_machine_options(m);
|
||||||
m->alias = "pc";
|
m->alias = "pc";
|
||||||
m->is_default = true;
|
m->is_default = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_I440FX_MACHINE(9, 2);
|
||||||
|
|
||||||
|
static void pc_i440fx_machine_9_1_options(MachineClass *m)
|
||||||
|
{
|
||||||
|
pc_i440fx_machine_9_2_options(m);
|
||||||
|
m->alias = NULL;
|
||||||
|
m->is_default = false;
|
||||||
|
compat_props_add(m->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
|
compat_props_add(m->compat_props, pc_compat_9_1, pc_compat_9_1_len);
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_I440FX_MACHINE(9, 1);
|
DEFINE_I440FX_MACHINE(9, 1);
|
||||||
|
|
||||||
static void pc_i440fx_machine_9_0_options(MachineClass *m)
|
static void pc_i440fx_machine_9_0_options(MachineClass *m)
|
||||||
|
@ -488,8 +499,6 @@ static void pc_i440fx_machine_9_0_options(MachineClass *m)
|
||||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||||
|
|
||||||
pc_i440fx_machine_9_1_options(m);
|
pc_i440fx_machine_9_1_options(m);
|
||||||
m->alias = NULL;
|
|
||||||
m->is_default = false;
|
|
||||||
m->smbios_memory_device_size = 16 * GiB;
|
m->smbios_memory_device_size = 16 * GiB;
|
||||||
|
|
||||||
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
|
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
|
||||||
|
|
|
@ -356,19 +356,28 @@ static void pc_q35_machine_options(MachineClass *m)
|
||||||
pc_q35_compat_defaults, pc_q35_compat_defaults_len);
|
pc_q35_compat_defaults, pc_q35_compat_defaults_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pc_q35_machine_9_1_options(MachineClass *m)
|
static void pc_q35_machine_9_2_options(MachineClass *m)
|
||||||
{
|
{
|
||||||
pc_q35_machine_options(m);
|
pc_q35_machine_options(m);
|
||||||
m->alias = "q35";
|
m->alias = "q35";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_Q35_MACHINE(9, 2);
|
||||||
|
|
||||||
|
static void pc_q35_machine_9_1_options(MachineClass *m)
|
||||||
|
{
|
||||||
|
pc_q35_machine_9_2_options(m);
|
||||||
|
m->alias = NULL;
|
||||||
|
compat_props_add(m->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
|
compat_props_add(m->compat_props, pc_compat_9_1, pc_compat_9_1_len);
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_Q35_MACHINE(9, 1);
|
DEFINE_Q35_MACHINE(9, 1);
|
||||||
|
|
||||||
static void pc_q35_machine_9_0_options(MachineClass *m)
|
static void pc_q35_machine_9_0_options(MachineClass *m)
|
||||||
{
|
{
|
||||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||||
pc_q35_machine_9_1_options(m);
|
pc_q35_machine_9_1_options(m);
|
||||||
m->alias = NULL;
|
|
||||||
m->smbios_memory_device_size = 16 * GiB;
|
m->smbios_memory_device_size = 16 * GiB;
|
||||||
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
|
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
|
||||||
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
|
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
|
||||||
|
|
|
@ -366,10 +366,17 @@ type_init(virt_machine_register_types)
|
||||||
#define DEFINE_VIRT_MACHINE(major, minor) \
|
#define DEFINE_VIRT_MACHINE(major, minor) \
|
||||||
DEFINE_VIRT_MACHINE_IMPL(false, major, minor)
|
DEFINE_VIRT_MACHINE_IMPL(false, major, minor)
|
||||||
|
|
||||||
static void virt_machine_9_1_options(MachineClass *mc)
|
static void virt_machine_9_2_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
DEFINE_VIRT_MACHINE_AS_LATEST(9, 1)
|
DEFINE_VIRT_MACHINE_AS_LATEST(9, 2)
|
||||||
|
|
||||||
|
static void virt_machine_9_1_options(MachineClass *mc)
|
||||||
|
{
|
||||||
|
virt_machine_9_2_options(mc);
|
||||||
|
compat_props_add(mc->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
|
}
|
||||||
|
DEFINE_VIRT_MACHINE(9, 1)
|
||||||
|
|
||||||
static void virt_machine_9_0_options(MachineClass *mc)
|
static void virt_machine_9_0_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
|
|
|
@ -397,6 +397,13 @@ static void cfu_fdro_init(Object *obj)
|
||||||
fifo32_create(&s->fdro_data, 8 * KiB / sizeof(uint32_t));
|
fifo32_create(&s->fdro_data, 8 * KiB / sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void cfu_fdro_finalize(Object *obj)
|
||||||
|
{
|
||||||
|
XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj);
|
||||||
|
|
||||||
|
fifo32_destroy(&s->fdro_data);
|
||||||
|
}
|
||||||
|
|
||||||
static void cfu_fdro_reset_enter(Object *obj, ResetType type)
|
static void cfu_fdro_reset_enter(Object *obj, ResetType type)
|
||||||
{
|
{
|
||||||
XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj);
|
XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj);
|
||||||
|
@ -539,6 +546,7 @@ static const TypeInfo cfu_fdro_info = {
|
||||||
.instance_size = sizeof(XlnxVersalCFUFDRO),
|
.instance_size = sizeof(XlnxVersalCFUFDRO),
|
||||||
.class_init = cfu_fdro_class_init,
|
.class_init = cfu_fdro_class_init,
|
||||||
.instance_init = cfu_fdro_init,
|
.instance_init = cfu_fdro_init,
|
||||||
|
.instance_finalize = cfu_fdro_finalize,
|
||||||
.interfaces = (InterfaceInfo[]) {
|
.interfaces = (InterfaceInfo[]) {
|
||||||
{ TYPE_XLNX_CFI_IF },
|
{ TYPE_XLNX_CFI_IF },
|
||||||
{ }
|
{ }
|
||||||
|
|
|
@ -608,9 +608,8 @@ static void trng_init(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxVersalTRng *s = XLNX_VERSAL_TRNG(obj);
|
XlnxVersalTRng *s = XLNX_VERSAL_TRNG(obj);
|
||||||
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
||||||
RegisterInfoArray *reg_array;
|
|
||||||
|
|
||||||
reg_array =
|
s->reg_array =
|
||||||
register_init_block32(DEVICE(obj), trng_regs_info,
|
register_init_block32(DEVICE(obj), trng_regs_info,
|
||||||
ARRAY_SIZE(trng_regs_info),
|
ARRAY_SIZE(trng_regs_info),
|
||||||
s->regs_info, s->regs,
|
s->regs_info, s->regs,
|
||||||
|
@ -618,16 +617,17 @@ static void trng_init(Object *obj)
|
||||||
XLNX_VERSAL_TRNG_ERR_DEBUG,
|
XLNX_VERSAL_TRNG_ERR_DEBUG,
|
||||||
R_MAX * 4);
|
R_MAX * 4);
|
||||||
|
|
||||||
sysbus_init_mmio(sbd, &reg_array->mem);
|
sysbus_init_mmio(sbd, &s->reg_array->mem);
|
||||||
sysbus_init_irq(sbd, &s->irq);
|
sysbus_init_irq(sbd, &s->irq);
|
||||||
|
|
||||||
s->prng = g_rand_new();
|
s->prng = g_rand_new();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void trng_unrealize(DeviceState *dev)
|
static void trng_finalize(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxVersalTRng *s = XLNX_VERSAL_TRNG(dev);
|
XlnxVersalTRng *s = XLNX_VERSAL_TRNG(obj);
|
||||||
|
|
||||||
|
register_finalize_block(s->reg_array);
|
||||||
g_rand_free(s->prng);
|
g_rand_free(s->prng);
|
||||||
s->prng = NULL;
|
s->prng = NULL;
|
||||||
}
|
}
|
||||||
|
@ -689,7 +689,6 @@ static void trng_class_init(ObjectClass *klass, void *data)
|
||||||
ResettableClass *rc = RESETTABLE_CLASS(klass);
|
ResettableClass *rc = RESETTABLE_CLASS(klass);
|
||||||
|
|
||||||
dc->vmsd = &vmstate_trng;
|
dc->vmsd = &vmstate_trng;
|
||||||
dc->unrealize = trng_unrealize;
|
|
||||||
rc->phases.hold = trng_reset_hold;
|
rc->phases.hold = trng_reset_hold;
|
||||||
|
|
||||||
/* Clone uint64 property with set allowed after realized */
|
/* Clone uint64 property with set allowed after realized */
|
||||||
|
@ -706,6 +705,7 @@ static const TypeInfo trng_info = {
|
||||||
.instance_size = sizeof(XlnxVersalTRng),
|
.instance_size = sizeof(XlnxVersalTRng),
|
||||||
.class_init = trng_class_init,
|
.class_init = trng_class_init,
|
||||||
.instance_init = trng_init,
|
.instance_init = trng_init,
|
||||||
|
.instance_finalize = trng_finalize,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void trng_register_types(void)
|
static void trng_register_types(void)
|
||||||
|
|
|
@ -456,9 +456,8 @@ static void bbram_ctrl_init(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxBBRam *s = XLNX_BBRAM(obj);
|
XlnxBBRam *s = XLNX_BBRAM(obj);
|
||||||
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
||||||
RegisterInfoArray *reg_array;
|
|
||||||
|
|
||||||
reg_array =
|
s->reg_array =
|
||||||
register_init_block32(DEVICE(obj), bbram_ctrl_regs_info,
|
register_init_block32(DEVICE(obj), bbram_ctrl_regs_info,
|
||||||
ARRAY_SIZE(bbram_ctrl_regs_info),
|
ARRAY_SIZE(bbram_ctrl_regs_info),
|
||||||
s->regs_info, s->regs,
|
s->regs_info, s->regs,
|
||||||
|
@ -466,10 +465,17 @@ static void bbram_ctrl_init(Object *obj)
|
||||||
XLNX_BBRAM_ERR_DEBUG,
|
XLNX_BBRAM_ERR_DEBUG,
|
||||||
R_MAX * 4);
|
R_MAX * 4);
|
||||||
|
|
||||||
sysbus_init_mmio(sbd, &reg_array->mem);
|
sysbus_init_mmio(sbd, &s->reg_array->mem);
|
||||||
sysbus_init_irq(sbd, &s->irq_bbram);
|
sysbus_init_irq(sbd, &s->irq_bbram);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void bbram_ctrl_finalize(Object *obj)
|
||||||
|
{
|
||||||
|
XlnxBBRam *s = XLNX_BBRAM(obj);
|
||||||
|
|
||||||
|
register_finalize_block(s->reg_array);
|
||||||
|
}
|
||||||
|
|
||||||
static void bbram_prop_set_drive(Object *obj, Visitor *v, const char *name,
|
static void bbram_prop_set_drive(Object *obj, Visitor *v, const char *name,
|
||||||
void *opaque, Error **errp)
|
void *opaque, Error **errp)
|
||||||
{
|
{
|
||||||
|
@ -537,6 +543,7 @@ static const TypeInfo bbram_ctrl_info = {
|
||||||
.instance_size = sizeof(XlnxBBRam),
|
.instance_size = sizeof(XlnxBBRam),
|
||||||
.class_init = bbram_ctrl_class_init,
|
.class_init = bbram_ctrl_class_init,
|
||||||
.instance_init = bbram_ctrl_init,
|
.instance_init = bbram_ctrl_init,
|
||||||
|
.instance_finalize = bbram_ctrl_finalize,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void bbram_ctrl_register_types(void)
|
static void bbram_ctrl_register_types(void)
|
||||||
|
|
|
@ -712,9 +712,8 @@ static void efuse_ctrl_init(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj);
|
XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj);
|
||||||
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
||||||
RegisterInfoArray *reg_array;
|
|
||||||
|
|
||||||
reg_array =
|
s->reg_array =
|
||||||
register_init_block32(DEVICE(obj), efuse_ctrl_regs_info,
|
register_init_block32(DEVICE(obj), efuse_ctrl_regs_info,
|
||||||
ARRAY_SIZE(efuse_ctrl_regs_info),
|
ARRAY_SIZE(efuse_ctrl_regs_info),
|
||||||
s->regs_info, s->regs,
|
s->regs_info, s->regs,
|
||||||
|
@ -722,7 +721,7 @@ static void efuse_ctrl_init(Object *obj)
|
||||||
XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG,
|
XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG,
|
||||||
R_MAX * 4);
|
R_MAX * 4);
|
||||||
|
|
||||||
sysbus_init_mmio(sbd, &reg_array->mem);
|
sysbus_init_mmio(sbd, &s->reg_array->mem);
|
||||||
sysbus_init_irq(sbd, &s->irq_efuse_imr);
|
sysbus_init_irq(sbd, &s->irq_efuse_imr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -730,6 +729,7 @@ static void efuse_ctrl_finalize(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj);
|
XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj);
|
||||||
|
|
||||||
|
register_finalize_block(s->reg_array);
|
||||||
g_free(s->extra_pg0_lock_spec);
|
g_free(s->extra_pg0_lock_spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -803,9 +803,8 @@ static void zynqmp_efuse_init(Object *obj)
|
||||||
{
|
{
|
||||||
XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj);
|
XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj);
|
||||||
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
|
||||||
RegisterInfoArray *reg_array;
|
|
||||||
|
|
||||||
reg_array =
|
s->reg_array =
|
||||||
register_init_block32(DEVICE(obj), zynqmp_efuse_regs_info,
|
register_init_block32(DEVICE(obj), zynqmp_efuse_regs_info,
|
||||||
ARRAY_SIZE(zynqmp_efuse_regs_info),
|
ARRAY_SIZE(zynqmp_efuse_regs_info),
|
||||||
s->regs_info, s->regs,
|
s->regs_info, s->regs,
|
||||||
|
@ -813,10 +812,17 @@ static void zynqmp_efuse_init(Object *obj)
|
||||||
ZYNQMP_EFUSE_ERR_DEBUG,
|
ZYNQMP_EFUSE_ERR_DEBUG,
|
||||||
R_MAX * 4);
|
R_MAX * 4);
|
||||||
|
|
||||||
sysbus_init_mmio(sbd, &reg_array->mem);
|
sysbus_init_mmio(sbd, &s->reg_array->mem);
|
||||||
sysbus_init_irq(sbd, &s->irq);
|
sysbus_init_irq(sbd, &s->irq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void zynqmp_efuse_finalize(Object *obj)
|
||||||
|
{
|
||||||
|
XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj);
|
||||||
|
|
||||||
|
register_finalize_block(s->reg_array);
|
||||||
|
}
|
||||||
|
|
||||||
static const VMStateDescription vmstate_efuse = {
|
static const VMStateDescription vmstate_efuse = {
|
||||||
.name = TYPE_XLNX_ZYNQMP_EFUSE,
|
.name = TYPE_XLNX_ZYNQMP_EFUSE,
|
||||||
.version_id = 1,
|
.version_id = 1,
|
||||||
|
@ -853,6 +859,7 @@ static const TypeInfo efuse_info = {
|
||||||
.instance_size = sizeof(XlnxZynqMPEFuse),
|
.instance_size = sizeof(XlnxZynqMPEFuse),
|
||||||
.class_init = zynqmp_efuse_class_init,
|
.class_init = zynqmp_efuse_class_init,
|
||||||
.instance_init = zynqmp_efuse_init,
|
.instance_init = zynqmp_efuse_init,
|
||||||
|
.instance_finalize = zynqmp_efuse_finalize,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void efuse_register_types(void)
|
static void efuse_register_types(void)
|
||||||
|
|
|
@ -4838,14 +4838,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
|
||||||
DEFINE_SPAPR_MACHINE_IMPL(false, major, minor, _, tag)
|
DEFINE_SPAPR_MACHINE_IMPL(false, major, minor, _, tag)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pseries-9.1
|
* pseries-9.2
|
||||||
*/
|
*/
|
||||||
static void spapr_machine_9_1_class_options(MachineClass *mc)
|
static void spapr_machine_9_2_class_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
/* Defaults for the latest behaviour inherited from the base class */
|
/* Defaults for the latest behaviour inherited from the base class */
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_SPAPR_MACHINE_AS_LATEST(9, 1);
|
DEFINE_SPAPR_MACHINE_AS_LATEST(9, 2);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pseries-9.1
|
||||||
|
*/
|
||||||
|
static void spapr_machine_9_1_class_options(MachineClass *mc)
|
||||||
|
{
|
||||||
|
spapr_machine_9_2_class_options(mc);
|
||||||
|
compat_props_add(mc->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_SPAPR_MACHINE(9, 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pseries-9.0
|
* pseries-9.0
|
||||||
|
|
|
@ -871,14 +871,26 @@ static const TypeInfo ccw_machine_info = {
|
||||||
DEFINE_CCW_MACHINE_IMPL(false, major, minor)
|
DEFINE_CCW_MACHINE_IMPL(false, major, minor)
|
||||||
|
|
||||||
|
|
||||||
|
static void ccw_machine_9_2_instance_options(MachineState *machine)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ccw_machine_9_2_class_options(MachineClass *mc)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
DEFINE_CCW_MACHINE_AS_LATEST(9, 2);
|
||||||
|
|
||||||
static void ccw_machine_9_1_instance_options(MachineState *machine)
|
static void ccw_machine_9_1_instance_options(MachineState *machine)
|
||||||
{
|
{
|
||||||
|
ccw_machine_9_2_instance_options(machine);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ccw_machine_9_1_class_options(MachineClass *mc)
|
static void ccw_machine_9_1_class_options(MachineClass *mc)
|
||||||
{
|
{
|
||||||
|
ccw_machine_9_2_class_options(mc);
|
||||||
|
compat_props_add(mc->compat_props, hw_compat_9_1, hw_compat_9_1_len);
|
||||||
}
|
}
|
||||||
DEFINE_CCW_MACHINE_AS_LATEST(9, 1);
|
DEFINE_CCW_MACHINE(9, 1);
|
||||||
|
|
||||||
static void ccw_machine_9_0_instance_options(MachineState *machine)
|
static void ccw_machine_9_0_instance_options(MachineState *machine)
|
||||||
{
|
{
|
||||||
|
|
|
@ -134,6 +134,7 @@ struct VirtMachineClass {
|
||||||
bool no_cpu_topology;
|
bool no_cpu_topology;
|
||||||
bool no_tcg_lpa2;
|
bool no_tcg_lpa2;
|
||||||
bool no_ns_el2_virt_timer_irq;
|
bool no_ns_el2_virt_timer_irq;
|
||||||
|
bool no_nested_smmu;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VirtMachineState {
|
struct VirtMachineState {
|
||||||
|
|
|
@ -732,6 +732,9 @@ struct MachineState {
|
||||||
} \
|
} \
|
||||||
type_init(machine_initfn##_register_types)
|
type_init(machine_initfn##_register_types)
|
||||||
|
|
||||||
|
extern GlobalProperty hw_compat_9_1[];
|
||||||
|
extern const size_t hw_compat_9_1_len;
|
||||||
|
|
||||||
extern GlobalProperty hw_compat_9_0[];
|
extern GlobalProperty hw_compat_9_0[];
|
||||||
extern const size_t hw_compat_9_0_len;
|
extern const size_t hw_compat_9_0_len;
|
||||||
|
|
||||||
|
|
|
@ -215,6 +215,9 @@ void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size);
|
||||||
/* sgx.c */
|
/* sgx.c */
|
||||||
void pc_machine_init_sgx_epc(PCMachineState *pcms);
|
void pc_machine_init_sgx_epc(PCMachineState *pcms);
|
||||||
|
|
||||||
|
extern GlobalProperty pc_compat_9_1[];
|
||||||
|
extern const size_t pc_compat_9_1_len;
|
||||||
|
|
||||||
extern GlobalProperty pc_compat_9_0[];
|
extern GlobalProperty pc_compat_9_0[];
|
||||||
extern const size_t pc_compat_9_0_len;
|
extern const size_t pc_compat_9_0_len;
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,7 @@ typedef struct XlnxVersalTRng {
|
||||||
uint64_t forced_prng_count;
|
uint64_t forced_prng_count;
|
||||||
uint64_t tst_seed[2];
|
uint64_t tst_seed[2];
|
||||||
|
|
||||||
|
RegisterInfoArray *reg_array;
|
||||||
uint32_t regs[RMAX_XLNX_VERSAL_TRNG];
|
uint32_t regs[RMAX_XLNX_VERSAL_TRNG];
|
||||||
RegisterInfo regs_info[RMAX_XLNX_VERSAL_TRNG];
|
RegisterInfo regs_info[RMAX_XLNX_VERSAL_TRNG];
|
||||||
} XlnxVersalTRng;
|
} XlnxVersalTRng;
|
||||||
|
|
|
@ -47,6 +47,7 @@ struct XlnxBBRam {
|
||||||
bool bbram8_wo;
|
bool bbram8_wo;
|
||||||
bool blk_ro;
|
bool blk_ro;
|
||||||
|
|
||||||
|
RegisterInfoArray *reg_array;
|
||||||
uint32_t regs[RMAX_XLNX_BBRAM];
|
uint32_t regs[RMAX_XLNX_BBRAM];
|
||||||
RegisterInfo regs_info[RMAX_XLNX_BBRAM];
|
RegisterInfo regs_info[RMAX_XLNX_BBRAM];
|
||||||
};
|
};
|
||||||
|
|
|
@ -44,6 +44,7 @@ struct XlnxVersalEFuseCtrl {
|
||||||
void *extra_pg0_lock_spec; /* Opaque property */
|
void *extra_pg0_lock_spec; /* Opaque property */
|
||||||
uint32_t extra_pg0_lock_n16;
|
uint32_t extra_pg0_lock_n16;
|
||||||
|
|
||||||
|
RegisterInfoArray *reg_array;
|
||||||
uint32_t regs[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
|
uint32_t regs[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
|
||||||
RegisterInfo regs_info[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
|
RegisterInfo regs_info[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
|
||||||
};
|
};
|
||||||
|
|
|
@ -37,6 +37,7 @@ struct XlnxZynqMPEFuse {
|
||||||
qemu_irq irq;
|
qemu_irq irq;
|
||||||
|
|
||||||
XlnxEFuse *efuse;
|
XlnxEFuse *efuse;
|
||||||
|
RegisterInfoArray *reg_array;
|
||||||
uint32_t regs[XLNX_ZYNQMP_EFUSE_R_MAX];
|
uint32_t regs[XLNX_ZYNQMP_EFUSE_R_MAX];
|
||||||
RegisterInfo regs_info[XLNX_ZYNQMP_EFUSE_R_MAX];
|
RegisterInfo regs_info[XLNX_ZYNQMP_EFUSE_R_MAX];
|
||||||
};
|
};
|
||||||
|
|
|
@ -556,6 +556,11 @@ static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id)
|
||||||
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0;
|
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isar_feature_aa64_ebf16(const ARMISARegisters *id)
|
||||||
|
{
|
||||||
|
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) > 1;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id)
|
static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id)
|
||||||
{
|
{
|
||||||
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0;
|
return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0;
|
||||||
|
|
|
@ -1707,6 +1707,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
|
||||||
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
|
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
|
||||||
#define FPCR_UFE (1 << 11) /* Underflow exception trap enable */
|
#define FPCR_UFE (1 << 11) /* Underflow exception trap enable */
|
||||||
#define FPCR_IXE (1 << 12) /* Inexact exception trap enable */
|
#define FPCR_IXE (1 << 12) /* Inexact exception trap enable */
|
||||||
|
#define FPCR_EBF (1 << 13) /* Extended BFloat16 behaviors */
|
||||||
#define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */
|
#define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */
|
||||||
#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */
|
#define FPCR_LEN_MASK (7 << 16) /* LEN, A-profile only */
|
||||||
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
|
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
|
||||||
|
|
|
@ -1027,13 +1027,13 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
|
||||||
DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, env, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, env, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, env, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
|
|
|
@ -1160,7 +1160,7 @@ void aarch64_max_tcg_initfn(Object *obj)
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */
|
t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */
|
t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */
|
t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); /* FEAT_BF16 */
|
t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 2); /* FEAT_BF16, FEAT_EBF16 */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */
|
t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */
|
t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */
|
||||||
cpu->isar.id_aa64isar1 = t;
|
cpu->isar.id_aa64isar1 = t;
|
||||||
|
@ -1244,7 +1244,7 @@ void aarch64_max_tcg_initfn(Object *obj)
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
|
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */
|
t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */
|
t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); /* FEAT_BF16 */
|
t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */
|
t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */
|
t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */
|
||||||
t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */
|
t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */
|
||||||
|
|
|
@ -126,8 +126,8 @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||||
DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
|
||||||
|
|
|
@ -1079,38 +1079,68 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
|
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm,
|
||||||
void *vpm, uint32_t desc)
|
void *vpn, void *vpm, CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t row, col, oprsz = simd_maxsz(desc);
|
intptr_t row, col, oprsz = simd_maxsz(desc);
|
||||||
uint32_t neg = simd_data(desc) * 0x80008000u;
|
uint32_t neg = simd_data(desc) * 0x80008000u;
|
||||||
uint16_t *pn = vpn, *pm = vpm;
|
uint16_t *pn = vpn, *pm = vpm;
|
||||||
|
float_status fpst, fpst_odd;
|
||||||
|
|
||||||
for (row = 0; row < oprsz; ) {
|
if (is_ebf(env, &fpst, &fpst_odd)) {
|
||||||
uint16_t prow = pn[H2(row >> 4)];
|
for (row = 0; row < oprsz; ) {
|
||||||
do {
|
uint16_t prow = pn[H2(row >> 4)];
|
||||||
void *vza_row = vza + tile_vslice_offset(row);
|
do {
|
||||||
uint32_t n = *(uint32_t *)(vzn + H1_4(row));
|
void *vza_row = vza + tile_vslice_offset(row);
|
||||||
|
uint32_t n = *(uint32_t *)(vzn + H1_4(row));
|
||||||
|
|
||||||
n = f16mop_adj_pair(n, prow, neg);
|
n = f16mop_adj_pair(n, prow, neg);
|
||||||
|
|
||||||
for (col = 0; col < oprsz; ) {
|
for (col = 0; col < oprsz; ) {
|
||||||
uint16_t pcol = pm[H2(col >> 4)];
|
uint16_t pcol = pm[H2(col >> 4)];
|
||||||
do {
|
do {
|
||||||
if (prow & pcol & 0b0101) {
|
if (prow & pcol & 0b0101) {
|
||||||
uint32_t *a = vza_row + H1_4(col);
|
uint32_t *a = vza_row + H1_4(col);
|
||||||
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
|
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
|
||||||
|
|
||||||
m = f16mop_adj_pair(m, pcol, 0);
|
m = f16mop_adj_pair(m, pcol, 0);
|
||||||
*a = bfdotadd(*a, n, m);
|
*a = bfdotadd_ebf(*a, n, m, &fpst, &fpst_odd);
|
||||||
}
|
}
|
||||||
col += 4;
|
col += 4;
|
||||||
pcol >>= 4;
|
pcol >>= 4;
|
||||||
} while (col & 15);
|
} while (col & 15);
|
||||||
}
|
}
|
||||||
row += 4;
|
row += 4;
|
||||||
prow >>= 4;
|
prow >>= 4;
|
||||||
} while (row & 15);
|
} while (row & 15);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (row = 0; row < oprsz; ) {
|
||||||
|
uint16_t prow = pn[H2(row >> 4)];
|
||||||
|
do {
|
||||||
|
void *vza_row = vza + tile_vslice_offset(row);
|
||||||
|
uint32_t n = *(uint32_t *)(vzn + H1_4(row));
|
||||||
|
|
||||||
|
n = f16mop_adj_pair(n, prow, neg);
|
||||||
|
|
||||||
|
for (col = 0; col < oprsz; ) {
|
||||||
|
uint16_t pcol = pm[H2(col >> 4)];
|
||||||
|
do {
|
||||||
|
if (prow & pcol & 0b0101) {
|
||||||
|
uint32_t *a = vza_row + H1_4(col);
|
||||||
|
uint32_t m = *(uint32_t *)(vzm + H1_4(col));
|
||||||
|
|
||||||
|
m = f16mop_adj_pair(m, pcol, 0);
|
||||||
|
*a = bfdotadd(*a, n, m, &fpst);
|
||||||
|
}
|
||||||
|
col += 4;
|
||||||
|
pcol >>= 4;
|
||||||
|
} while (col & 15);
|
||||||
|
}
|
||||||
|
row += 4;
|
||||||
|
prow >>= 4;
|
||||||
|
} while (row & 15);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -735,6 +735,22 @@ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
|
||||||
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
|
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Expand a 4-operand operation using an out-of-line helper that takes
|
||||||
|
* a pointer to the CPU env.
|
||||||
|
*/
|
||||||
|
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
|
||||||
|
int rm, int ra, int data,
|
||||||
|
gen_helper_gvec_4_ptr *fn)
|
||||||
|
{
|
||||||
|
tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
|
||||||
|
vec_full_reg_offset(s, rn),
|
||||||
|
vec_full_reg_offset(s, rm),
|
||||||
|
vec_full_reg_offset(s, ra),
|
||||||
|
tcg_env,
|
||||||
|
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Expand a 4-operand + fpstatus pointer + simd data value operation using
|
* Expand a 4-operand + fpstatus pointer + simd data value operation using
|
||||||
* an out-of-line helper.
|
* an out-of-line helper.
|
||||||
|
@ -5608,11 +5624,20 @@ static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
|
||||||
|
gen_helper_gvec_4_ptr *fn)
|
||||||
|
{
|
||||||
|
if (fp_access_check(s)) {
|
||||||
|
gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
|
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
|
||||||
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
|
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
|
||||||
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
|
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
|
||||||
TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
|
TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
|
||||||
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
|
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
|
||||||
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
|
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
|
||||||
TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
|
TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
|
||||||
TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
|
TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
|
||||||
|
@ -6385,13 +6410,22 @@ static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
|
||||||
|
gen_helper_gvec_4_ptr *fn)
|
||||||
|
{
|
||||||
|
if (fp_access_check(s)) {
|
||||||
|
gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
|
TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
|
||||||
TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
|
TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
|
||||||
TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
||||||
gen_helper_gvec_sudot_idx_b)
|
gen_helper_gvec_sudot_idx_b)
|
||||||
TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
||||||
gen_helper_gvec_usdot_idx_b)
|
gen_helper_gvec_usdot_idx_b)
|
||||||
TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx, a,
|
TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
|
||||||
gen_helper_gvec_bfdot_idx)
|
gen_helper_gvec_bfdot_idx)
|
||||||
|
|
||||||
static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
|
static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
|
||||||
|
|
|
@ -148,6 +148,37 @@ static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
|
||||||
|
int data, gen_helper_gvec_4_ptr *fn_gvec)
|
||||||
|
{
|
||||||
|
/* UNDEF accesses to D16-D31 if they don't exist. */
|
||||||
|
if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UNDEF accesses to odd registers for each bit of Q.
|
||||||
|
* Q will be 0b111 for all Q-reg instructions, otherwise
|
||||||
|
* when we have mixed Q- and D-reg inputs.
|
||||||
|
*/
|
||||||
|
if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vfp_access_check(s)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int opr_sz = q ? 16 : 8;
|
||||||
|
tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
|
||||||
|
vfp_reg_offset(1, vn),
|
||||||
|
vfp_reg_offset(1, vm),
|
||||||
|
vfp_reg_offset(1, vd),
|
||||||
|
tcg_env,
|
||||||
|
opr_sz, opr_sz, data, fn_gvec);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
|
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
|
||||||
int data, ARMFPStatusFlavour fp_flavour,
|
int data, ARMFPStatusFlavour fp_flavour,
|
||||||
gen_helper_gvec_4_ptr *fn_gvec_ptr)
|
gen_helper_gvec_4_ptr *fn_gvec_ptr)
|
||||||
|
@ -266,8 +297,8 @@ static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
|
||||||
if (!dc_isar_feature(aa32_bf16, s)) {
|
if (!dc_isar_feature(aa32_bf16, s)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
|
return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
|
||||||
gen_helper_gvec_bfdot);
|
gen_helper_gvec_bfdot);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool trans_VFML(DisasContext *s, arg_VFML *a)
|
static bool trans_VFML(DisasContext *s, arg_VFML *a)
|
||||||
|
@ -360,8 +391,8 @@ static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
|
||||||
if (!dc_isar_feature(aa32_bf16, s)) {
|
if (!dc_isar_feature(aa32_bf16, s)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
return do_neon_ddda_env(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
||||||
gen_helper_gvec_bfdot_idx);
|
gen_helper_gvec_bfdot_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
|
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
|
||||||
|
@ -3699,8 +3730,8 @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
|
||||||
if (!dc_isar_feature(aa32_bf16, s)) {
|
if (!dc_isar_feature(aa32_bf16, s)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
|
return do_neon_ddda_env(s, 7, a->vd, a->vn, a->vm, 0,
|
||||||
gen_helper_gvec_bfmmla);
|
gen_helper_gvec_bfmmla);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
|
static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
|
||||||
|
|
|
@ -362,8 +362,7 @@ TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
|
||||||
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
|
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
|
||||||
MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
|
MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
|
||||||
|
|
||||||
/* TODO: FEAT_EBF16 */
|
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
|
||||||
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
|
|
||||||
|
|
||||||
TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
|
TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
|
||||||
TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
|
TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
|
||||||
|
|
|
@ -252,6 +252,25 @@ static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||||
|
int rd, int rn, int rm, int ra,
|
||||||
|
int data)
|
||||||
|
{
|
||||||
|
return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||||
|
arg_rrrr_esz *a, int data)
|
||||||
|
{
|
||||||
|
return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool gen_gvec_env_arg_zzxz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||||
|
arg_rrxr_esz *a)
|
||||||
|
{
|
||||||
|
return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
|
||||||
|
}
|
||||||
|
|
||||||
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
|
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
|
||||||
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
|
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
|
||||||
int rd, int rn, int rm, int ra, int pg,
|
int rd, int rn, int rm, int ra, int pg,
|
||||||
|
@ -7113,12 +7132,12 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
|
||||||
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
|
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
|
||||||
gen_helper_gvec_ummla_b, a, 0)
|
gen_helper_gvec_ummla_b, a, 0)
|
||||||
|
|
||||||
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
|
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
|
||||||
gen_helper_gvec_bfdot, a, 0)
|
gen_helper_gvec_bfdot, a, 0)
|
||||||
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
|
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
|
||||||
gen_helper_gvec_bfdot_idx, a)
|
gen_helper_gvec_bfdot_idx, a)
|
||||||
|
|
||||||
TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
|
TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
|
||||||
gen_helper_gvec_bfmmla, a, 0)
|
gen_helper_gvec_bfmmla, a, 0)
|
||||||
|
|
||||||
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
|
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
|
||||||
|
|
|
@ -2190,8 +2190,8 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
|
||||||
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
|
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* VFNMA : fd = muladd(-fd, fn, fm)
|
* VFNMA : fd = muladd(-fd, -fn, fm)
|
||||||
* VFNMS : fd = muladd(-fd, -fn, fm)
|
* VFNMS : fd = muladd(-fd, fn, fm)
|
||||||
* VFMA : fd = muladd( fd, fn, fm)
|
* VFMA : fd = muladd( fd, fn, fm)
|
||||||
* VFMS : fd = muladd( fd, -fn, fm)
|
* VFMS : fd = muladd( fd, -fn, fm)
|
||||||
*
|
*
|
||||||
|
@ -2262,8 +2262,8 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
|
||||||
#define MAKE_VFM_TRANS_FNS(PREC) \
|
#define MAKE_VFM_TRANS_FNS(PREC) \
|
||||||
MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
|
MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
|
||||||
MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
|
MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
|
||||||
MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
|
MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, false, true) \
|
||||||
MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
|
MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, true, true)
|
||||||
|
|
||||||
MAKE_VFM_TRANS_FNS(hp)
|
MAKE_VFM_TRANS_FNS(hp)
|
||||||
MAKE_VFM_TRANS_FNS(sp)
|
MAKE_VFM_TRANS_FNS(sp)
|
||||||
|
|
|
@ -2790,44 +2790,115 @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
|
||||||
* BFloat16 Dot Product
|
* BFloat16 Dot Product
|
||||||
*/
|
*/
|
||||||
|
|
||||||
float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
|
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
|
||||||
{
|
{
|
||||||
/* FPCR is ignored for BFDOT and BFMMLA. */
|
/*
|
||||||
float_status bf_status = {
|
* For BFDOT, BFMMLA, etc, the behaviour depends on FPCR.EBF.
|
||||||
|
* For EBF = 0, we ignore the FPCR bits which determine rounding
|
||||||
|
* mode and denormal-flushing, and we do unfused multiplies and
|
||||||
|
* additions with intermediate rounding of all products and sums.
|
||||||
|
* For EBF = 1, we honour FPCR rounding mode and denormal-flushing bits,
|
||||||
|
* and we perform a fused two-way sum-of-products without intermediate
|
||||||
|
* rounding of the products.
|
||||||
|
* In either case, we don't set fp exception flags.
|
||||||
|
*
|
||||||
|
* EBF is AArch64 only, so even if it's set in the FPCR it has
|
||||||
|
* no effect on AArch32 instructions.
|
||||||
|
*/
|
||||||
|
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
|
||||||
|
*statusp = (float_status){
|
||||||
.tininess_before_rounding = float_tininess_before_rounding,
|
.tininess_before_rounding = float_tininess_before_rounding,
|
||||||
.float_rounding_mode = float_round_to_odd_inf,
|
.float_rounding_mode = float_round_to_odd_inf,
|
||||||
.flush_to_zero = true,
|
.flush_to_zero = true,
|
||||||
.flush_inputs_to_zero = true,
|
.flush_inputs_to_zero = true,
|
||||||
.default_nan_mode = true,
|
.default_nan_mode = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (ebf) {
|
||||||
|
float_status *fpst = &env->vfp.fp_status;
|
||||||
|
set_flush_to_zero(get_flush_to_zero(fpst), statusp);
|
||||||
|
set_flush_inputs_to_zero(get_flush_inputs_to_zero(fpst), statusp);
|
||||||
|
set_float_rounding_mode(get_float_rounding_mode(fpst), statusp);
|
||||||
|
|
||||||
|
/* EBF=1 needs to do a step with round-to-odd semantics */
|
||||||
|
*oddstatusp = *statusp;
|
||||||
|
set_float_rounding_mode(float_round_to_odd, oddstatusp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ebf;
|
||||||
|
}
|
||||||
|
|
||||||
|
float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst)
|
||||||
|
{
|
||||||
float32 t1, t2;
|
float32 t1, t2;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract each BFloat16 from the element pair, and shift
|
* Extract each BFloat16 from the element pair, and shift
|
||||||
* them such that they become float32.
|
* them such that they become float32.
|
||||||
*/
|
*/
|
||||||
t1 = float32_mul(e1 << 16, e2 << 16, &bf_status);
|
t1 = float32_mul(e1 << 16, e2 << 16, fpst);
|
||||||
t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status);
|
t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, fpst);
|
||||||
t1 = float32_add(t1, t2, &bf_status);
|
t1 = float32_add(t1, t2, fpst);
|
||||||
t1 = float32_add(sum, t1, &bf_status);
|
t1 = float32_add(sum, t1, fpst);
|
||||||
|
|
||||||
return t1;
|
return t1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
|
float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
|
||||||
|
float_status *fpst, float_status *fpst_odd)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Compare f16_dotadd() in sme_helper.c, but here we have
|
||||||
|
* bfloat16 inputs. In particular that means that we do not
|
||||||
|
* want the FPCR.FZ16 flush semantics, so we use the normal
|
||||||
|
* float_status for the input handling here.
|
||||||
|
*/
|
||||||
|
float64 e1r = float32_to_float64(e1 << 16, fpst);
|
||||||
|
float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst);
|
||||||
|
float64 e2r = float32_to_float64(e2 << 16, fpst);
|
||||||
|
float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst);
|
||||||
|
float64 t64;
|
||||||
|
float32 t32;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The ARM pseudocode function FPDot performs both multiplies
|
||||||
|
* and the add with a single rounding operation. Emulate this
|
||||||
|
* by performing the first multiply in round-to-odd, then doing
|
||||||
|
* the second multiply as fused multiply-add, and rounding to
|
||||||
|
* float32 all in one step.
|
||||||
|
*/
|
||||||
|
t64 = float64_mul(e1r, e2r, fpst_odd);
|
||||||
|
t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst);
|
||||||
|
|
||||||
|
/* This conversion is exact, because we've already rounded. */
|
||||||
|
t32 = float64_to_float32(t64, fpst);
|
||||||
|
|
||||||
|
/* The final accumulation step is not fused. */
|
||||||
|
return float32_add(sum, t32, fpst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc);
|
intptr_t i, opr_sz = simd_oprsz(desc);
|
||||||
float32 *d = vd, *a = va;
|
float32 *d = vd, *a = va;
|
||||||
uint32_t *n = vn, *m = vm;
|
uint32_t *n = vn, *m = vm;
|
||||||
|
float_status fpst, fpst_odd;
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 4; ++i) {
|
if (is_ebf(env, &fpst, &fpst_odd)) {
|
||||||
d[i] = bfdotadd(a[i], n[i], m[i]);
|
for (i = 0; i < opr_sz / 4; ++i) {
|
||||||
|
d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < opr_sz / 4; ++i) {
|
||||||
|
d[i] = bfdotadd(a[i], n[i], m[i], &fpst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
|
void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
|
||||||
void *va, uint32_t desc)
|
void *va, CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
||||||
intptr_t index = simd_data(desc);
|
intptr_t index = simd_data(desc);
|
||||||
|
@ -2835,53 +2906,100 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
|
||||||
intptr_t eltspersegment = MIN(16 / 4, elements);
|
intptr_t eltspersegment = MIN(16 / 4, elements);
|
||||||
float32 *d = vd, *a = va;
|
float32 *d = vd, *a = va;
|
||||||
uint32_t *n = vn, *m = vm;
|
uint32_t *n = vn, *m = vm;
|
||||||
|
float_status fpst, fpst_odd;
|
||||||
|
|
||||||
for (i = 0; i < elements; i += eltspersegment) {
|
if (is_ebf(env, &fpst, &fpst_odd)) {
|
||||||
uint32_t m_idx = m[i + H4(index)];
|
for (i = 0; i < elements; i += eltspersegment) {
|
||||||
|
uint32_t m_idx = m[i + H4(index)];
|
||||||
|
|
||||||
for (j = i; j < i + eltspersegment; j++) {
|
for (j = i; j < i + eltspersegment; j++) {
|
||||||
d[j] = bfdotadd(a[j], n[j], m_idx);
|
d[j] = bfdotadd_ebf(a[j], n[j], m_idx, &fpst, &fpst_odd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < elements; i += eltspersegment) {
|
||||||
|
uint32_t m_idx = m[i + H4(index)];
|
||||||
|
|
||||||
|
for (j = i; j < i + eltspersegment; j++) {
|
||||||
|
d[j] = bfdotadd(a[j], n[j], m_idx, &fpst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
|
void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t s, opr_sz = simd_oprsz(desc);
|
intptr_t s, opr_sz = simd_oprsz(desc);
|
||||||
float32 *d = vd, *a = va;
|
float32 *d = vd, *a = va;
|
||||||
uint32_t *n = vn, *m = vm;
|
uint32_t *n = vn, *m = vm;
|
||||||
|
float_status fpst, fpst_odd;
|
||||||
|
|
||||||
for (s = 0; s < opr_sz / 4; s += 4) {
|
if (is_ebf(env, &fpst, &fpst_odd)) {
|
||||||
float32 sum00, sum01, sum10, sum11;
|
for (s = 0; s < opr_sz / 4; s += 4) {
|
||||||
|
float32 sum00, sum01, sum10, sum11;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Process the entire segment at once, writing back the
|
* Process the entire segment at once, writing back the
|
||||||
* results only after we've consumed all of the inputs.
|
* results only after we've consumed all of the inputs.
|
||||||
*
|
*
|
||||||
* Key to indices by column:
|
* Key to indices by column:
|
||||||
* i j i k j k
|
* i j i k j k
|
||||||
*/
|
*/
|
||||||
sum00 = a[s + H4(0 + 0)];
|
sum00 = a[s + H4(0 + 0)];
|
||||||
sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]);
|
sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd);
|
||||||
sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]);
|
sum00 = bfdotadd_ebf(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd);
|
||||||
|
|
||||||
sum01 = a[s + H4(0 + 1)];
|
sum01 = a[s + H4(0 + 1)];
|
||||||
sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]);
|
sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd);
|
||||||
sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]);
|
sum01 = bfdotadd_ebf(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd);
|
||||||
|
|
||||||
sum10 = a[s + H4(2 + 0)];
|
sum10 = a[s + H4(2 + 0)];
|
||||||
sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]);
|
sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst, &fpst_odd);
|
||||||
sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]);
|
sum10 = bfdotadd_ebf(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst, &fpst_odd);
|
||||||
|
|
||||||
sum11 = a[s + H4(2 + 1)];
|
sum11 = a[s + H4(2 + 1)];
|
||||||
sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]);
|
sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst, &fpst_odd);
|
||||||
sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]);
|
sum11 = bfdotadd_ebf(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst, &fpst_odd);
|
||||||
|
|
||||||
d[s + H4(0 + 0)] = sum00;
|
d[s + H4(0 + 0)] = sum00;
|
||||||
d[s + H4(0 + 1)] = sum01;
|
d[s + H4(0 + 1)] = sum01;
|
||||||
d[s + H4(2 + 0)] = sum10;
|
d[s + H4(2 + 0)] = sum10;
|
||||||
d[s + H4(2 + 1)] = sum11;
|
d[s + H4(2 + 1)] = sum11;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (s = 0; s < opr_sz / 4; s += 4) {
|
||||||
|
float32 sum00, sum01, sum10, sum11;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Process the entire segment at once, writing back the
|
||||||
|
* results only after we've consumed all of the inputs.
|
||||||
|
*
|
||||||
|
* Key to indices by column:
|
||||||
|
* i j i k j k
|
||||||
|
*/
|
||||||
|
sum00 = a[s + H4(0 + 0)];
|
||||||
|
sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)], &fpst);
|
||||||
|
sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)], &fpst);
|
||||||
|
|
||||||
|
sum01 = a[s + H4(0 + 1)];
|
||||||
|
sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)], &fpst);
|
||||||
|
sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)], &fpst);
|
||||||
|
|
||||||
|
sum10 = a[s + H4(2 + 0)];
|
||||||
|
sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)], &fpst);
|
||||||
|
sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)], &fpst);
|
||||||
|
|
||||||
|
sum11 = a[s + H4(2 + 1)];
|
||||||
|
sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)], &fpst);
|
||||||
|
sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)], &fpst);
|
||||||
|
|
||||||
|
d[s + H4(0 + 0)] = sum00;
|
||||||
|
d[s + H4(0 + 1)] = sum01;
|
||||||
|
d[s + H4(2 + 0)] = sum10;
|
||||||
|
d[s + H4(2 + 1)] = sum11;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
|
|
|
@ -223,13 +223,46 @@ int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
|
||||||
* bfdotadd:
|
* bfdotadd:
|
||||||
* @sum: addend
|
* @sum: addend
|
||||||
* @e1, @e2: multiplicand vectors
|
* @e1, @e2: multiplicand vectors
|
||||||
|
* @fpst: floating-point status to use
|
||||||
*
|
*
|
||||||
* BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
|
* BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
|
||||||
* The @e1 and @e2 operands correspond to the 32-bit source vector
|
* The @e1 and @e2 operands correspond to the 32-bit source vector
|
||||||
* slots and contain two Bfloat16 values each.
|
* slots and contain two Bfloat16 values each.
|
||||||
*
|
*
|
||||||
* Corresponds to the ARM pseudocode function BFDotAdd.
|
* Corresponds to the ARM pseudocode function BFDotAdd, specialized
|
||||||
|
* for the FPCR.EBF == 0 case.
|
||||||
*/
|
*/
|
||||||
float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
|
float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst);
|
||||||
|
/**
|
||||||
|
* bfdotadd_ebf:
|
||||||
|
* @sum: addend
|
||||||
|
* @e1, @e2: multiplicand vectors
|
||||||
|
* @fpst: floating-point status to use
|
||||||
|
* @fpst_odd: floating-point status to use for round-to-odd operations
|
||||||
|
*
|
||||||
|
* BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
|
||||||
|
* The @e1 and @e2 operands correspond to the 32-bit source vector
|
||||||
|
* slots and contain two Bfloat16 values each.
|
||||||
|
*
|
||||||
|
* Corresponds to the ARM pseudocode function BFDotAdd, specialized
|
||||||
|
* for the FPCR.EBF == 1 case.
|
||||||
|
*/
|
||||||
|
float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
|
||||||
|
float_status *fpst, float_status *fpst_odd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* is_ebf:
|
||||||
|
* @env: CPU state
|
||||||
|
* @statusp: pointer to floating point status to fill in
|
||||||
|
* @oddstatusp: pointer to floating point status to fill in for round-to-odd
|
||||||
|
*
|
||||||
|
* Determine whether a BFDotAdd operation should use FPCR.EBF = 0
|
||||||
|
* or FPCR.EBF = 1 semantics. On return, has initialized *statusp
|
||||||
|
* and *oddstatusp to suitable float_status arguments to use with either
|
||||||
|
* bfdotadd() or bfdotadd_ebf().
|
||||||
|
* Returns true for EBF = 1, false for EBF = 0. (The caller should use this
|
||||||
|
* to decide whether to call bfdotadd() or bfdotadd_ebf().)
|
||||||
|
*/
|
||||||
|
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
|
||||||
|
|
||||||
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
||||||
|
|
|
@ -141,18 +141,18 @@ VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||||
|
|
||||||
VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
||||||
VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
||||||
VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
VFNMS_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
||||||
VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
VFNMA_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
||||||
|
|
||||||
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
||||||
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
||||||
VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
VFNMS_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
||||||
VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
VFNMA_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
||||||
|
|
||||||
VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
|
VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||||
VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
|
VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
|
||||||
VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
|
VFNMS_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
|
||||||
VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
|
VFNMA_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
|
||||||
|
|
||||||
VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
|
VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
|
||||||
vd=%vd_sp imm=%vmov_imm
|
vd=%vd_sp imm=%vmov_imm
|
||||||
|
|
|
@ -254,6 +254,10 @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||||
val &= ~FPCR_FZ16;
|
val &= ~FPCR_FZ16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!cpu_isar_feature(aa64_ebf16, cpu)) {
|
||||||
|
val &= ~FPCR_EBF;
|
||||||
|
}
|
||||||
|
|
||||||
vfp_set_fpcr_to_host(env, val, mask);
|
vfp_set_fpcr_to_host(env, val, mask);
|
||||||
|
|
||||||
if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
|
if (mask & (FPCR_LEN_MASK | FPCR_STRIDE_MASK)) {
|
||||||
|
@ -278,12 +282,12 @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||||
* We don't implement trapped exception handling, so the
|
* We don't implement trapped exception handling, so the
|
||||||
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
||||||
*
|
*
|
||||||
* The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode
|
* The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF
|
||||||
* and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
|
* and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
|
||||||
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
|
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
|
||||||
* bits.
|
* bits.
|
||||||
*/
|
*/
|
||||||
val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16;
|
val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF;
|
||||||
env->vfp.fpcr &= ~mask;
|
env->vfp.fpcr &= ~mask;
|
||||||
env->vfp.fpcr |= val;
|
env->vfp.fpcr |= val;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue