target-arm queue:

* Add raw_writes ops for register whose write induce TLB maintenance
  * hw/arm/sbsa-ref: use XHCI to replace EHCI
  * Avoid splitting Zregs across lines in dump
  * Dump ZA[] when active
  * Fix SME full tile indexing
  * Handle IC IVAU to improve compatibility with JITs
  * xlnx-canfd-test: Fix code coverity issues
  * gdbstub: Guard M-profile code with CONFIG_TCG
  * allwinner-sramc: Set class_size
  * target/xtensa: Assert that interrupt level is within bounds
  * Avoid over-length shift in arm_cpu_sve_finalize() error case
  * Define new 'neoverse-v1' CPU type
 -----BEGIN PGP SIGNATURE-----
 
 iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmSmwEEZHHBldGVyLm1h
 eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vBcD/4vKUw6klRV7vyz/KBr2AOi
 Z1FnkLmOhwdp7CKvAVfU58TbPEJ8Fjo7OjziByV5nn/Ht9XrXcdl/E+0JamgrJ/n
 G90ZfpoY3Boan4XBukBz/KX63sT1erF4io1NxbvqLxZ2mbZWNb0D1v2qkxC5zPFE
 97knlbSle4/VB8N6VgaPaWKVy5gmBZQwl7NUlFtB8TTZp3HPo0V77E9p1Wqpwpls
 BNbqdtgUre3dlJci2f24PmXHYraKa68qk9xGnsSae96EY2+pOHbKhoZ/Fobaor2C
 u+dfgQ3fY3aLDVKx8UESIUoqkGoVqwEbmt+pWG2rJiljLkdsI3ZsVq7p3+VGbLAN
 berL14kCC2vRQYeNUwxeh5wdNVXc58xhWI5KXQRe8hr1dKWS5LQEHWgr7g7mb0+m
 zPHqbdF4FR1DAV29vQ9WyK4zttrinFAYl+zvLyd8dX2ogoUeivR+4o3YX4hlFr4H
 vcrglZbCGqAb3oKQG3PSGliS9GYtBwodLqKEH8PfcwfOP5PIcnSVc0Kl9DSzf7um
 dAuYpaK/XW3MPx5qpWjnip4dRWUV5m/6nSCJr+fELEv3A0sGZY4pywv5NS/Yg1wE
 nXdi8D+nyx9+AAiWTcB+ePsLuDEO2gYtubfqed99TFoJbL6/b4NbH8YE6cF3N/gY
 lqFyvEIYNJZ9klf7XKnX2w==
 =/MkB
 -----END PGP SIGNATURE-----

Merge tag 'pull-target-arm-20230706' of https://git.linaro.org/people/pmaydell/qemu-arm into staging

target-arm queue:
 * Add raw_writes ops for register whose write induce TLB maintenance
 * hw/arm/sbsa-ref: use XHCI to replace EHCI
 * Avoid splitting Zregs across lines in dump
 * Dump ZA[] when active
 * Fix SME full tile indexing
 * Handle IC IVAU to improve compatibility with JITs
 * xlnx-canfd-test: Fix code coverity issues
 * gdbstub: Guard M-profile code with CONFIG_TCG
 * allwinner-sramc: Set class_size
 * target/xtensa: Assert that interrupt level is within bounds
 * Avoid over-length shift in arm_cpu_sve_finalize() error case
 * Define new 'neoverse-v1' CPU type

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmSmwEEZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vBcD/4vKUw6klRV7vyz/KBr2AOi
# Z1FnkLmOhwdp7CKvAVfU58TbPEJ8Fjo7OjziByV5nn/Ht9XrXcdl/E+0JamgrJ/n
# G90ZfpoY3Boan4XBukBz/KX63sT1erF4io1NxbvqLxZ2mbZWNb0D1v2qkxC5zPFE
# 97knlbSle4/VB8N6VgaPaWKVy5gmBZQwl7NUlFtB8TTZp3HPo0V77E9p1Wqpwpls
# BNbqdtgUre3dlJci2f24PmXHYraKa68qk9xGnsSae96EY2+pOHbKhoZ/Fobaor2C
# u+dfgQ3fY3aLDVKx8UESIUoqkGoVqwEbmt+pWG2rJiljLkdsI3ZsVq7p3+VGbLAN
# berL14kCC2vRQYeNUwxeh5wdNVXc58xhWI5KXQRe8hr1dKWS5LQEHWgr7g7mb0+m
# zPHqbdF4FR1DAV29vQ9WyK4zttrinFAYl+zvLyd8dX2ogoUeivR+4o3YX4hlFr4H
# vcrglZbCGqAb3oKQG3PSGliS9GYtBwodLqKEH8PfcwfOP5PIcnSVc0Kl9DSzf7um
# dAuYpaK/XW3MPx5qpWjnip4dRWUV5m/6nSCJr+fELEv3A0sGZY4pywv5NS/Yg1wE
# nXdi8D+nyx9+AAiWTcB+ePsLuDEO2gYtubfqed99TFoJbL6/b4NbH8YE6cF3N/gY
# lqFyvEIYNJZ9klf7XKnX2w==
# =/MkB
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 06 Jul 2023 02:23:13 PM BST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]

* tag 'pull-target-arm-20230706' of https://git.linaro.org/people/pmaydell/qemu-arm:
  target/arm: Avoid over-length shift in arm_cpu_sve_finalize() error case
  target/arm: Define neoverse-v1
  target/arm: Suppress more TCG unimplemented features in ID registers
  target/xtensa: Assert that interrupt level is within bounds
  hw: arm: allwinner-sramc: Set class_size
  target/arm: gdbstub: Guard M-profile code with CONFIG_TCG
  tests/qtest: xlnx-canfd-test: Fix code coverity issues
  target/arm: Handle IC IVAU to improve compatibility with JITs
  target/arm: Fix SME full tile indexing
  target/arm: Dump ZA[] when active
  target/arm: Avoid splitting Zregs across lines in dump
  tests/tcg/aarch64/sysregs.c: Use S syntax for id_aa64zfr0_el1 and id_aa64smfr0_el1
  hw/arm/sbsa-ref: use XHCI to replace EHCI
  target/arm: Add raw_writes ops for register whose write induce TLB maintenance

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-07-06 16:19:21 +01:00
commit 822cb97cef
17 changed files with 415 additions and 93 deletions

View File

@ -19,7 +19,7 @@ The ``sbsa-ref`` board supports:
- A configurable number of AArch64 CPUs
- GIC version 3
- System bus AHCI controller
- System bus EHCI controller
- System bus XHCI controller
- CDROM and hard disc on AHCI bus
- E1000E ethernet card on PCIe bus
- Bochs display adapter on PCIe bus
@ -68,3 +68,6 @@ Platform version changes:
0.2
GIC ITS information is present in devicetree.
0.3
The USB controller is an XHCI device, not EHCI.

View File

@ -61,6 +61,7 @@ Supported guest CPU types:
- ``a64fx`` (64-bit)
- ``host`` (with KVM only)
- ``neoverse-n1`` (64-bit)
- ``neoverse-v1`` (64-bit)
- ``max`` (same as ``host`` for KVM; best possible emulation with TCG)
Note that the default is ``cortex-a15``, so for an AArch64 guest you must

View File

@ -266,7 +266,7 @@ config SBSA_REF
select PL011 # UART
select PL031 # RTC
select PL061 # GPIO
select USB_EHCI_SYSBUS
select USB_XHCI_SYSBUS
select WDT_SBSA
select BOCHS_DISPLAY

View File

@ -42,6 +42,7 @@
#include "hw/pci-host/gpex.h"
#include "hw/qdev-properties.h"
#include "hw/usb.h"
#include "hw/usb/xhci.h"
#include "hw/char/pl011.h"
#include "hw/watchdog/sbsa_gwdt.h"
#include "net/net.h"
@ -85,7 +86,7 @@ enum {
SBSA_SECURE_UART_MM,
SBSA_SECURE_MEM,
SBSA_AHCI,
SBSA_EHCI,
SBSA_XHCI,
};
struct SBSAMachineState {
@ -123,7 +124,7 @@ static const MemMapEntry sbsa_ref_memmap[] = {
[SBSA_SMMU] = { 0x60050000, 0x00020000 },
/* Space here reserved for more SMMUs */
[SBSA_AHCI] = { 0x60100000, 0x00010000 },
[SBSA_EHCI] = { 0x60110000, 0x00010000 },
[SBSA_XHCI] = { 0x60110000, 0x00010000 },
/* Space here reserved for other devices */
[SBSA_PCIE_PIO] = { 0x7fff0000, 0x00010000 },
/* 32-bit address PCIE MMIO space */
@ -143,7 +144,7 @@ static const int sbsa_ref_irqmap[] = {
[SBSA_SECURE_UART] = 8,
[SBSA_SECURE_UART_MM] = 9,
[SBSA_AHCI] = 10,
[SBSA_EHCI] = 11,
[SBSA_XHCI] = 11,
[SBSA_SMMU] = 12, /* ... to 15 */
[SBSA_GWDT_WS0] = 16,
};
@ -152,6 +153,7 @@ static const char * const valid_cpus[] = {
ARM_CPU_TYPE_NAME("cortex-a57"),
ARM_CPU_TYPE_NAME("cortex-a72"),
ARM_CPU_TYPE_NAME("neoverse-n1"),
ARM_CPU_TYPE_NAME("neoverse-v1"),
ARM_CPU_TYPE_NAME("max"),
};
@ -230,7 +232,7 @@ static void create_fdt(SBSAMachineState *sms)
* fw compatibility.
*/
qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0);
qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 2);
qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 3);
if (ms->numa_state->have_numa_distance) {
int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t);
@ -604,13 +606,15 @@ static void create_ahci(const SBSAMachineState *sms)
}
}
static void create_ehci(const SBSAMachineState *sms)
static void create_xhci(const SBSAMachineState *sms)
{
hwaddr base = sbsa_ref_memmap[SBSA_EHCI].base;
int irq = sbsa_ref_irqmap[SBSA_EHCI];
hwaddr base = sbsa_ref_memmap[SBSA_XHCI].base;
int irq = sbsa_ref_irqmap[SBSA_XHCI];
DeviceState *dev = qdev_new(TYPE_XHCI_SYSBUS);
sysbus_create_simple("platform-ehci-usb", base,
qdev_get_gpio_in(sms->gic, irq));
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(sms->gic, irq));
}
static void create_smmu(const SBSAMachineState *sms, PCIBus *bus)
@ -832,7 +836,7 @@ static void sbsa_ref_init(MachineState *machine)
create_ahci(sms);
create_ehci(sms);
create_xhci(sms);
create_pcie(sms);

View File

@ -214,6 +214,7 @@ static const char *valid_cpus[] = {
ARM_CPU_TYPE_NAME("cortex-a76"),
ARM_CPU_TYPE_NAME("a64fx"),
ARM_CPU_TYPE_NAME("neoverse-n1"),
ARM_CPU_TYPE_NAME("neoverse-v1"),
#endif
ARM_CPU_TYPE_NAME("cortex-a53"),
ARM_CPU_TYPE_NAME("cortex-a57"),

View File

@ -159,6 +159,7 @@ static const TypeInfo allwinner_sramc_info = {
.parent = TYPE_SYS_BUS_DEVICE,
.instance_init = allwinner_sramc_init,
.instance_size = sizeof(AwSRAMCState),
.class_size = sizeof(AwSRAMCClass),
.class_init = allwinner_sramc_class_init,
};

View File

@ -955,7 +955,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
uint32_t psr = pstate_read(env);
int i;
int i, j;
int el = arm_current_el(env);
const char *ns_status;
bool sve;
@ -1014,7 +1014,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
}
if (sve) {
int j, zcr_len = sve_vqm1_for_el(env, el);
int zcr_len = sve_vqm1_for_el(env, el);
for (i = 0; i <= FFR_PRED_NUM; i++) {
bool eol;
@ -1054,32 +1054,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
}
}
for (i = 0; i < 32; i++) {
if (zcr_len == 0) {
if (zcr_len == 0) {
/*
* With vl=16, there are only 37 columns per register,
* so output two registers per line.
*/
for (i = 0; i < 32; i++) {
qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s",
i, env->vfp.zregs[i].d[1],
env->vfp.zregs[i].d[0], i & 1 ? "\n" : " ");
} else if (zcr_len == 1) {
qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64
":%016" PRIx64 ":%016" PRIx64 "\n",
i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2],
env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]);
} else {
}
} else {
for (i = 0; i < 32; i++) {
qemu_fprintf(f, "Z%02d=", i);
for (j = zcr_len; j >= 0; j--) {
bool odd = (zcr_len - j) % 2 != 0;
if (j == zcr_len) {
qemu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1);
} else if (!odd) {
if (j > 0) {
qemu_fprintf(f, " [%x-%x]=", j, j - 1);
} else {
qemu_fprintf(f, " [%x]=", j);
}
}
qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
env->vfp.zregs[i].d[j * 2 + 1],
env->vfp.zregs[i].d[j * 2],
odd || j == 0 ? "\n" : ":");
env->vfp.zregs[i].d[j * 2 + 0],
j ? ":" : "\n");
}
}
}
@ -1090,6 +1082,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
i, q[1], q[0], (i & 1 ? "\n" : " "));
}
}
if (cpu_isar_feature(aa64_sme, cpu) &&
FIELD_EX64(env->svcr, SVCR, ZA) &&
sme_exception_el(env, el) == 0) {
int zcr_len = sve_vqm1_for_el_sm(env, el, true);
int svl = (zcr_len + 1) * 16;
int svl_lg10 = svl < 100 ? 2 : 3;
for (i = 0; i < svl; i++) {
qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i);
for (j = zcr_len; j >= 0; --j) {
qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c",
env->zarray[i].d[2 * j + 1],
env->zarray[i].d[2 * j],
j ? ':' : '\n');
}
}
}
}
#else
@ -1684,6 +1694,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
#ifdef CONFIG_USER_ONLY
/*
* User mode relies on IC IVAU instructions to catch modification of
* dual-mapped code.
*
* Clear CTR_EL0.DIC to ensure that software that honors these flags uses
* IC IVAU even if the emulated processor does not normally require it.
*/
cpu->ctr = FIELD_DP64(cpu->ctr, CTR_EL0, DIC, 0);
#endif
if (arm_feature(env, ARM_FEATURE_AARCH64) &&
cpu->has_vfp != cpu->has_neon) {
/*
@ -2048,13 +2069,38 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
if (tcg_enabled()) {
/*
* Don't report the Statistical Profiling Extension in the ID
* registers, because TCG doesn't implement it yet (not even a
* minimal stub version) and guests will fall over when they
* try to access the non-existent system registers for it.
* Don't report some architectural features in the ID registers
* where TCG does not yet implement them (not even a minimal
* stub version). This avoids guests falling over when they
* try to access the non-existent system registers for them.
*/
/* FEAT_SPE (Statistical Profiling Extension) */
cpu->isar.id_aa64dfr0 =
FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0);
/* FEAT_TRF (Self-hosted Trace Extension) */
cpu->isar.id_aa64dfr0 =
FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0);
cpu->isar.id_dfr0 =
FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0);
/* Trace Macrocell system register access */
cpu->isar.id_aa64dfr0 =
FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0);
cpu->isar.id_dfr0 =
FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0);
/* Memory mapped trace */
cpu->isar.id_dfr0 =
FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0);
/* FEAT_AMU (Activity Monitors Extension) */
cpu->isar.id_aa64pfr0 =
FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0);
cpu->isar.id_pfr0 =
FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0);
/* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
cpu->isar.id_aa64pfr0 =
FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
/* FEAT_NV (Nested Virtualization) */
cpu->isar.id_aa64mmfr2 =
FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 0);
}
/* MPU can be configured out of a PMSA CPU either by setting has-mpu

View File

@ -122,10 +122,10 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
vq = ctz32(tmp) + 1;
max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
vq_mask = MAKE_64BIT_MASK(0, max_vq);
vq_mask = max_vq > 0 ? MAKE_64BIT_MASK(0, max_vq) : 0;
vq_map = vq_supported & ~vq_init & vq_mask;
if (max_vq == 0 || vq_map == 0) {
if (vq_map == 0) {
error_setg(errp, "cannot disable sve%d", vq * 128);
error_append_hint(errp, "Disabling sve%d results in all "
"vector lengths being disabled.\n",

View File

@ -324,6 +324,7 @@ static int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg)
return cpu->dyn_sysreg_xml.num;
}
#ifdef CONFIG_TCG
typedef enum {
M_SYSREG_MSP,
M_SYSREG_PSP,
@ -481,6 +482,7 @@ static int arm_gen_dynamic_m_secextreg_xml(CPUState *cs, int orig_base_reg)
return cpu->dyn_m_secextreg_xml.num;
}
#endif
#endif /* CONFIG_TCG */
const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
{
@ -561,6 +563,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
arm_gen_dynamic_sysreg_xml(cs, cs->gdb_num_regs),
"system-registers.xml", 0);
#ifdef CONFIG_TCG
if (arm_feature(env, ARM_FEATURE_M) && tcg_enabled()) {
gdb_register_coprocessor(cs,
arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg,
@ -575,4 +578,5 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
}
#endif
}
#endif /* CONFIG_TCG */
}

View File

@ -4189,14 +4189,14 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
.opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_TTBR0_EL1,
.writefn = vmsa_ttbr_write, .resetvalue = 0,
.writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
offsetof(CPUARMState, cp15.ttbr0_ns) } },
{ .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_TTBR1_EL1,
.writefn = vmsa_ttbr_write, .resetvalue = 0,
.writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
offsetof(CPUARMState, cp15.ttbr1_ns) } },
{ .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
@ -4456,13 +4456,13 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
.type = ARM_CP_64BIT | ARM_CP_ALIAS,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
offsetof(CPUARMState, cp15.ttbr0_ns) },
.writefn = vmsa_ttbr_write, },
.writefn = vmsa_ttbr_write, .raw_writefn = raw_write },
{ .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.type = ARM_CP_64BIT | ARM_CP_ALIAS,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
offsetof(CPUARMState, cp15.ttbr1_ns) },
.writefn = vmsa_ttbr_write, },
.writefn = vmsa_ttbr_write, .raw_writefn = raw_write },
};
static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@ -5234,6 +5234,36 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
}
}
#ifdef CONFIG_USER_ONLY
/*
* `IC IVAU` is handled to improve compatibility with JITs that dual-map their
* code to get around W^X restrictions, where one region is writable and the
* other is executable.
*
* Since the executable region is never written to we cannot detect code
* changes when running in user mode, and rely on the emulated JIT telling us
* that the code has changed by executing this instruction.
*/
static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
{
    const ARMCPU *cpu = env_archcpu(env);
    /*
     * The low four bits of cpu->ctr select a power-of-two multiple of
     * 4 bytes; subtracting one turns that length into an offset mask.
     */
    uint64_t icache_line_mask = (4 << extract32(cpu->ctr, 0, 4)) - 1;
    /* Widen the written address to the whole line that contains it. */
    uint64_t first_byte = value & ~icache_line_mask;
    uint64_t last_byte = value | icache_line_mask;

    /* Invalidate translated code for that range under the mmap lock. */
    mmap_lock();
    tb_invalidate_phys_range(first_byte, last_byte);
    mmap_unlock();
}
#endif
static const ARMCPRegInfo v8_cp_reginfo[] = {
/*
* Minimal set of EL0-visible registers. This will need to be expanded
@ -5273,7 +5303,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
{ .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
.access = PL1_R, .type = ARM_CP_CURRENTEL },
/* Cache ops: all NOPs since we don't emulate caches */
/*
* Instruction cache ops. All of these except `IC IVAU` NOP because we
* don't emulate caches.
*/
{ .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
.access = PL1_W, .type = ARM_CP_NOP,
@ -5286,9 +5319,17 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.accessfn = access_tocu },
{ .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
.access = PL0_W, .type = ARM_CP_NOP,
.access = PL0_W,
.fgt = FGT_ICIVAU,
.accessfn = access_tocu },
.accessfn = access_tocu,
#ifdef CONFIG_USER_ONLY
.type = ARM_CP_NO_RAW,
.writefn = ic_ivau_write
#else
.type = ARM_CP_NOP
#endif
},
/* Cache ops: all NOPs since we don't emulate caches */
{ .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
.access = PL1_W, .accessfn = aa64_cacheop_poc_access,
@ -5911,7 +5952,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.type = ARM_CP_IO,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
.writefn = hcr_write },
.writefn = hcr_write, .raw_writefn = raw_write },
{ .name = "HCR", .state = ARM_CP_STATE_AA32,
.type = ARM_CP_ALIAS | ARM_CP_IO,
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
@ -5983,6 +6024,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
.access = PL2_RW, .writefn = vmsa_tcr_el12_write,
.raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
{ .name = "VTCR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
@ -5999,10 +6041,10 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.type = ARM_CP_64BIT | ARM_CP_ALIAS,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
.fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2),
.writefn = vttbr_write },
.writefn = vttbr_write, .raw_writefn = raw_write },
{ .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
.access = PL2_RW, .writefn = vttbr_write,
.access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
{ .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
@ -6014,7 +6056,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) },
{ .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0,
.access = PL2_RW, .resetvalue = 0, .writefn = vmsa_tcr_ttbr_el2_write,
.access = PL2_RW, .resetvalue = 0,
.writefn = vmsa_tcr_ttbr_el2_write, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) },
{ .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2,
.access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS,
@ -6201,12 +6244,12 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
{ .name = "SCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3),
.resetfn = scr_reset, .writefn = scr_write },
.resetfn = scr_reset, .writefn = scr_write, .raw_writefn = raw_write },
{ .name = "SCR", .type = ARM_CP_ALIAS | ARM_CP_NEWEL,
.cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL1_RW, .accessfn = access_trap_aa32s_el1,
.fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3),
.writefn = scr_write },
.writefn = scr_write, .raw_writefn = raw_write },
{ .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1,
.access = PL3_RW, .resetvalue = 0,
@ -7927,6 +7970,7 @@ static const ARMCPRegInfo vhe_reginfo[] = {
{ .name = "TTBR1_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 1,
.access = PL2_RW, .writefn = vmsa_tcr_ttbr_el2_write,
.raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el[2]) },
#ifndef CONFIG_USER_ONLY
{ .name = "CNTHV_CVAL_EL2", .state = ARM_CP_STATE_AA64,

View File

@ -502,6 +502,31 @@ static void define_neoverse_n1_cp_reginfo(ARMCPU *cpu)
define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo);
}
/*
 * System registers defined for the Neoverse V1 on top of the
 * Neoverse N1 set. Every entry is an AArch64-only register of type
 * ARM_CP_CONST with a reset value of 0.
 */
static const ARMCPRegInfo neoverse_v1_cp_reginfo[] = {
{ .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5,
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0,
.access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1,
.access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "CPUPPMCR3_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6,
.access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
/*
 * Register the Neoverse V1 IMPDEF system registers on @cpu: the V1 has
 * every IMPDEF register of the Neoverse N1 plus a few of its own.
 */
static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu)
{
    /* Registers shared with the Neoverse N1. */
    define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo);

    /* Registers specific to the Neoverse V1. */
    define_arm_cp_regs(cpu, neoverse_v1_cp_reginfo);
}
static void aarch64_neoverse_n1_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@ -573,6 +598,108 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
define_neoverse_n1_cp_reginfo(cpu);
}
/*
 * Instance init for the 'neoverse-v1' CPU model: sets the feature flags,
 * fills in the ID, cache, GIC, SVE and PMU register values, registers
 * the IMPDEF system registers and adds the pauth and SVE properties.
 */
static void aarch64_neoverse_v1_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
cpu->dtb_compatible = "arm,neoverse-v1";
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
set_feature(&cpu->env, ARM_FEATURE_EL2);
set_feature(&cpu->env, ARM_FEATURE_EL3);
set_feature(&cpu->env, ARM_FEATURE_PMU);
/* Ordered by 3.2.4 AArch64 registers by functional group */
cpu->clidr = 0x82000023;
cpu->ctr = 0xb444c004; /* With DIC and IDC set */
cpu->dcz_blocksize = 4;
cpu->id_aa64afr0 = 0x00000000;
cpu->id_aa64afr1 = 0x00000000;
cpu->isar.id_aa64dfr0 = 0x000001f210305519ull;
cpu->isar.id_aa64dfr1 = 0x00000000;
cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */
cpu->isar.id_aa64isar1 = 0x0111000001211032ull;
cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull;
cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */
cpu->isar.id_aa64pfr1 = 0x0000000000000020ull;
cpu->id_afr0 = 0x00000000;
cpu->isar.id_dfr0 = 0x15011099;
cpu->isar.id_isar0 = 0x02101110;
cpu->isar.id_isar1 = 0x13112111;
cpu->isar.id_isar2 = 0x21232042;
cpu->isar.id_isar3 = 0x01112131;
cpu->isar.id_isar4 = 0x00010142;
cpu->isar.id_isar5 = 0x11011121;
cpu->isar.id_isar6 = 0x01100111;
cpu->isar.id_mmfr0 = 0x10201105;
cpu->isar.id_mmfr1 = 0x40000000;
cpu->isar.id_mmfr2 = 0x01260000;
cpu->isar.id_mmfr3 = 0x02122211;
cpu->isar.id_mmfr4 = 0x01021110;
cpu->isar.id_pfr0 = 0x21110131;
cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
cpu->isar.id_pfr2 = 0x00000011;
cpu->midr = 0x411FD402; /* r1p2 */
cpu->revidr = 0;
/*
* The Neoverse-V1 r1p2 TRM lists 32-bit format CCSIDR_EL1 values,
* but also says it implements CCIDX, which means they should be
* 64-bit format. So here we use values which are based on the textual
* information in chapter 2 of the TRM (and on the fact that
* sets * associativity * linesize == cachesize).
*
* The 64-bit CCSIDR_EL1 format is:
* [55:32] number of sets - 1
* [23:3] associativity - 1
* [2:0] log2(linesize) - 4
* so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
*
* L1: 4-way set associative 64-byte line size, total size 64K,
* so sets is 256.
*
* L2: 8-way set associative, 64 byte line size, either 512K or 1MB.
* We pick 1MB, so this has 2048 sets.
*
* L3: No L3 (this matches the CLIDR_EL1 value).
*/
cpu->ccsidr[0] = 0x000000ff0000001aull; /* 64KB L1 dcache */
cpu->ccsidr[1] = 0x000000ff0000001aull; /* 64KB L1 icache */
cpu->ccsidr[2] = 0x000007ff0000003aull; /* 1MB L2 cache */
/* From 3.2.115 SCTLR_EL3 */
cpu->reset_sctlr = 0x30c50838;
/* From 3.4.8 ICC_CTLR_EL3 and 3.4.23 ICH_VTR_EL2 */
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
cpu->gic_vprebits = 5;
cpu->gic_pribits = 5;
/* From 3.5.1 AdvSIMD AArch64 register summary */
cpu->isar.mvfr0 = 0x10110222;
cpu->isar.mvfr1 = 0x13211111;
cpu->isar.mvfr2 = 0x00000043;
/* From 3.7.5 ID_AA64ZFR0_EL1 */
cpu->isar.id_aa64zfr0 = 0x0000100000100000;
cpu->sve_vq.supported = (1 << 0) /* 128bit */
| (1 << 1); /* 256bit */
/* From 5.5.1 AArch64 PMU register summary */
cpu->isar.reset_pmcr_el0 = 0x41213000;
/* Register the N1 and V1 IMPDEF system registers for this CPU. */
define_neoverse_v1_cp_reginfo(cpu);
aarch64_add_pauth_properties(obj);
aarch64_add_sve_properties(obj);
}
/*
* -cpu max: a CPU with as many features enabled as our emulation supports.
* The version of '-cpu max' for qemu-system-arm is defined in cpu32.c;
@ -763,6 +890,7 @@ static const ARMCPUInfo aarch64_cpus[] = {
{ .name = "cortex-a76", .initfn = aarch64_a76_initfn },
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
{ .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn },
{ .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn },
};
static void aarch64_cpu_register_types(void)

View File

@ -95,6 +95,21 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
return addr;
}
/*
* Resolve tile.size[0] to a host pointer.
* Used by e.g. outer product insns where we require the entire tile.
*/
static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
{
    /*
     * Offset from the CPU state to the tile: the start of zarray plus
     * one ARMVectorReg per tile index. Neither s nor esz is consulted.
     */
    const int tile_off = offsetof(CPUARMState, zarray)
                         + tile * sizeof(ARMVectorReg);
    TCGv_ptr tile_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(tile_ptr, cpu_env, tile_off);
    return tile_ptr;
}
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
{
if (!dc_isar_feature(aa64_sme, s)) {
@ -260,8 +275,7 @@ static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
@ -286,8 +300,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
@ -308,8 +321,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);

View File

@ -169,6 +169,9 @@ static void handle_interrupt(CPUXtensaState *env)
CPUState *cs = env_cpu(env);
if (level > 1) {
/* env->config->nlevel check should have ensured this */
assert(level < sizeof(env->config->interrupt_vector));
env->sregs[EPC1 + level - 1] = env->pc;
env->sregs[EPS2 + level - 2] = env->sregs[PS];
env->sregs[PS] =

View File

@ -170,23 +170,23 @@ static void generate_random_data(uint32_t *buf_tx, bool is_canfd_frame)
/* Generate random TX data for CANFD frame. */
if (is_canfd_frame) {
for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) {
buf_tx[2 + i] = rand();
buf_tx[2 + i] = g_random_int();
}
} else {
/* Generate random TX data for CAN frame. */
for (int i = 0; i < CAN_FRAME_SIZE - 2; i++) {
buf_tx[2 + i] = rand();
buf_tx[2 + i] = g_random_int();
}
}
}
static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx)
static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx,
uint32_t frame_size)
{
uint32_t int_status;
uint32_t fifo_status_reg_value;
/* At which RX FIFO the received data is stored. */
uint8_t store_ind = 0;
bool is_canfd_frame = false;
/* Read the interrupt on CANFD rx. */
int_status = qtest_readl(qts, can_base_addr + R_ISR_OFFSET) & ISR_RXOK;
@ -207,16 +207,9 @@ static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx)
buf_rx[0] = qtest_readl(qts, can_base_addr + R_RX0_ID_OFFSET);
buf_rx[1] = qtest_readl(qts, can_base_addr + R_RX0_DLC_OFFSET);
is_canfd_frame = (buf_rx[1] >> DLC_FD_BIT_SHIFT) & 1;
if (is_canfd_frame) {
for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) {
buf_rx[i + 2] = qtest_readl(qts,
can_base_addr + R_RX0_DATA1_OFFSET + 4 * i);
}
} else {
buf_rx[2] = qtest_readl(qts, can_base_addr + R_RX0_DATA1_OFFSET);
buf_rx[3] = qtest_readl(qts, can_base_addr + R_RX0_DATA2_OFFSET);
for (int i = 0; i < frame_size - 2; i++) {
buf_rx[i + 2] = qtest_readl(qts,
can_base_addr + R_RX0_DATA1_OFFSET + 4 * i);
}
/* Clear the RX interrupt. */
@ -272,10 +265,6 @@ static void match_rx_tx_data(const uint32_t *buf_tx, const uint32_t *buf_rx,
g_assert_cmpint((buf_rx[size] & DLC_FD_BIT_MASK), ==,
(buf_tx[size] & DLC_FD_BIT_MASK));
} else {
if (!is_canfd_frame && size == 4) {
break;
}
g_assert_cmpint(buf_rx[size], ==, buf_tx[size]);
}
@ -318,7 +307,7 @@ static void test_can_data_transfer(void)
write_data(qts, CANFD0_BASE_ADDR, buf_tx, false);
send_data(qts, CANFD0_BASE_ADDR);
read_data(qts, CANFD1_BASE_ADDR, buf_rx);
read_data(qts, CANFD1_BASE_ADDR, buf_rx, CAN_FRAME_SIZE);
match_rx_tx_data(buf_tx, buf_rx, false);
qtest_quit(qts);
@ -358,7 +347,7 @@ static void test_canfd_data_transfer(void)
write_data(qts, CANFD0_BASE_ADDR, buf_tx, true);
send_data(qts, CANFD0_BASE_ADDR);
read_data(qts, CANFD1_BASE_ADDR, buf_rx);
read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE);
match_rx_tx_data(buf_tx, buf_rx, true);
qtest_quit(qts);
@ -397,7 +386,7 @@ static void test_can_loopback(void)
write_data(qts, CANFD0_BASE_ADDR, buf_tx, true);
send_data(qts, CANFD0_BASE_ADDR);
read_data(qts, CANFD0_BASE_ADDR, buf_rx);
read_data(qts, CANFD0_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE);
match_rx_tx_data(buf_tx, buf_rx, true);
generate_random_data(buf_tx, true);
@ -405,7 +394,7 @@ static void test_can_loopback(void)
write_data(qts, CANFD1_BASE_ADDR, buf_tx, true);
send_data(qts, CANFD1_BASE_ADDR);
read_data(qts, CANFD1_BASE_ADDR, buf_rx);
read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE);
match_rx_tx_data(buf_tx, buf_rx, true);
qtest_quit(qts);

View File

@ -26,7 +26,7 @@ config-cc.mak: Makefile
$(call cc-option,-march=armv8.5-a, CROSS_CC_HAS_ARMV8_5); \
$(call cc-option,-mbranch-protection=standard, CROSS_CC_HAS_ARMV8_BTI); \
$(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE); \
$(call cc-option,-march=armv9-a+sme, CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak
$(call cc-option,-Wa$(COMMA)-march=armv9-a+sme, CROSS_AS_HAS_ARMV9_SME)) 3> config-cc.mak
-include config-cc.mak
ifneq ($(CROSS_CC_HAS_ARMV8_2),)
@ -61,15 +61,15 @@ AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7
mte-%: CFLAGS += -march=armv8.5-a+memtag
endif
ifneq ($(CROSS_CC_HAS_SVE),)
# System Registers Tests
AARCH64_TESTS += sysregs
ifneq ($(CROSS_CC_HAS_ARMV9_SME),)
sysregs: CFLAGS+=-march=armv9-a+sme -DHAS_ARMV9_SME
else
sysregs: CFLAGS+=-march=armv8.1-a+sve
# SME Tests
ifneq ($(CROSS_AS_HAS_ARMV9_SME),)
AARCH64_TESTS += sme-outprod1
endif
# System Registers Tests
AARCH64_TESTS += sysregs
ifneq ($(CROSS_CC_HAS_SVE),)
# SVE ioctl test
AARCH64_TESTS += sve-ioctls
sve-ioctls: CFLAGS+=-march=armv8.1-a+sve

View File

@ -0,0 +1,83 @@
/*
* SME outer product, 1 x 1.
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include <stdio.h>
extern void foo(float *dst);
/*
 * foo(dst): x29/x30 and d8-d15 are saved on entry and restored before
 * returning. Between smstart and smstop the routine zeroes ZA, performs
 * an FMOPA of a vector of 1.0 with itself into tile za1.s, reads four
 * vertical slices of that tile and stores them as four groups of four
 * 32-bit elements at dst, dst+16, dst+32 and dst+48.
 */
asm(
" .arch_extension sme\n"
" .type foo, @function\n"
"foo:\n"
" stp x29, x30, [sp, -80]!\n"
" mov x29, sp\n"
" stp d8, d9, [sp, 16]\n"
" stp d10, d11, [sp, 32]\n"
" stp d12, d13, [sp, 48]\n"
" stp d14, d15, [sp, 64]\n"
" smstart\n"
" ptrue p0.s, vl4\n"
" fmov z0.s, #1.0\n"
/*
* An outer product of a vector of 1.0 by itself should be a matrix of 1.0.
* Note that we are using tile 1 here (za1.s) rather than tile 0.
*/
" zero {za}\n"
" fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n"
/*
* Read the first 4x4 sub-matrix of elements from tile 1:
* Note that za1h should be interchangeable here.
*/
" mov w12, #0\n"
" mova z0.s, p0/m, za1v.s[w12, #0]\n"
" mova z1.s, p0/m, za1v.s[w12, #1]\n"
" mova z2.s, p0/m, za1v.s[w12, #2]\n"
" mova z3.s, p0/m, za1v.s[w12, #3]\n"
/*
* And store them to the input pointer (dst in the C code):
*/
" st1w {z0.s}, p0, [x0]\n"
" add x0, x0, #16\n"
" st1w {z1.s}, p0, [x0]\n"
" add x0, x0, #16\n"
" st1w {z2.s}, p0, [x0]\n"
" add x0, x0, #16\n"
" st1w {z3.s}, p0, [x0]\n"
" smstop\n"
" ldp d8, d9, [sp, 16]\n"
" ldp d10, d11, [sp, 32]\n"
" ldp d12, d13, [sp, 48]\n"
" ldp d14, d15, [sp, 64]\n"
" ldp x29, x30, [sp], 80\n"
" ret\n"
" .size foo, . - foo"
);
/*
 * Run foo() and verify that each of the 16 floats it stored equals 1.0.
 *
 * Returns 0 when every element matches. Otherwise prints the 4x4 result
 * matrix, one row per line, and returns 1.
 */
int main(void)
{
    float dst[16];
    int i, j;

    foo(dst);

    /* Stop at the first element that is not exactly 1.0. */
    for (i = 0; i < 16; i++) {
        if (dst[i] != 1.0f) {
            break;
        }
    }

    if (i == 16) {
        return 0; /* success */
    }

    /* failure */
    for (i = 0; i < 4; ++i) {
        for (j = 0; j < 4; ++j) {
            printf("%f ", (double)dst[i * 4 + j]);
        }
        printf("\n");
    }
    return 1;
}

View File

@ -25,9 +25,14 @@
/*
* Older assemblers don't recognize newer system register names,
* but we can still access them by the Sn_n_Cn_Cn_n syntax.
* This also means we don't need to specifically request that the
* assembler enables whatever architectural features the ID registers
* syntax might be gated behind.
*/
#define SYS_ID_AA64ISAR2_EL1 S3_0_C0_C6_2
#define SYS_ID_AA64MMFR2_EL1 S3_0_C0_C7_2
#define SYS_ID_AA64ZFR0_EL1 S3_0_C0_C4_4
#define SYS_ID_AA64SMFR0_EL1 S3_0_C0_C4_5
int failed_bit_count;
@ -132,10 +137,8 @@ int main(void)
/* all hidden, DebugVer fixed to 0x6 (ARMv8 debug architecture) */
get_cpu_reg_check_mask(id_aa64dfr0_el1, _m(0000,0000,0000,0006));
get_cpu_reg_check_zero(id_aa64dfr1_el1);
get_cpu_reg_check_mask(id_aa64zfr0_el1, _m(0ff0,ff0f,00ff,00ff));
#ifdef HAS_ARMV9_SME
get_cpu_reg_check_mask(id_aa64smfr0_el1, _m(80f1,00fd,0000,0000));
#endif
get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1, _m(0ff0,ff0f,00ff,00ff));
get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(80f1,00fd,0000,0000));
get_cpu_reg_check_zero(id_aa64afr0_el1);
get_cpu_reg_check_zero(id_aa64afr1_el1);