From 61e99241491c2d87af2a9751630e597a54ea7af8 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 9 Jun 2014 15:43:22 +0100 Subject: [PATCH 01/19] vexpress: Add support for the -bios flag to provide firmware Right now to run firmware inside the QEMU VExpress model requires padding out the firmware image to the size of the virtual flash and passing it in via the -pflash argument. If the firmware image is passed without padding, then QEMU will fail. Also, when passed as a -pflash argument, QEMU treats the file as persistent storage and will modify the file. The -bios flag provides the semantics that we want for providing a firmware image. This patch maps the contents of the -bios file into the address space at the boot flash location. Tested with the vexpress-a15 model and the Tianocore port. Signed-off-by: Grant Likely Tested-by: Roy Franz [PMM: folded long line, removed stray \n from error message, use correct variable for printing image name, exit(1) rather than 0] Reviewed-by: Peter Crosthwaite Signed-off-by: Peter Maydell --- hw/arm/vexpress.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index 33ff422542..f311595d67 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -28,6 +28,7 @@ #include "net/net.h" #include "sysemu/sysemu.h" #include "hw/boards.h" +#include "hw/loader.h" #include "exec/address-spaces.h" #include "sysemu/blockdev.h" #include "hw/block/flash.h" @@ -528,6 +529,18 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard, daughterboard->init(daughterboard, machine->ram_size, machine->cpu_model, pic); + /* + * If a bios file was provided, attempt to map it into memory + */ + if (bios_name) { + const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!fn || load_image_targphys(fn, map[VE_NORFLASH0], + VEXPRESS_FLASH_SIZE) < 0) { + error_report("Could not load ROM image '%s'", bios_name); + exit(1); + } + } + /* Motherboard peripherals: the wiring is the same but the * addresses vary between the legacy and A-Series memory maps. */ From bf01601764fbef7b54894c9dc1f9c1e727a49294 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:22 +0100 Subject: [PATCH 02/19] target-arm/cpu64.c: Actually register Cortex-A57 impdef registers cpu64.c contains a reginfo list for the impdef registers on the Cortex-A57; however we forgot to actually call define_arm_cp_regs(), so it was sitting there doing nothing. Remedy this omission. Message-id: 1401226259-23121-1-git-send-email-peter.maydell@linaro.org Reviewed-by: Peter Crosthwaite Tested-by: Peter Crosthwaite Signed-off-by: Peter Maydell --- target-arm/cpu64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index 8daa622bdc..ff4c2b46d7 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -128,6 +128,7 @@ static void aarch64_a57_initfn(Object *obj) cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ cpu->dcz_blocksize = 4; /* 64 bytes */ + define_arm_cp_regs(cpu, cortexa57_cp_reginfo); } #ifdef CONFIG_USER_ONLY From 8d5c773e323b22402abdd0beef4c7d2fc91dd0eb Mon Sep 17 00:00:00 2001 From: Fabian Aggeler Date: Mon, 9 Jun 2014 15:43:22 +0100 Subject: [PATCH 03/19] target-arm: Prepare cpreg writefns/readfns for EL3/SecExt This patch changes some readfns/writefns to use raw_write and raw_read functions, which use the fieldoffset specified in ARMCPRegInfo instead of directly accessing the field. This will simplify patches for EL3 & Security Extensions. Reviewed-by: Peter Crosthwaite Signed-off-by: Fabian Aggeler Message-id: 1401962428-14749-1-git-send-email-aggelerf@ethz.ch Signed-off-by: Peter Maydell --- target-arm/helper.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index 95af624126..3e7f0db15b 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -319,7 +319,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); - env->cp15.c3 = value; + raw_write(env, ri, value); tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */ } @@ -327,12 +327,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = arm_env_get_cpu(env); - if (env->cp15.c13_fcse != value) { + if (raw_read(env, ri) != value) { /* Unlike real hardware the qemu TLB uses virtual addresses, * not modified virtual addresses, so this causes a TLB flush. */ tlb_flush(CPU(cpu), 1); - env->cp15.c13_fcse = value; + raw_write(env, ri, value); } } @@ -341,7 +341,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = arm_env_get_cpu(env); - if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU) + if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU) && !extended_addresses_enabled(env)) { /* For VMSA (when not using the LPAE long descriptor page table * format) this register includes the ASID, so do a TLB flush. @@ -349,7 +349,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, */ tlb_flush(CPU(cpu), 1); } - env->cp15.contextidr_el1 = value; + raw_write(env, ri, value); } static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -693,7 +693,7 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri) static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - env->cp15.c0_cssel = value & 0xf; + raw_write(env, ri, value & 0xf); } static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -1216,11 +1216,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { if (arm_feature(env, ARM_FEATURE_LPAE)) { - env->cp15.par_el1 = value; + raw_write(env, ri, value); } else if (arm_feature(env, ARM_FEATURE_V7)) { - env->cp15.par_el1 = value & 0xfffff6ff; + raw_write(env, ri, value & 0xfffff6ff); } else { - env->cp15.par_el1 = value & 0xfffff1ff; + raw_write(env, ri, value & 0xfffff1ff); } } @@ -1423,7 +1423,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, * for long-descriptor tables the TTBCR fields are used differently * and the c2_mask and c2_base_mask values are meaningless. */ - env->cp15.c2_control = value; + raw_write(env, ri, value); env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift); env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift); } @@ -1445,7 +1445,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) { env->cp15.c2_base_mask = 0xffffc000u; - env->cp15.c2_control = 0; + raw_write(env, ri, 0); env->cp15.c2_mask = 0; } @@ -1456,7 +1456,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */ tlb_flush(CPU(cpu), 1); - env->cp15.c2_control = value; + raw_write(env, ri, value); } static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2151,14 +2151,14 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = arm_env_get_cpu(env); - if (env->cp15.c1_sys == value) { + if (raw_read(env, ri) == value) { /* Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. */ return; } - env->cp15.c1_sys = value; + raw_write(env, ri, value); /* ??? Lots of these bits are not implemented. */ /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(CPU(cpu), 1); From d615efac7c4dc0984de31791c5c7d6b06408aadb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Jun 2014 15:43:23 +0100 Subject: [PATCH 04/19] target-arm: Correct handling of UXN bit in ARMv8 LPAE page tables In v8 page tables bit 54 in the PTE is UXN in the EL0/EL1 translation regimes and XN elsewhere. In v7 the bit is always XN. Since we only emulate EL0/EL1 we can just treat this bit as UXN whenever we are in v8 mode. Also correctly extract the upper attributes from the PTE entry, the v8 version tried to avoid extracting the CONTIG bit and ended up with the upper bits being off-by-one. Instead behave the same as v7 and extract (but ignore) the CONTIG bit. This fixes "Bad mode in Synchronous Abort handler detected, code 0x8400000f" seen when modprobing modules under Linux. Signed-off-by: Ian Campbell Cc: Peter Maydell Cc: Claudio Fontana Cc: Rob Herring Signed-off-by: Peter Maydell --- target-arm/helper.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index 3e7f0db15b..1e50a7840e 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -3929,13 +3929,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, page_size = (1 << ((granule_sz * (4 - level)) + 3)); descaddr |= (address & (page_size - 1)); /* Extract attributes from the descriptor and merge with table attrs */ - if (arm_feature(env, ARM_FEATURE_V8)) { - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 53, 11) << 10); - } else { - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 52, 12) << 10); - } + attrs = extract64(descriptor, 2, 10) + | (extract64(descriptor, 52, 12) << 10); attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */ /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 @@ -3961,8 +3956,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address, goto do_fault; } *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) { - /* XN or PXN */ + if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) || + (!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) || + (!is_user && (attrs & (1 << 11)))) { + /* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally + * treat XN/UXN as UXN for v8. + */ if (access_type == 2) { goto do_fault; } From f1ecb913d81199758383b8cbc15f4eb435b91753 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jun 2014 15:43:23 +0100 Subject: [PATCH 05/19] target-arm: add support for v8 SHA1 and SHA256 instructions This adds support for the SHA1 and SHA256 instructions that are available on some v8 implementations of Aarch32. Signed-off-by: Ard Biesheuvel Signed-off-by: Peter Maydell Message-id: 1401386724-26529-2-git-send-email-peter.maydell@linaro.org [PMM: * rebase * fix bad indent * add a missing UNDEF check for Q!=1 in the 3-reg SHA1/SHA256 case * use g_assert_not_reached() * don't re-extract bit 6 for the 2-reg-misc encodings * set the ELF HWCAP2 bits for the new features ] Signed-off-by: Peter Maydell --- linux-user/elfload.c | 2 + target-arm/cpu.c | 2 + target-arm/cpu.h | 2 + target-arm/crypto_helper.c | 257 ++++++++++++++++++++++++++++++++++++- target-arm/helper.h | 9 ++ target-arm/translate.c | 84 ++++++++++++ 6 files changed, 349 insertions(+), 7 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 995f999768..9bda262419 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -468,6 +468,8 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); + GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); + GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); return hwcaps; } diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 794dcb9fb7..753f6cb1da 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -317,6 +317,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_ARM_DIV); set_feature(env, ARM_FEATURE_LPAE); set_feature(env, ARM_FEATURE_V8_AES); + set_feature(env, ARM_FEATURE_V8_SHA1); + set_feature(env, ARM_FEATURE_V8_SHA256); } if (arm_feature(env, ARM_FEATURE_V7)) { set_feature(env, ARM_FEATURE_VAPA); diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 7d8332e8be..14d5f21e7a 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -635,6 +635,8 @@ enum arm_features { ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ ARM_FEATURE_EL2, /* has EL2 Virtualization support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ + ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c index d8898ed805..3e4b5f7c48 100644 --- a/target-arm/crypto_helper.c +++ b/target-arm/crypto_helper.c @@ -1,7 +1,7 @@ /* * crypto_helper.c - emulate v8 Crypto Extensions instructions * - * Copyright (C) 2013 Linaro Ltd + * Copyright (C) 2013 - 2014 Linaro Ltd * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -15,9 +15,9 @@ #include "exec/exec-all.h" #include "exec/helper-proto.h" -union AES_STATE { +union CRYPTO_STATE { uint8_t bytes[16]; - uint32_t cols[4]; + uint32_t words[4]; uint64_t l[2]; }; @@ -99,11 +99,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, /* ShiftRows permutation vector for decryption */ { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 }, }; - union AES_STATE rk = { .l = { + union CRYPTO_STATE rk = { .l = { float64_val(env->vfp.regs[rm]), float64_val(env->vfp.regs[rm + 1]) } }; - union AES_STATE st = { .l = { + union CRYPTO_STATE st = { .l = { float64_val(env->vfp.regs[rd]), float64_val(env->vfp.regs[rd + 1]) } }; @@ -260,7 +260,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - union AES_STATE st = { .l = { + union CRYPTO_STATE st = { .l = { float64_val(env->vfp.regs[rm]), float64_val(env->vfp.regs[rm + 1]) } }; @@ -269,7 +269,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, assert(decrypt < 2); for (i = 0; i < 16; i += 4) { - st.cols[i >> 2] = cpu_to_le32( + st.words[i >> 2] = cpu_to_le32( mc[decrypt][st.bytes[i]] ^ rol32(mc[decrypt][st.bytes[i + 1]], 8) ^ rol32(mc[decrypt][st.bytes[i + 2]], 16) ^ @@ -279,3 +279,246 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, env->vfp.regs[rd] = make_float64(st.l[0]); env->vfp.regs[rd + 1] = make_float64(st.l[1]); } + +/* + * SHA-1 logical functions + */ + +static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & (y ^ z)) ^ z; +} + +static uint32_t par(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | ((x | y) & z); +} + +void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm, uint32_t op) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + if (op == 3) { /* sha1su0 */ + d.l[0] ^= d.l[1] ^ m.l[0]; + d.l[1] ^= n.l[0] ^ m.l[1]; + } else { + int i; + + for (i = 0; i < 4; i++) { + uint32_t t; + + switch (op) { + case 0: /* sha1c */ + t = cho(d.words[1], d.words[2], d.words[3]); + break; + case 1: /* sha1p */ + t = par(d.words[1], d.words[2], d.words[3]); + break; + case 2: /* sha1m */ + t = maj(d.words[1], d.words[2], d.words[3]); + break; + default: + g_assert_not_reached(); + } + t += rol32(d.words[0], 5) + n.words[0] + m.words[i]; + + n.words[0] = d.words[3]; + d.words[3] = d.words[2]; + d.words[2] = ror32(d.words[1], 2); + d.words[1] = d.words[0]; + d.words[0] = t; + } + } + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + m.words[0] = ror32(m.words[0], 2); + m.words[1] = m.words[2] = m.words[3] = 0; + + env->vfp.regs[rd] = make_float64(m.l[0]); + env->vfp.regs[rd + 1] = make_float64(m.l[1]); +} + +void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + d.words[0] = rol32(d.words[0] ^ m.words[1], 1); + d.words[1] = rol32(d.words[1] ^ m.words[2], 1); + d.words[2] = rol32(d.words[2] ^ m.words[3], 1); + d.words[3] = rol32(d.words[3] ^ d.words[0], 1); + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +/* + * The SHA-256 logical functions, according to + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + */ + +static uint32_t S0(uint32_t x) +{ + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); +} + +static uint32_t S1(uint32_t x) +{ + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); +} + +static uint32_t s0(uint32_t x) +{ + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); +} + +static uint32_t s1(uint32_t x) +{ + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); +} + +void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + int i; + + for (i = 0; i < 4; i++) { + uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3] + + S1(n.words[0]) + m.words[i]; + + n.words[3] = n.words[2]; + n.words[2] = n.words[1]; + n.words[1] = n.words[0]; + n.words[0] = d.words[3] + t; + + t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]); + + d.words[3] = d.words[2]; + d.words[2] = d.words[1]; + d.words[1] = d.words[0]; + d.words[0] = t; + } + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + int i; + + for (i = 0; i < 4; i++) { + uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3] + + S1(d.words[0]) + m.words[i]; + + d.words[3] = d.words[2]; + d.words[2] = d.words[1]; + d.words[1] = d.words[0]; + d.words[0] = n.words[3 - i] + t; + } + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + d.words[0] += s0(d.words[1]); + d.words[1] += s0(d.words[2]); + d.words[2] += s0(d.words[3]); + d.words[3] += s0(m.words[0]); + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + d.words[0] += s1(m.words[2]) + n.words[1]; + d.words[1] += s1(m.words[3]) + n.words[2]; + d.words[2] += s1(d.words[0]) + n.words[3]; + d.words[3] += s1(d.words[1]) + m.words[0]; + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} diff --git a/target-arm/helper.h b/target-arm/helper.h index b63fd0ff1c..113b09d35e 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -512,6 +512,15 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32) DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32) DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32) +DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32) +DEF_HELPER_3(crypto_sha1h, void, env, i32, i32) +DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32) + +DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32) +DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32) +DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32) +DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32) + DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) diff --git a/target-arm/translate.c b/target-arm/translate.c index d499caa562..38ef5b19aa 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4776,6 +4776,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_VPMIN 21 #define NEON_3R_VQDMULH_VQRDMULH 22 #define NEON_3R_VPADD 23 +#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */ #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */ #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ @@ -4809,6 +4810,7 @@ static const uint8_t neon_3r_sizes[] = { [NEON_3R_VPMIN] = 0x7, [NEON_3R_VQDMULH_VQRDMULH] = 0x6, [NEON_3R_VPADD] = 0x7, + [NEON_3R_SHA] = 0xf, /* size field encodes op type */ [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */ [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ @@ -4842,6 +4844,7 @@ static const uint8_t neon_3r_sizes[] = { #define NEON_2RM_VCEQ0 18 #define NEON_2RM_VCLE0 19 #define NEON_2RM_VCLT0 20 +#define NEON_2RM_SHA1H 21 #define NEON_2RM_VABS 22 #define NEON_2RM_VNEG 23 #define NEON_2RM_VCGT0_F 24 @@ -4858,6 +4861,7 @@ static const uint8_t neon_3r_sizes[] = { #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ #define NEON_2RM_VSHLL 38 +#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */ #define NEON_2RM_VRINTN 40 #define NEON_2RM_VRINTX 41 #define NEON_2RM_VRINTA 42 @@ -4918,6 +4922,7 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VCEQ0] = 0x7, [NEON_2RM_VCLE0] = 0x7, [NEON_2RM_VCLT0] = 0x7, + [NEON_2RM_SHA1H] = 0x4, [NEON_2RM_VABS] = 0x7, [NEON_2RM_VNEG] = 0x7, [NEON_2RM_VCGT0_F] = 0x4, @@ -4934,6 +4939,7 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VMOVN] = 0x7, [NEON_2RM_VQMOVN] = 0x7, [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_SHA1SU1] = 0x4, [NEON_2RM_VRINTN] = 0x4, [NEON_2RM_VRINTX] = 0x4, [NEON_2RM_VRINTA] = 0x4, @@ -5011,6 +5017,49 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins if (q && ((rd | rn | rm) & 1)) { return 1; } + /* + * The SHA-1/SHA-256 3-register instructions require special treatment + * here, as their size field is overloaded as an op type selector, and + * they all consume their input in a single pass. + */ + if (op == NEON_3R_SHA) { + if (!q) { + return 1; + } + if (!u) { /* SHA-1 */ + if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rn); + tmp3 = tcg_const_i32(rm); + tmp4 = tcg_const_i32(size); + gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4); + tcg_temp_free_i32(tmp4); + } else { /* SHA-256 */ + if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rn); + tmp3 = tcg_const_i32(rm); + switch (size) { + case 0: + gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3); + break; + case 1: + gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3); + break; + case 2: + gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3); + break; + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + return 0; + } if (size == 3 && op != NEON_3R_LOGIC) { /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 2 : 1); pass++) { @@ -6486,6 +6535,41 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins tcg_temp_free_i32(tmp2); tcg_temp_free_i32(tmp3); break; + case NEON_2RM_SHA1H: + if (!arm_feature(env, ARM_FEATURE_V8_SHA1) + || ((rm | rd) & 1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + + gen_helper_crypto_sha1h(cpu_env, tmp, tmp2); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; + case NEON_2RM_SHA1SU1: + if ((rm | rd) & 1) { + return 1; + } + /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ + if (q) { + if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) { + return 1; + } + } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2); + } else { + gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2); + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; default: elementwise: for (pass = 0; pass < (q ? 4 : 2); pass++) { From 526d0096e56e82ffa5edb15bd75c5c093e61fa59 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:23 +0100 Subject: [PATCH 06/19] target-arm: Allow 3reg_wide undefreq to encode more bad size options The current undefreq field in the neon_3reg_wide handling allows us to encode "UNDEF if size != 0" and "UNDEF if size == 0". This is no longer sufficient with the advent of 64-bit polynomial VMULL, which means we want to UNDEF if size == 1. Change the undefreq encoding to use separate bits for all of "UNDEF if size == 0", "UNDEF if size == 1" and "UNDEF if size == 2". Signed-off-by: Peter Maydell Message-id: 1401386724-26529-3-git-send-email-peter.maydell@linaro.org --- target-arm/translate.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index 38ef5b19aa..7124606723 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -5954,10 +5954,11 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins int src1_wide; int src2_wide; int prewiden; - /* undefreq: bit 0 : UNDEF if size != 0 - * bit 1 : UNDEF if size == 0 - * bit 2 : UNDEF if U == 1 - * Note that [1:0] set implies 'always UNDEF' + /* undefreq: bit 0 : UNDEF if size == 0 + * bit 1 : UNDEF if size == 1 + * bit 2 : UNDEF if size == 2 + * bit 3 : UNDEF if U == 1 + * Note that [2:0] set implies 'always UNDEF' */ int undefreq; /* prewiden, src1_wide, src2_wide, undefreq */ @@ -5971,13 +5972,13 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins {0, 1, 1, 0}, /* VSUBHN */ {0, 0, 0, 0}, /* VABDL */ {0, 0, 0, 0}, /* VMLAL */ - {0, 0, 0, 6}, /* VQDMLAL */ + {0, 0, 0, 9}, /* VQDMLAL */ {0, 0, 0, 0}, /* VMLSL */ - {0, 0, 0, 6}, /* VQDMLSL */ + {0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 2}, /* VQDMULL */ - {0, 0, 0, 5}, /* Polynomial VMULL */ - {0, 0, 0, 3}, /* Reserved: always UNDEF */ + {0, 0, 0, 1}, /* VQDMULL */ + {0, 0, 0, 15}, /* Polynomial VMULL */ + {0, 0, 0, 7}, /* Reserved: always UNDEF */ }; prewiden = neon_3reg_wide[op][0]; @@ -5985,9 +5986,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins src2_wide = neon_3reg_wide[op][2]; undefreq = neon_3reg_wide[op][3]; - if (((undefreq & 1) && (size != 0)) || - ((undefreq & 2) && (size == 0)) || - ((undefreq & 4) && u)) { + if ((undefreq & (1 << size)) || + ((undefreq & 8) && u)) { return 1; } if ((src1_wide && (rn & 1)) || From 4e624edaebec9312be7b63096bbe5b2fe76f3613 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:23 +0100 Subject: [PATCH 07/19] target-arm: add support for v8 VMULL.P64 instruction Add support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication instruction in the A32/T32 instruction sets; this is part of the v8 Crypto Extensions. To do this we have to move the neon_pmull_64_{lo,hi} helpers from helper-a64.c into neon_helper.c so they can be used by the AArch32 translator. Inspired-by: Ard Biesheuvel Signed-off-by: Peter Maydell Message-id: 1401386724-26529-4-git-send-email-peter.maydell@linaro.org --- linux-user/elfload.c | 1 + target-arm/cpu.c | 1 + target-arm/cpu.h | 1 + target-arm/helper-a64.c | 30 ------------------------------ target-arm/helper-a64.h | 2 -- target-arm/helper.h | 3 +++ target-arm/neon_helper.c | 30 ++++++++++++++++++++++++++++++ target-arm/translate.c | 26 +++++++++++++++++++++++++- 8 files changed, 61 insertions(+), 33 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 9bda262419..3241fec035 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); + GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL); GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 753f6cb1da..fb9c12d81e 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_V8_AES); set_feature(env, ARM_FEATURE_V8_SHA1); set_feature(env, ARM_FEATURE_V8_SHA256); + set_feature(env, ARM_FEATURE_V8_PMULL); } if (arm_feature(env, ARM_FEATURE_V7)) { set_feature(env, ARM_FEATURE_VAPA); diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 14d5f21e7a..79e7f82515 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -637,6 +637,7 @@ enum arm_features { ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index cccda74113..f0d27229d4 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, return result; } -/* Helper function for 64 bit polynomial multiply case: - * perform PolynomialMult(op1, op2) and return either the top or - * bottom half of the 128 bit result. - */ -uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - for (bitnum = 0; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 << bitnum; - } - } - return res; -} -uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - /* bit 0 of op1 can't influence the high 64 bits at all */ - for (bitnum = 1; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 >> (64 - bitnum); - } - } - return res; -} - /* 64bit/double versions of the neon float compare functions */ uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) { diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 3f05bedcca..8de7536ff7 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) -DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) diff --git a/target-arm/helper.h b/target-arm/helper.h index 113b09d35e..0ef8fcac17 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index 492e500700..47d13e908c 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) env->vfp.regs[rm] = make_float64(m0); env->vfp.regs[rd] = make_float64(d0); } + +/* Helper function for 64 bit polynomial multiply case: + * perform PolynomialMult(op1, op2) and return either the top or + * bottom half of the 128 bit result. + */ +uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + for (bitnum = 0; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 << bitnum; + } + } + return res; +} +uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + /* bit 0 of op1 can't influence the high 64 bits at all */ + for (bitnum = 1; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 >> (64 - bitnum); + } + } + return res; +} diff --git a/target-arm/translate.c b/target-arm/translate.c index 7124606723..41c3fc77b0 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins {0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 0}, /* Integer VMULL */ {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 15}, /* Polynomial VMULL */ + {0, 0, 0, 0xa}, /* Polynomial VMULL */ {0, 0, 0, 7}, /* Reserved: always UNDEF */ }; @@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins return 1; } + /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) + * outside the loop below as it only performs a single pass. + */ + if (op == 14 && size == 2) { + TCGv_i64 tcg_rn, tcg_rm, tcg_rd; + + if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) { + return 1; + } + tcg_rn = tcg_temp_new_i64(); + tcg_rm = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + neon_load_reg64(tcg_rn, rn); + neon_load_reg64(tcg_rm, rm); + gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd); + gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd + 1); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + tcg_temp_free_i64(tcg_rd); + return 0; + } + /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide destinations in problematic ways. */ From 411bdc7837681f58828ca490647fa1784a6700b9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:24 +0100 Subject: [PATCH 08/19] target-arm: A64: Use PMULL feature bit for PMULL Now that we have a separate ARM_FEATURE_V8_PMULL bit, use it for the A64 PMULL, not the AES feature bit. Signed-off-by: Peter Maydell --- linux-user/elfload.c | 2 +- target-arm/translate-a64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 3241fec035..e8724939f8 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -539,7 +539,7 @@ static uint32_t get_elf_hwcap(void) /* probe for the extra features */ #define GET_FEATURE(feat, hwcap) \ do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) - GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_PMULL); + GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL); #undef GET_FEATURE return hwcaps; diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index a9c4633517..9832cc3c22 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -8574,7 +8574,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } if (size == 3) { - if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { unallocated_encoding(s); return; } From 46d9dfdad65fa783691871622271a861466905c4 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:24 +0100 Subject: [PATCH 09/19] target-arm: arm_any_initfn() should never set ARM_FEATURE_AARCH64 The arm_any_initfn() is used only for the 32-bit linux-user "cpu any", so it only gets called in builds where TARGET_AARCH64 is not defined. Remove the unreachable line which sets ARM_FEATURE_AARCH64. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-2-git-send-email-peter.maydell@linaro.org --- target-arm/cpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index fb9c12d81e..94123b2213 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -963,9 +963,6 @@ static void arm_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_CRC); -#ifdef TARGET_AARCH64 - set_feature(&cpu->env, ARM_FEATURE_AARCH64); -#endif cpu->midr = 0xffffffff; } #endif From fb8ad9f2c1a82256b1ac384393a093a61201272f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:24 +0100 Subject: [PATCH 10/19] target-arm: Remove unnecessary setting of feature bits FEATURE_V8 implies both FEATURE_V7MP and FEATURE_ARM_DIV, so we don't need to set them explicitly in initfns which set the V8 feature bit. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-3-git-send-email-peter.maydell@linaro.org --- target-arm/cpu.c | 2 -- target-arm/cpu64.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 94123b2213..383e22a757 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -960,8 +960,6 @@ static void arm_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); - set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); - set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_CRC); cpu->midr = 0xffffffff; } diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index ff4c2b46d7..2dd01fdf45 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -140,8 +140,6 @@ static void aarch64_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP4); set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); - set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); - set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_AARCH64); cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */ cpu->dcz_blocksize = 7; /* 512 bytes */ From 25f748e37a868b322a960c322ca11fb2dc5252b2 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:24 +0100 Subject: [PATCH 11/19] target-arm: Clean up handling of ARMv8 optional feature bits CRC and crypto are both optional v8 extensions, so FEATURE_V8 should not imply them. Instead we should set these bits in the initfns for the 32-bit and 64-bit "cpu any" and for the Cortex-A57. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-4-git-send-email-peter.maydell@linaro.org --- target-arm/cpu.c | 8 ++++---- target-arm/cpu64.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 383e22a757..bc19d80439 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -316,10 +316,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_V7); set_feature(env, ARM_FEATURE_ARM_DIV); set_feature(env, ARM_FEATURE_LPAE); - set_feature(env, ARM_FEATURE_V8_AES); - set_feature(env, ARM_FEATURE_V8_SHA1); - set_feature(env, ARM_FEATURE_V8_SHA256); - set_feature(env, ARM_FEATURE_V8_PMULL); } if (arm_feature(env, ARM_FEATURE_V7)) { set_feature(env, ARM_FEATURE_VAPA); @@ -960,6 +956,10 @@ static void arm_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); set_feature(&cpu->env, ARM_FEATURE_CRC); cpu->midr = 0xffffffff; } diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index 2dd01fdf45..40cc063f07 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -98,6 +98,11 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57; cpu->midr = 0x411fd070; cpu->reset_fpsid = 0x41034070; @@ -141,6 +146,11 @@ static void aarch64_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */ cpu->dcz_blocksize = 7; /* 512 bytes */ } From da5141fc45937fa358ca4554a335ae6a4c4453ec Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:25 +0100 Subject: [PATCH 12/19] target-arm: VFPv4 implies half-precision extension VFPv4 implies the presence of the half-precision floating point extension (which is optional in VFPv3). Add this implied rule to arm_cpu_realizefn() and remove some no-longer-needed explicit setting of the bit in initfns. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-5-git-send-email-peter.maydell@linaro.org --- target-arm/cpu.c | 3 +-- target-arm/cpu64.c | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/target-arm/cpu.c b/target-arm/cpu.c index bc19d80439..b8778350f7 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -348,6 +348,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (arm_feature(env, ARM_FEATURE_VFP4)) { set_feature(env, ARM_FEATURE_VFP3); + set_feature(env, ARM_FEATURE_VFP_FP16); } if (arm_feature(env, ARM_FEATURE_VFP3)) { set_feature(env, ARM_FEATURE_VFP); @@ -744,7 +745,6 @@ static void cortex_a15_initfn(Object *obj) cpu->dtb_compatible = "arm,cortex-a15"; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_VFP4); - set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); @@ -953,7 +953,6 @@ static void arm_any_initfn(Object *obj) ARMCPU *cpu = ARM_CPU(obj); set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_VFP4); - set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_V8_AES); diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index 40cc063f07..8b2081c246 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -93,7 +93,6 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_VFP4); - set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); @@ -143,7 +142,6 @@ static void aarch64_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_VFP4); - set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_V8_AES); From 130f2e7dcb4a5f9534fc65200121f06983435a77 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:25 +0100 Subject: [PATCH 13/19] target-arm: A64: Implement CRC instructions Implement the optional A64 CRC instructions. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-6-git-send-email-peter.maydell@linaro.org --- linux-user/elfload.c | 1 + target-arm/helper-a64.c | 30 +++++++++++++++++++++ target-arm/helper-a64.h | 2 ++ target-arm/translate-a64.c | 54 +++++++++++++++++++++++++++++++++++++- 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index e8724939f8..f2d5955230 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -540,6 +540,7 @@ static uint32_t get_elf_hwcap(void) #define GET_FEATURE(feat, hwcap) \ do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL); + GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32); #undef GET_FEATURE return hwcaps; diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index f0d27229d4..2b4ce6ac60 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -24,6 +24,8 @@ #include "sysemu/sysemu.h" #include "qemu/bitops.h" #include "internals.h" +#include "qemu/crc32c.h" +#include /* For crc32 */ /* C2.4.7 Multiply and divide */ /* special cases for 0 and LLONG_MIN are mandated by the standard */ @@ -408,6 +410,34 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) return r; } +/* 64-bit versions of the CRC helpers. Note that although the operation + * (and the prototypes of crc32c() and crc32() mean that only the bottom + * 32 bits of the accumulator and result are used, we pass and return + * uint64_t for convenience of the generated code. Unlike the 32-bit + * instruction set versions, val may genuinely have 64 bits of data in it. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ +uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} + /* Handle a CPU exception. */ void aarch64_cpu_do_interrupt(CPUState *cs) { diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 8de7536ff7..1d3d10fffb 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -44,3 +44,5 @@ DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) +DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) +DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 9832cc3c22..6af593a26e 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -3774,6 +3774,54 @@ static void handle_shift_reg(DisasContext *s, tcg_temp_free_i64(tcg_shift); } +/* CRC32[BHWX], CRC32C[BHWX] */ +static void handle_crc32(DisasContext *s, + unsigned int sf, unsigned int sz, bool crc32c, + unsigned int rm, unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_acc, tcg_val; + TCGv_i32 tcg_bytes; + + if (!arm_dc_feature(s, ARM_FEATURE_CRC) + || (sf == 1 && sz != 3) + || (sf == 0 && sz == 3)) { + unallocated_encoding(s); + return; + } + + if (sz == 3) { + tcg_val = cpu_reg(s, rm); + } else { + uint64_t mask; + switch (sz) { + case 0: + mask = 0xFF; + break; + case 1: + mask = 0xFFFF; + break; + case 2: + mask = 0xFFFFFFFF; + break; + default: + g_assert_not_reached(); + } + tcg_val = new_tmp_a64(s); + tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); + } + + tcg_acc = cpu_reg(s, rn); + tcg_bytes = tcg_const_i32(1 << sz); + + if (crc32c) { + gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + } else { + gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + } + + tcg_temp_free_i32(tcg_bytes); +} + /* C3.5.8 Data-processing (2 source) * 31 30 29 28 21 20 16 15 10 9 5 4 0 * +----+---+---+-----------------+------+--------+------+------+ @@ -3821,8 +3869,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) case 21: case 22: case 23: /* CRC32 */ - unsupported_encoding(s, insn); + { + int sz = extract32(opcode, 0, 2); + bool crc32c = extract32(opcode, 2, 1); + handle_crc32(s, sf, sz, crc32c, rm, rn, rd); break; + } default: unallocated_encoding(s); break; From aa633469ed902a6d96b3d4013ec5ce32597f0626 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:25 +0100 Subject: [PATCH 14/19] target-arm: A32/T32: Mask CRC value in calling code, not helper Bring the 32-bit CRC helper functions into line with the A64 ones, by masking the high bytes of the value in the calling code rather than the helper. This is more efficient since we can determine the mask at translation time. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-7-git-send-email-peter.maydell@linaro.org --- target-arm/helper.c | 25 ++++++------------------- target-arm/translate.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index 1e50a7840e..177ed076c7 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -5559,28 +5559,15 @@ int arm_rmode_to_sf(int rmode) return rmode; } -static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes) -{ - memset(buf, 0, 4); - - if (bytes == 1) { - buf[0] = val & 0xff; - } else if (bytes == 2) { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - } else { - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24) & 0xff; - } -} - +/* CRC helpers. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) { uint8_t buf[4]; - crc_init_buffer(buf, val, bytes); + stl_le_p(buf, val); /* zlib crc32 converts the accumulator and output to one's complement. */ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; @@ -5590,7 +5577,7 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) { uint8_t buf[4]; - crc_init_buffer(buf, val, bytes); + stl_le_p(buf, val); /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; diff --git a/target-arm/translate.c b/target-arm/translate.c index 41c3fc77b0..351943f6f3 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -7806,6 +7806,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); + if (op1 == 0) { + tcg_gen_andi_i32(tmp2, tmp2, 0xff); + } else if (op1 == 1) { + tcg_gen_andi_i32(tmp2, tmp2, 0xffff); + } tmp3 = tcg_const_i32(1 << op1); if (c & 0x2) { gen_helper_crc32c(tmp, tmp, tmp2, tmp3); @@ -9438,6 +9443,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw } tmp2 = load_reg(s, rm); + if (sz == 0) { + tcg_gen_andi_i32(tmp2, tmp2, 0xff); + } else if (sz == 1) { + tcg_gen_andi_i32(tmp2, tmp2, 0xffff); + } tmp3 = tcg_const_i32(1 << sz); if (c) { gen_helper_crc32c(tmp, tmp, tmp2, tmp3); From 5acc765c04300f9ac8b7944008bc3ad54c6b032a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:25 +0100 Subject: [PATCH 15/19] target-arm: A64: Implement AES instructions Implement the AES instructions from the optional Crypto Extensions. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-8-git-send-email-peter.maydell@linaro.org --- linux-user/elfload.c | 1 + target-arm/translate-a64.c | 51 +++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index f2d5955230..396a80817c 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -539,6 +539,7 @@ static uint32_t get_elf_hwcap(void) /* probe for the extra features */ #define GET_FEATURE(feat, hwcap) \ do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) + GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES); GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32); #undef GET_FEATURE diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 6af593a26e..94b4642141 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -85,6 +85,7 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); +typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); /* initialize TCG globals. */ void a64_translate_init(void) @@ -10549,7 +10550,55 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static void disas_crypto_aes(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int decrypt; + TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt; + CryptoThreeOpEnvFn *genfn; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) + || size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x4: /* AESE */ + decrypt = 0; + genfn = gen_helper_crypto_aese; + break; + case 0x6: /* AESMC */ + decrypt = 0; + genfn = gen_helper_crypto_aesmc; + break; + case 0x5: /* AESD */ + decrypt = 1; + genfn = gen_helper_crypto_aese; + break; + case 0x7: /* AESIMC */ + decrypt = 1; + genfn = gen_helper_crypto_aesmc; + break; + default: + unallocated_encoding(s); + return; + } + + /* Note that we convert the Vx register indexes into the + * index within the vfp.regs[] array, so we can share the + * helper with the AArch32 instructions. + */ + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_decrypt = tcg_const_i32(decrypt); + + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt); + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_i32(tcg_decrypt); } /* C3.6.20 Crypto three-reg SHA From be56f04eeacc22634a3bd709fdbda9873e8f55af Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:26 +0100 Subject: [PATCH 16/19] target-arm: A64: Implement 3-register SHA instructions Implement the 3-register SHA instruction group from the optional Crypto Extensions. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-9-git-send-email-peter.maydell@linaro.org --- target-arm/translate-a64.c | 59 +++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 94b4642141..82d46fafe5 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -10609,7 +10609,64 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 3); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + CryptoThreeOpEnvFn *genfn; + TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno; + int feature = ARM_FEATURE_V8_SHA256; + + if (size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0: /* SHA1C */ + case 1: /* SHA1P */ + case 2: /* SHA1M */ + case 3: /* SHA1SU0 */ + genfn = NULL; + feature = ARM_FEATURE_V8_SHA1; + break; + case 4: /* SHA256H */ + genfn = gen_helper_crypto_sha256h; + break; + case 5: /* SHA256H2 */ + genfn = gen_helper_crypto_sha256h2; + break; + case 6: /* SHA256SU1 */ + genfn = gen_helper_crypto_sha256su1; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_rm_regno = tcg_const_i32(rm << 1); + + if (genfn) { + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno); + } else { + TCGv_i32 tcg_opcode = tcg_const_i32(opcode); + + gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno, + tcg_rn_regno, tcg_rm_regno, tcg_opcode); + tcg_temp_free_i32(tcg_opcode); + } + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_i32(tcg_rm_regno); } /* C3.6.21 Crypto two-reg SHA From f6fe04d566f1a1e3219b501487cd2d2d00d723a5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:26 +0100 Subject: [PATCH 17/19] target-arm: A64: Implement two-register SHA instructions Implement the two-register SHA instruction group from the optional Crypto Extensions. Signed-off-by: Peter Maydell Message-id: 1401458125-27977-10-git-send-email-peter.maydell@linaro.org --- linux-user/elfload.c | 2 ++ target-arm/translate-a64.c | 45 +++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 396a80817c..68b9793649 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -541,6 +541,8 @@ static uint32_t get_elf_hwcap(void) do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0) GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES); GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL); + GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1); + GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32); #undef GET_FEATURE diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 82d46fafe5..63ad787e9f 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -85,6 +85,7 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32); typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); /* initialize TCG globals. */ @@ -10677,7 +10678,49 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + CryptoTwoOpEnvFn *genfn; + int feature; + TCGv_i32 tcg_rd_regno, tcg_rn_regno; + + if (size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0: /* SHA1H */ + feature = ARM_FEATURE_V8_SHA1; + genfn = gen_helper_crypto_sha1h; + break; + case 1: /* SHA1SU1 */ + feature = ARM_FEATURE_V8_SHA1; + genfn = gen_helper_crypto_sha1su1; + break; + case 2: /* SHA256SU0 */ + feature = ARM_FEATURE_V8_SHA256; + genfn = gen_helper_crypto_sha256su0; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno); + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); } /* C3.6 Data processing - SIMD, inc Crypto From d3afacc7269fee45d54d1501a46b51f12ea7bb15 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:26 +0100 Subject: [PATCH 18/19] target-arm: Fix errors in writes to generic timer control registers The code for handling writes to the generic timer control registers had several bugs: * ISTATUS (bit 2) is read-only but we forced it to zero on any write * the check for "was IMASK (bit 1) toggled?" incorrectly used '&' where it should be '^' * the handling of IMASK was inverted: we should set the IRQ if ISTATUS is set and IMASK is clear, not if both are set The combination of these bugs meant that when running a Linux guest that uses the generic timers we would fairly quickly end up either forgetting that the timer output should be asserted, or failing to set the IRQ when the timer was unmasked. The result is that the guest never gets any more timer interrupts. Signed-off-by: Peter Maydell Message-id: 1401803208-1281-1-git-send-email-peter.maydell@linaro.org Cc: qemu-stable@nongnu.org --- target-arm/helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-arm/helper.c b/target-arm/helper.c index 177ed076c7..050c40981b 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1040,16 +1040,16 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, int timeridx = ri->crm & 1; uint32_t oldval = env->cp15.c14_timer[timeridx].ctl; - env->cp15.c14_timer[timeridx].ctl = value & 3; + env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value); if ((oldval ^ value) & 1) { /* Enable toggled */ gt_recalc_timer(cpu, timeridx); - } else if ((oldval & value) & 2) { + } else if ((oldval ^ value) & 2) { /* IMASK toggled: don't need to recalculate, * just set the interrupt line based on ISTATUS */ qemu_set_irq(cpu->gt_timer_outputs[timeridx], - (oldval & 4) && (value & 2)); + (oldval & 4) && !(value & 2)); } } From 3b1a41381254f6080b5cfeb149c28a9237d42a0b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 9 Jun 2014 15:43:26 +0100 Subject: [PATCH 19/19] target-arm: Delete unused iwmmxt_msadb helper The iwmmxt_msadb helper and its corresponding gen function are unused; delete them. (This function appears to have never been used right back to the initial implementation of iwMMXt; it is identical to iwmmxt_madduq, and is presumably an accidental remnant from the initial development.) Reviewed-by: Peter Crosthwaite Signed-off-by: Peter Maydell Message-id: 1401822125-1822-1-git-send-email-peter.maydell@linaro.org --- target-arm/helper.h | 2 -- target-arm/iwmmxt_helper.c | 9 --------- target-arm/translate.c | 2 -- 3 files changed, 13 deletions(-) diff --git a/target-arm/helper.h b/target-arm/helper.h index 0ef8fcac17..facfcd2c11 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -456,8 +456,6 @@ DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) -DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64) - DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c index 398cbcbec7..a5069144d1 100644 --- a/target-arm/iwmmxt_helper.c +++ b/target-arm/iwmmxt_helper.c @@ -369,15 +369,6 @@ IWMMXT_OP_AVGW(1) #undef IWMMXT_OP_AVGW #undef AVGW -uint64_t HELPER(iwmmxt_msadb)(uint64_t a, uint64_t b) -{ - a = ((((a >> 0 ) & 0xffff) * ((b >> 0) & 0xffff) + - ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)) & 0xffffffff) | - ((((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) + - ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)) << 32); - return a; -} - uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n) { a >>= n << 3; diff --git a/target-arm/translate.c b/target-arm/translate.c index 351943f6f3..cf4e767ff8 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -1382,8 +1382,6 @@ IWMMXT_OP_ENV(avgb1) IWMMXT_OP_ENV(avgw0) IWMMXT_OP_ENV(avgw1) -IWMMXT_OP(msadb) - IWMMXT_OP_ENV(packuw) IWMMXT_OP_ENV(packul) IWMMXT_OP_ENV(packuq)