From 61e99241491c2d87af2a9751630e597a54ea7af8 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Mon, 9 Jun 2014 15:43:22 +0100
Subject: [PATCH 01/19] vexpress: Add support for the -bios flag to provide
 firmware

Right now to run firmware inside the QEMU VExpress model requires
padding out the firmware image to the size of the virtual flash and
passing it in via the -pflash argument. If the firmware image is passed
without padding, then QEMU will fail. Also, when passed as a -pflash
argument, QEMU treats the file as persistent storage and will modify the
file.

The -bios flag provides the semantics that we want for providing a
firmware image. This patch maps the contents of the -bios file into the
address space at the boot flash location.

Tested with the vexpress-a15 model and the Tianocore port.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Roy Franz <roy.franz@linaro.org>
[PMM: folded long line, removed stray \n from error message,
 use correct variable for printing image name, exit(1) rather than 0]
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/vexpress.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 33ff422542..f311595d67 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -28,6 +28,7 @@
 #include "net/net.h"
 #include "sysemu/sysemu.h"
 #include "hw/boards.h"
+#include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "sysemu/blockdev.h"
 #include "hw/block/flash.h"
@@ -528,6 +529,18 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard,
     daughterboard->init(daughterboard, machine->ram_size, machine->cpu_model,
                         pic);
 
+    /*
+     * If a bios file was provided, attempt to map it into memory
+     */
+    if (bios_name) {
+        const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+        if (!fn || load_image_targphys(fn, map[VE_NORFLASH0],
+                                       VEXPRESS_FLASH_SIZE) < 0) {
+            error_report("Could not load ROM image '%s'", bios_name);
+            exit(1);
+        }
+    }
+
     /* Motherboard peripherals: the wiring is the same but the
      * addresses vary between the legacy and A-Series memory maps.
      */

From bf01601764fbef7b54894c9dc1f9c1e727a49294 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:22 +0100
Subject: [PATCH 02/19] target-arm/cpu64.c: Actually register Cortex-A57 impdef
 registers

cpu64.c contains a reginfo list for the impdef registers on
the Cortex-A57; however we forgot to actually call define_arm_cp_regs(),
so it was sitting there doing nothing. Remedy this omission.

Message-id: 1401226259-23121-1-git-send-email-peter.maydell@linaro.org
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Tested-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8daa622bdc..ff4c2b46d7 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -128,6 +128,7 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
     cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
     cpu->dcz_blocksize = 4; /* 64 bytes */
+    define_arm_cp_regs(cpu, cortexa57_cp_reginfo);
 }
 
 #ifdef CONFIG_USER_ONLY

From 8d5c773e323b22402abdd0beef4c7d2fc91dd0eb Mon Sep 17 00:00:00 2001
From: Fabian Aggeler <aggelerf@ethz.ch>
Date: Mon, 9 Jun 2014 15:43:22 +0100
Subject: [PATCH 03/19] target-arm: Prepare cpreg writefns/readfns for
 EL3/SecExt

This patch changes some readfns/writefns to use raw_write
and raw_read functions, which use the fieldoffset specified
in ARMCPRegInfo instead of directly accessing the field.
This will simplify patches for EL3 & Security Extensions.

Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Fabian Aggeler <aggelerf@ethz.ch>
Message-id: 1401962428-14749-1-git-send-email-aggelerf@ethz.ch
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 95af624126..3e7f0db15b 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -319,7 +319,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    env->cp15.c3 = value;
+    raw_write(env, ri, value);
     tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */
 }
 
@@ -327,12 +327,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.c13_fcse != value) {
+    if (raw_read(env, ri) != value) {
         /* Unlike real hardware the qemu TLB uses virtual addresses,
          * not modified virtual addresses, so this causes a TLB flush.
          */
         tlb_flush(CPU(cpu), 1);
-        env->cp15.c13_fcse = value;
+        raw_write(env, ri, value);
     }
 }
 
@@ -341,7 +341,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
+    if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU)
         && !extended_addresses_enabled(env)) {
         /* For VMSA (when not using the LPAE long descriptor page table
          * format) this register includes the ASID, so do a TLB flush.
@@ -349,7 +349,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
          */
         tlb_flush(CPU(cpu), 1);
     }
-    env->cp15.contextidr_el1 = value;
+    raw_write(env, ri, value);
 }
 
 static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -693,7 +693,7 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
 {
-    env->cp15.c0_cssel = value & 0xf;
+    raw_write(env, ri, value & 0xf);
 }
 
 static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1216,11 +1216,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
-        env->cp15.par_el1 = value;
+        raw_write(env, ri, value);
     } else if (arm_feature(env, ARM_FEATURE_V7)) {
-        env->cp15.par_el1 = value & 0xfffff6ff;
+        raw_write(env, ri, value & 0xfffff6ff);
     } else {
-        env->cp15.par_el1 = value & 0xfffff1ff;
+        raw_write(env, ri, value & 0xfffff1ff);
     }
 }
 
@@ -1423,7 +1423,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
      * for long-descriptor tables the TTBCR fields are used differently
      * and the c2_mask and c2_base_mask values are meaningless.
      */
-    env->cp15.c2_control = value;
+    raw_write(env, ri, value);
     env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
     env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
 }
@@ -1445,7 +1445,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     env->cp15.c2_base_mask = 0xffffc000u;
-    env->cp15.c2_control = 0;
+    raw_write(env, ri, 0);
     env->cp15.c2_mask = 0;
 }
 
@@ -1456,7 +1456,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
     /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
     tlb_flush(CPU(cpu), 1);
-    env->cp15.c2_control = value;
+    raw_write(env, ri, value);
 }
 
 static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2151,14 +2151,14 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.c1_sys == value) {
+    if (raw_read(env, ri) == value) {
         /* Skip the TLB flush if nothing actually changed; Linux likes
          * to do a lot of pointless SCTLR writes.
          */
         return;
     }
 
-    env->cp15.c1_sys = value;
+    raw_write(env, ri, value);
     /* ??? Lots of these bits are not implemented.  */
     /* This may enable/disable the MMU, so do a TLB flush.  */
     tlb_flush(CPU(cpu), 1);

From d615efac7c4dc0984de31791c5c7d6b06408aadb Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Mon, 9 Jun 2014 15:43:23 +0100
Subject: [PATCH 04/19] target-arm: Correct handling of UXN bit in ARMv8 LPAE
 page tables

In v8 page tables bit 54 in the PTE is UXN in the EL0/EL1 translation regimes
and XN elsewhere. In v7 the bit is always XN. Since we only emulate EL0/EL1 we
can just treat this bit as UXN whenever we are in v8 mode.

Also correctly extract the upper attributes from the PTE entry, the v8 version
tried to avoid extracting the CONTIG bit and ended up with the upper bits being
off-by-one. Instead behave the same as v7 and extract (but ignore) the CONTIG
bit.

This fixes "Bad mode in Synchronous Abort handler detected, code 0x8400000f"
seen when modprobing modules under Linux.

Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Claudio Fontana <claudio.fontana@huawei.com>
Cc: Rob Herring <robherring2@gmail.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3e7f0db15b..1e50a7840e 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -3929,13 +3929,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         page_size = (1 << ((granule_sz * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
         /* Extract attributes from the descriptor and merge with table attrs */
-        if (arm_feature(env, ARM_FEATURE_V8)) {
-            attrs = extract64(descriptor, 2, 10)
-                | (extract64(descriptor, 53, 11) << 10);
-        } else {
-            attrs = extract64(descriptor, 2, 10)
-                | (extract64(descriptor, 52, 12) << 10);
-        }
+        attrs = extract64(descriptor, 2, 10)
+            | (extract64(descriptor, 52, 12) << 10);
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3961,8 +3956,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         goto do_fault;
     }
     *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-    if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) {
-        /* XN or PXN */
+    if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) ||
+        (!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) ||
+        (!is_user && (attrs & (1 << 11)))) {
+        /* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally
+         * treat XN/UXN as UXN for v8.
+         */
         if (access_type == 2) {
             goto do_fault;
         }

From f1ecb913d81199758383b8cbc15f4eb435b91753 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 9 Jun 2014 15:43:23 +0100
Subject: [PATCH 05/19] target-arm: add support for v8 SHA1 and SHA256
 instructions

This adds support for the SHA1 and SHA256 instructions that are available
on some v8 implementations of Aarch32.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-2-git-send-email-peter.maydell@linaro.org
[PMM:
 * rebase
 * fix bad indent
 * add a missing UNDEF check for Q!=1 in the 3-reg SHA1/SHA256 case
 * use g_assert_not_reached()
 * don't re-extract bit 6 for the 2-reg-misc encodings
 * set the ELF HWCAP2 bits for the new features
]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 linux-user/elfload.c       |   2 +
 target-arm/cpu.c           |   2 +
 target-arm/cpu.h           |   2 +
 target-arm/crypto_helper.c | 257 ++++++++++++++++++++++++++++++++++++-
 target-arm/helper.h        |   9 ++
 target-arm/translate.c     |  84 ++++++++++++
 6 files changed, 349 insertions(+), 7 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 995f999768..9bda262419 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -468,6 +468,8 @@ static uint32_t get_elf_hwcap2(void)
     uint32_t hwcaps = 0;
 
     GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
+    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
     GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
     return hwcaps;
 }
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 794dcb9fb7..753f6cb1da 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -317,6 +317,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_ARM_DIV);
         set_feature(env, ARM_FEATURE_LPAE);
         set_feature(env, ARM_FEATURE_V8_AES);
+        set_feature(env, ARM_FEATURE_V8_SHA1);
+        set_feature(env, ARM_FEATURE_V8_SHA256);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7d8332e8be..14d5f21e7a 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -635,6 +635,8 @@ enum arm_features {
     ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
     ARM_FEATURE_EL2, /* has EL2 Virtualization support */
     ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
+    ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index d8898ed805..3e4b5f7c48 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -1,7 +1,7 @@
 /*
  * crypto_helper.c - emulate v8 Crypto Extensions instructions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -15,9 +15,9 @@
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
-union AES_STATE {
+union CRYPTO_STATE {
     uint8_t    bytes[16];
-    uint32_t   cols[4];
+    uint32_t   words[4];
     uint64_t   l[2];
 };
 
@@ -99,11 +99,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm,
         /* ShiftRows permutation vector for decryption */
         { 0, 13, 10,  7, 4, 1, 14, 11, 8,  5, 2, 15, 12, 9, 6,  3 },
     };
-    union AES_STATE rk = { .l = {
+    union CRYPTO_STATE rk = { .l = {
         float64_val(env->vfp.regs[rm]),
         float64_val(env->vfp.regs[rm + 1])
     } };
-    union AES_STATE st = { .l = {
+    union CRYPTO_STATE st = { .l = {
         float64_val(env->vfp.regs[rd]),
         float64_val(env->vfp.regs[rd + 1])
     } };
@@ -260,7 +260,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
     } };
-    union AES_STATE st = { .l = {
+    union CRYPTO_STATE st = { .l = {
         float64_val(env->vfp.regs[rm]),
         float64_val(env->vfp.regs[rm + 1])
     } };
@@ -269,7 +269,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
     assert(decrypt < 2);
 
     for (i = 0; i < 16; i += 4) {
-        st.cols[i >> 2] = cpu_to_le32(
+        st.words[i >> 2] = cpu_to_le32(
             mc[decrypt][st.bytes[i]] ^
             rol32(mc[decrypt][st.bytes[i + 1]], 8) ^
             rol32(mc[decrypt][st.bytes[i + 2]], 16) ^
@@ -279,3 +279,246 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
     env->vfp.regs[rd] = make_float64(st.l[0]);
     env->vfp.regs[rd + 1] = make_float64(st.l[1]);
 }
+
+/*
+ * SHA-1 logical functions
+ */
+
+static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & (y ^ z)) ^ z;
+}
+
+static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
+{
+    return x ^ y ^ z;
+}
+
+static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & y) | ((x | y) & z);
+}
+
+void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                              uint32_t rm, uint32_t op)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    if (op == 3) { /* sha1su0 */
+        d.l[0] ^= d.l[1] ^ m.l[0];
+        d.l[1] ^= n.l[0] ^ m.l[1];
+    } else {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            uint32_t t;
+
+            switch (op) {
+            case 0: /* sha1c */
+                t = cho(d.words[1], d.words[2], d.words[3]);
+                break;
+            case 1: /* sha1p */
+                t = par(d.words[1], d.words[2], d.words[3]);
+                break;
+            case 2: /* sha1m */
+                t = maj(d.words[1], d.words[2], d.words[3]);
+                break;
+            default:
+                g_assert_not_reached();
+            }
+            t += rol32(d.words[0], 5) + n.words[0] + m.words[i];
+
+            n.words[0] = d.words[3];
+            d.words[3] = d.words[2];
+            d.words[2] = ror32(d.words[1], 2);
+            d.words[1] = d.words[0];
+            d.words[0] = t;
+        }
+    }
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    m.words[0] = ror32(m.words[0], 2);
+    m.words[1] = m.words[2] = m.words[3] = 0;
+
+    env->vfp.regs[rd] = make_float64(m.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(m.l[1]);
+}
+
+void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] = rol32(d.words[0] ^ m.words[1], 1);
+    d.words[1] = rol32(d.words[1] ^ m.words[2], 1);
+    d.words[2] = rol32(d.words[2] ^ m.words[3], 1);
+    d.words[3] = rol32(d.words[3] ^ d.words[0], 1);
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+/*
+ * The SHA-256 logical functions, according to
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ */
+
+static uint32_t S0(uint32_t x)
+{
+    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static uint32_t S1(uint32_t x)
+{
+    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+static uint32_t s0(uint32_t x)
+{
+    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static uint32_t s1(uint32_t x)
+{
+    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
+void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                            uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3]
+                     + S1(n.words[0]) + m.words[i];
+
+        n.words[3] = n.words[2];
+        n.words[2] = n.words[1];
+        n.words[1] = n.words[0];
+        n.words[0] = d.words[3] + t;
+
+        t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]);
+
+        d.words[3] = d.words[2];
+        d.words[2] = d.words[1];
+        d.words[1] = d.words[0];
+        d.words[0] = t;
+    }
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                             uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3]
+                     + S1(d.words[0]) + m.words[i];
+
+        d.words[3] = d.words[2];
+        d.words[2] = d.words[1];
+        d.words[1] = d.words[0];
+        d.words[0] = n.words[3 - i] + t;
+    }
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] += s0(d.words[1]);
+    d.words[1] += s0(d.words[2]);
+    d.words[2] += s0(d.words[3]);
+    d.words[3] += s0(m.words[0]);
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                              uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] += s1(m.words[2]) + n.words[1];
+    d.words[1] += s1(m.words[3]) + n.words[2];
+    d.words[2] += s1(d.words[0]) + n.words[3];
+    d.words[3] += s1(d.words[1]) + m.words[0];
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b63fd0ff1c..113b09d35e 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -512,6 +512,15 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
 DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
 DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
+DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(crypto_sha1h, void, env, i32, i32)
+DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32)
+
+DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
+DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
+DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
+
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(dc_zva, void, env, i64)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d499caa562..38ef5b19aa 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4776,6 +4776,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_VPMIN 21
 #define NEON_3R_VQDMULH_VQRDMULH 22
 #define NEON_3R_VPADD 23
+#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
@@ -4809,6 +4810,7 @@ static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_VPMIN] = 0x7,
     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
     [NEON_3R_VPADD] = 0x7,
+    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
     [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
@@ -4842,6 +4844,7 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VCEQ0 18
 #define NEON_2RM_VCLE0 19
 #define NEON_2RM_VCLT0 20
+#define NEON_2RM_SHA1H 21
 #define NEON_2RM_VABS 22
 #define NEON_2RM_VNEG 23
 #define NEON_2RM_VCGT0_F 24
@@ -4858,6 +4861,7 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
 #define NEON_2RM_VSHLL 38
+#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
 #define NEON_2RM_VRINTN 40
 #define NEON_2RM_VRINTX 41
 #define NEON_2RM_VRINTA 42
@@ -4918,6 +4922,7 @@ static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VCEQ0] = 0x7,
     [NEON_2RM_VCLE0] = 0x7,
     [NEON_2RM_VCLT0] = 0x7,
+    [NEON_2RM_SHA1H] = 0x4,
     [NEON_2RM_VABS] = 0x7,
     [NEON_2RM_VNEG] = 0x7,
     [NEON_2RM_VCGT0_F] = 0x4,
@@ -4934,6 +4939,7 @@ static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VMOVN] = 0x7,
     [NEON_2RM_VQMOVN] = 0x7,
     [NEON_2RM_VSHLL] = 0x7,
+    [NEON_2RM_SHA1SU1] = 0x4,
     [NEON_2RM_VRINTN] = 0x4,
     [NEON_2RM_VRINTX] = 0x4,
     [NEON_2RM_VRINTA] = 0x4,
@@ -5011,6 +5017,49 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
         if (q && ((rd | rn | rm) & 1)) {
             return 1;
         }
+        /*
+         * The SHA-1/SHA-256 3-register instructions require special treatment
+         * here, as their size field is overloaded as an op type selector, and
+         * they all consume their input in a single pass.
+         */
+        if (op == NEON_3R_SHA) {
+            if (!q) {
+                return 1;
+            }
+            if (!u) { /* SHA-1 */
+                if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rn);
+                tmp3 = tcg_const_i32(rm);
+                tmp4 = tcg_const_i32(size);
+                gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+                tcg_temp_free_i32(tmp4);
+            } else { /* SHA-256 */
+                if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rn);
+                tmp3 = tcg_const_i32(rm);
+                switch (size) {
+                case 0:
+                    gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                case 1:
+                    gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                case 2:
+                    gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                }
+            }
+            tcg_temp_free_i32(tmp);
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp3);
+            return 0;
+        }
         if (size == 3 && op != NEON_3R_LOGIC) {
             /* 64-bit element instructions. */
             for (pass = 0; pass < (q ? 2 : 1); pass++) {
@@ -6486,6 +6535,41 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     tcg_temp_free_i32(tmp2);
                     tcg_temp_free_i32(tmp3);
                     break;
+                case NEON_2RM_SHA1H:
+                    if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
+                        || ((rm | rd) & 1)) {
+                        return 1;
+                    }
+                    tmp = tcg_const_i32(rd);
+                    tmp2 = tcg_const_i32(rm);
+
+                    gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    break;
+                case NEON_2RM_SHA1SU1:
+                    if ((rm | rd) & 1) {
+                            return 1;
+                    }
+                    /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
+                    if (q) {
+                        if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
+                            return 1;
+                        }
+                    } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+                        return 1;
+                    }
+                    tmp = tcg_const_i32(rd);
+                    tmp2 = tcg_const_i32(rm);
+                    if (q) {
+                        gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+                    } else {
+                        gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+                    }
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    break;
                 default:
                 elementwise:
                     for (pass = 0; pass < (q ? 4 : 2); pass++) {

From 526d0096e56e82ffa5edb15bd75c5c093e61fa59 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:23 +0100
Subject: [PATCH 06/19] target-arm: Allow 3reg_wide undefreq to encode more bad
 size options

The current undefreq field in the neon_3reg_wide handling allows us
to encode "UNDEF if size != 0" and "UNDEF if size == 0". This is
no longer sufficient with the advent of 64-bit polynomial VMULL,
which means we want to UNDEF if size == 1. Change the undefreq
encoding to use separate bits for all of "UNDEF if size == 0",
"UNDEF if size == 1" and "UNDEF if size == 2".

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-3-git-send-email-peter.maydell@linaro.org
---
 target-arm/translate.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 38ef5b19aa..7124606723 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5954,10 +5954,11 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                 int src1_wide;
                 int src2_wide;
                 int prewiden;
-                /* undefreq: bit 0 : UNDEF if size != 0
-                 *           bit 1 : UNDEF if size == 0
-                 *           bit 2 : UNDEF if U == 1
-                 * Note that [1:0] set implies 'always UNDEF'
+                /* undefreq: bit 0 : UNDEF if size == 0
+                 *           bit 1 : UNDEF if size == 1
+                 *           bit 2 : UNDEF if size == 2
+                 *           bit 3 : UNDEF if U == 1
+                 * Note that [2:0] set implies 'always UNDEF'
                  */
                 int undefreq;
                 /* prewiden, src1_wide, src2_wide, undefreq */
@@ -5971,13 +5972,13 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     {0, 1, 1, 0}, /* VSUBHN */
                     {0, 0, 0, 0}, /* VABDL */
                     {0, 0, 0, 0}, /* VMLAL */
-                    {0, 0, 0, 6}, /* VQDMLAL */
+                    {0, 0, 0, 9}, /* VQDMLAL */
                     {0, 0, 0, 0}, /* VMLSL */
-                    {0, 0, 0, 6}, /* VQDMLSL */
+                    {0, 0, 0, 9}, /* VQDMLSL */
                     {0, 0, 0, 0}, /* Integer VMULL */
-                    {0, 0, 0, 2}, /* VQDMULL */
-                    {0, 0, 0, 5}, /* Polynomial VMULL */
-                    {0, 0, 0, 3}, /* Reserved: always UNDEF */
+                    {0, 0, 0, 1}, /* VQDMULL */
+                    {0, 0, 0, 15}, /* Polynomial VMULL */
+                    {0, 0, 0, 7}, /* Reserved: always UNDEF */
                 };
 
                 prewiden = neon_3reg_wide[op][0];
@@ -5985,9 +5986,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                 src2_wide = neon_3reg_wide[op][2];
                 undefreq = neon_3reg_wide[op][3];
 
-                if (((undefreq & 1) && (size != 0)) ||
-                    ((undefreq & 2) && (size == 0)) ||
-                    ((undefreq & 4) && u)) {
+                if ((undefreq & (1 << size)) ||
+                    ((undefreq & 8) && u)) {
                     return 1;
                 }
                 if ((src1_wide && (rn & 1)) ||

From 4e624edaebec9312be7b63096bbe5b2fe76f3613 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:23 +0100
Subject: [PATCH 07/19] target-arm: add support for v8 VMULL.P64 instruction

Add support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
instruction in the A32/T32 instruction sets; this is part of the v8
Crypto Extensions.

To do this we have to move the neon_pmull_64_{lo,hi} helpers from
helper-a64.c into neon_helper.c so they can be used by the AArch32
translator.

Inspired-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-4-git-send-email-peter.maydell@linaro.org
---
 linux-user/elfload.c     |  1 +
 target-arm/cpu.c         |  1 +
 target-arm/cpu.h         |  1 +
 target-arm/helper-a64.c  | 30 ------------------------------
 target-arm/helper-a64.h  |  2 --
 target-arm/helper.h      |  3 +++
 target-arm/neon_helper.c | 30 ++++++++++++++++++++++++++++++
 target-arm/translate.c   | 26 +++++++++++++++++++++++++-
 8 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 9bda262419..3241fec035 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void)
     uint32_t hwcaps = 0;
 
     GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
     GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
     GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
     GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 753f6cb1da..fb9c12d81e 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V8_AES);
         set_feature(env, ARM_FEATURE_V8_SHA1);
         set_feature(env, ARM_FEATURE_V8_SHA256);
+        set_feature(env, ARM_FEATURE_V8_PMULL);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 14d5f21e7a..79e7f82515 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -637,6 +637,7 @@ enum arm_features {
     ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
     ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index cccda74113..f0d27229d4 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
     return result;
 }
 
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
-    int bitnum;
-    uint64_t res = 0;
-
-    for (bitnum = 0; bitnum < 64; bitnum++) {
-        if (op1 & (1ULL << bitnum)) {
-            res ^= op2 << bitnum;
-        }
-    }
-    return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
-    int bitnum;
-    uint64_t res = 0;
-
-    /* bit 0 of op1 can't influence the high 64 bits at all */
-    for (bitnum = 1; bitnum < 64; bitnum++) {
-        if (op1 & (1ULL << bitnum)) {
-            res ^= op2 >> (64 - bitnum);
-        }
-    }
-    return res;
-}
-
 /* 64bit/double versions of the neon float compare functions */
 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
 {
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 3f05bedcca..8de7536ff7 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
 DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
 DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
 DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
 DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 113b09d35e..0ef8fcac17 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(dc_zva, void, env, i64)
 
+DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 492e500700..47d13e908c 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
     env->vfp.regs[rm] = make_float64(m0);
     env->vfp.regs[rd] = make_float64(d0);
 }
+
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
+{
+    int bitnum;
+    uint64_t res = 0;
+
+    for (bitnum = 0; bitnum < 64; bitnum++) {
+        if (op1 & (1ULL << bitnum)) {
+            res ^= op2 << bitnum;
+        }
+    }
+    return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
+{
+    int bitnum;
+    uint64_t res = 0;
+
+    /* bit 0 of op1 can't influence the high 64 bits at all */
+    for (bitnum = 1; bitnum < 64; bitnum++) {
+        if (op1 & (1ULL << bitnum)) {
+            res ^= op2 >> (64 - bitnum);
+        }
+    }
+    return res;
+}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 7124606723..41c3fc77b0 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     {0, 0, 0, 9}, /* VQDMLSL */
                     {0, 0, 0, 0}, /* Integer VMULL */
                     {0, 0, 0, 1}, /* VQDMULL */
-                    {0, 0, 0, 15}, /* Polynomial VMULL */
+                    {0, 0, 0, 0xa}, /* Polynomial VMULL */
                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
                 };
 
@@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                     return 1;
                 }
 
+                /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+                 * outside the loop below as it only performs a single pass.
+                 */
+                if (op == 14 && size == 2) {
+                    TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
+
+                    if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+                        return 1;
+                    }
+                    tcg_rn = tcg_temp_new_i64();
+                    tcg_rm = tcg_temp_new_i64();
+                    tcg_rd = tcg_temp_new_i64();
+                    neon_load_reg64(tcg_rn, rn);
+                    neon_load_reg64(tcg_rm, rm);
+                    gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
+                    neon_store_reg64(tcg_rd, rd);
+                    gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
+                    neon_store_reg64(tcg_rd, rd + 1);
+                    tcg_temp_free_i64(tcg_rn);
+                    tcg_temp_free_i64(tcg_rm);
+                    tcg_temp_free_i64(tcg_rd);
+                    return 0;
+                }
+
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
                    destinations in problematic ways.  */

From 411bdc7837681f58828ca490647fa1784a6700b9 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:24 +0100
Subject: [PATCH 08/19] target-arm: A64: Use PMULL feature bit for PMULL

Now that we have a separate ARM_FEATURE_V8_PMULL bit, use it for
the A64 PMULL, not the AES feature bit.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 linux-user/elfload.c       | 2 +-
 target-arm/translate-a64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 3241fec035..e8724939f8 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -539,7 +539,7 @@ static uint32_t get_elf_hwcap(void)
     /* probe for the extra features */
 #define GET_FEATURE(feat, hwcap) \
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
-    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_PMULL);
+    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
 #undef GET_FEATURE
 
     return hwcaps;
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a9c4633517..9832cc3c22 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -8574,7 +8574,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             return;
         }
         if (size == 3) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
+            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
                 unallocated_encoding(s);
                 return;
             }

From 46d9dfdad65fa783691871622271a861466905c4 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:24 +0100
Subject: [PATCH 09/19] target-arm: arm_any_initfn() should never set
 ARM_FEATURE_AARCH64

The arm_any_initfn() is used only for the 32-bit linux-user "cpu any",
so it only gets called in builds where TARGET_AARCH64 is not defined.
Remove the unreachable line which sets ARM_FEATURE_AARCH64.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-2-git-send-email-peter.maydell@linaro.org
---
 target-arm/cpu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index fb9c12d81e..94123b2213 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -963,9 +963,6 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_CRC);
-#ifdef TARGET_AARCH64
-    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
-#endif
     cpu->midr = 0xffffffff;
 }
 #endif

From fb8ad9f2c1a82256b1ac384393a093a61201272f Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:24 +0100
Subject: [PATCH 10/19] target-arm: Remove unnecessary setting of feature bits

FEATURE_V8 implies both FEATURE_V7MP and FEATURE_ARM_DIV, so
we don't need to set them explicitly in initfns which set the
V8 feature bit.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-3-git-send-email-peter.maydell@linaro.org
---
 target-arm/cpu.c   | 2 --
 target-arm/cpu64.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 94123b2213..383e22a757 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -960,8 +960,6 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->midr = 0xffffffff;
 }
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index ff4c2b46d7..2dd01fdf45 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -140,8 +140,6 @@ static void aarch64_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
     cpu->dcz_blocksize = 7; /*  512 bytes */

From 25f748e37a868b322a960c322ca11fb2dc5252b2 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:24 +0100
Subject: [PATCH 11/19] target-arm: Clean up handling of ARMv8 optional feature
 bits

CRC and crypto are both optional v8 extensions, so FEATURE_V8
should not imply them. Instead we should set these bits in the
initfns for the 32-bit and 64-bit "cpu any" and for the Cortex-A57.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-4-git-send-email-peter.maydell@linaro.org
---
 target-arm/cpu.c   |  8 ++++----
 target-arm/cpu64.c | 10 ++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 383e22a757..bc19d80439 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -316,10 +316,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V7);
         set_feature(env, ARM_FEATURE_ARM_DIV);
         set_feature(env, ARM_FEATURE_LPAE);
-        set_feature(env, ARM_FEATURE_V8_AES);
-        set_feature(env, ARM_FEATURE_V8_SHA1);
-        set_feature(env, ARM_FEATURE_V8_SHA256);
-        set_feature(env, ARM_FEATURE_V8_PMULL);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
@@ -960,6 +956,10 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
     set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->midr = 0xffffffff;
 }
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 2dd01fdf45..40cc063f07 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -98,6 +98,11 @@ static void aarch64_a57_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
     cpu->midr = 0x411fd070;
     cpu->reset_fpsid = 0x41034070;
@@ -141,6 +146,11 @@ static void aarch64_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
     cpu->dcz_blocksize = 7; /*  512 bytes */
 }

From da5141fc45937fa358ca4554a335ae6a4c4453ec Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:25 +0100
Subject: [PATCH 12/19] target-arm: VFPv4 implies half-precision extension

VFPv4 implies the presence of the half-precision floating point
extension (which is optional in VFPv3). Add this implied rule
to arm_cpu_realizefn() and remove some no-longer-needed explicit
setting of the bit in initfns.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-5-git-send-email-peter.maydell@linaro.org
---
 target-arm/cpu.c   | 3 +--
 target-arm/cpu64.c | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index bc19d80439..b8778350f7 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -348,6 +348,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
     if (arm_feature(env, ARM_FEATURE_VFP4)) {
         set_feature(env, ARM_FEATURE_VFP3);
+        set_feature(env, ARM_FEATURE_VFP_FP16);
     }
     if (arm_feature(env, ARM_FEATURE_VFP3)) {
         set_feature(env, ARM_FEATURE_VFP);
@@ -744,7 +745,6 @@ static void cortex_a15_initfn(Object *obj)
     cpu->dtb_compatible = "arm,cortex-a15";
     set_feature(&cpu->env, ARM_FEATURE_V7);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
@@ -953,7 +953,6 @@ static void arm_any_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_V8_AES);
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 40cc063f07..8b2081c246 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -93,7 +93,6 @@ static void aarch64_a57_initfn(Object *obj)
 
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
@@ -143,7 +142,6 @@ static void aarch64_any_initfn(Object *obj)
 
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_V8_AES);

From 130f2e7dcb4a5f9534fc65200121f06983435a77 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:25 +0100
Subject: [PATCH 13/19] target-arm: A64: Implement CRC instructions

Implement the optional A64 CRC instructions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-6-git-send-email-peter.maydell@linaro.org
---
 linux-user/elfload.c       |  1 +
 target-arm/helper-a64.c    | 30 +++++++++++++++++++++
 target-arm/helper-a64.h    |  2 ++
 target-arm/translate-a64.c | 54 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index e8724939f8..f2d5955230 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -540,6 +540,7 @@ static uint32_t get_elf_hwcap(void)
 #define GET_FEATURE(feat, hwcap) \
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
     GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
+    GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
 #undef GET_FEATURE
 
     return hwcaps;
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index f0d27229d4..2b4ce6ac60 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -24,6 +24,8 @@
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
 #include "internals.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
 
 /* C2.4.7 Multiply and divide */
 /* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -408,6 +410,34 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
     return r;
 }
 
+/* 64-bit versions of the CRC helpers. Note that although the operation
+ * (and the prototypes of crc32c() and crc32() mean that only the bottom
+ * 32 bits of the accumulator and result are used, we pass and return
+ * uint64_t for convenience of the generated code. Unlike the 32-bit
+ * instruction set versions, val may genuinely have 64 bits of data in it.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+    uint8_t buf[8];
+
+    stq_le_p(buf, val);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+    uint8_t buf[8];
+
+    stq_le_p(buf, val);
+
+    /* Linux crc32c converts the output to one's complement.  */
+    return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
+
 /* Handle a CPU exception.  */
 void aarch64_cpu_do_interrupt(CPUState *cs)
 {
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 8de7536ff7..1d3d10fffb 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -44,3 +44,5 @@ DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
+DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 9832cc3c22..6af593a26e 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -3774,6 +3774,54 @@ static void handle_shift_reg(DisasContext *s,
     tcg_temp_free_i64(tcg_shift);
 }
 
+/* CRC32[BHWX], CRC32C[BHWX] */
+static void handle_crc32(DisasContext *s,
+                         unsigned int sf, unsigned int sz, bool crc32c,
+                         unsigned int rm, unsigned int rn, unsigned int rd)
+{
+    TCGv_i64 tcg_acc, tcg_val;
+    TCGv_i32 tcg_bytes;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+        || (sf == 1 && sz != 3)
+        || (sf == 0 && sz == 3)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (sz == 3) {
+        tcg_val = cpu_reg(s, rm);
+    } else {
+        uint64_t mask;
+        switch (sz) {
+        case 0:
+            mask = 0xFF;
+            break;
+        case 1:
+            mask = 0xFFFF;
+            break;
+        case 2:
+            mask = 0xFFFFFFFF;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        tcg_val = new_tmp_a64(s);
+        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
+    }
+
+    tcg_acc = cpu_reg(s, rn);
+    tcg_bytes = tcg_const_i32(1 << sz);
+
+    if (crc32c) {
+        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+    } else {
+        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+    }
+
+    tcg_temp_free_i32(tcg_bytes);
+}
+
 /* C3.5.8 Data-processing (2 source)
  *   31   30  29 28             21 20  16 15    10 9    5 4    0
  * +----+---+---+-----------------+------+--------+------+------+
@@ -3821,8 +3869,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
     case 21:
     case 22:
     case 23: /* CRC32 */
-        unsupported_encoding(s, insn);
+    {
+        int sz = extract32(opcode, 0, 2);
+        bool crc32c = extract32(opcode, 2, 1);
+        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
         break;
+    }
     default:
         unallocated_encoding(s);
         break;

From aa633469ed902a6d96b3d4013ec5ce32597f0626 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:25 +0100
Subject: [PATCH 14/19] target-arm: A32/T32: Mask CRC value in calling code,
 not helper

Bring the 32-bit CRC helper functions into line with the A64 ones,
by masking the high bytes of the value in the calling code rather
than the helper. This is more efficient since we can determine the
mask at translation time.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-7-git-send-email-peter.maydell@linaro.org
---
 target-arm/helper.c    | 25 ++++++-------------------
 target-arm/translate.c | 10 ++++++++++
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 1e50a7840e..177ed076c7 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5559,28 +5559,15 @@ int arm_rmode_to_sf(int rmode)
     return rmode;
 }
 
-static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
-{
-    memset(buf, 0, 4);
-
-    if (bytes == 1) {
-        buf[0] = val & 0xff;
-    } else if (bytes == 2) {
-        buf[0] = val & 0xff;
-        buf[1] = (val >> 8) & 0xff;
-    } else {
-        buf[0] = val & 0xff;
-        buf[1] = (val >> 8) & 0xff;
-        buf[2] = (val >> 16) & 0xff;
-        buf[3] = (val >> 24) & 0xff;
-    }
-}
-
+/* CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
 uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
 {
     uint8_t buf[4];
 
-    crc_init_buffer(buf, val, bytes);
+    stl_le_p(buf, val);
 
     /* zlib crc32 converts the accumulator and output to one's complement.  */
     return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
@@ -5590,7 +5577,7 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
 {
     uint8_t buf[4];
 
-    crc_init_buffer(buf, val, bytes);
+    stl_le_p(buf, val);
 
     /* Linux crc32c converts the output to one's complement.  */
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 41c3fc77b0..351943f6f3 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -7806,6 +7806,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
 
             tmp = load_reg(s, rn);
             tmp2 = load_reg(s, rm);
+            if (op1 == 0) {
+                tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+            } else if (op1 == 1) {
+                tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+            }
             tmp3 = tcg_const_i32(1 << op1);
             if (c & 0x2) {
                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
@@ -9438,6 +9443,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                     }
 
                     tmp2 = load_reg(s, rm);
+                    if (sz == 0) {
+                        tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+                    } else if (sz == 1) {
+                        tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+                    }
                     tmp3 = tcg_const_i32(1 << sz);
                     if (c) {
                         gen_helper_crc32c(tmp, tmp, tmp2, tmp3);

From 5acc765c04300f9ac8b7944008bc3ad54c6b032a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:25 +0100
Subject: [PATCH 15/19] target-arm: A64: Implement AES instructions

Implement the AES instructions from the optional Crypto Extensions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-8-git-send-email-peter.maydell@linaro.org
---
 linux-user/elfload.c       |  1 +
 target-arm/translate-a64.c | 51 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f2d5955230..396a80817c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -539,6 +539,7 @@ static uint32_t get_elf_hwcap(void)
     /* probe for the extra features */
 #define GET_FEATURE(feat, hwcap) \
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
+    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
     GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
     GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
 #undef GET_FEATURE
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 6af593a26e..94b4642141 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -85,6 +85,7 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 
 /* initialize TCG globals.  */
 void a64_translate_init(void)
@@ -10549,7 +10550,55 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
  */
 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    int decrypt;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
+    CryptoThreeOpEnvFn *genfn;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
+        || size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0x4: /* AESE */
+        decrypt = 0;
+        genfn = gen_helper_crypto_aese;
+        break;
+    case 0x6: /* AESMC */
+        decrypt = 0;
+        genfn = gen_helper_crypto_aesmc;
+        break;
+    case 0x5: /* AESD */
+        decrypt = 1;
+        genfn = gen_helper_crypto_aese;
+        break;
+    case 0x7: /* AESIMC */
+        decrypt = 1;
+        genfn = gen_helper_crypto_aesmc;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* Note that we convert the Vx register indexes into the
+     * index within the vfp.regs[] array, so we can share the
+     * helper with the AArch32 instructions.
+     */
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+    tcg_decrypt = tcg_const_i32(decrypt);
+
+    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
+    tcg_temp_free_i32(tcg_decrypt);
 }
 
 /* C3.6.20 Crypto three-reg SHA

From be56f04eeacc22634a3bd709fdbda9873e8f55af Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:26 +0100
Subject: [PATCH 16/19] target-arm: A64: Implement 3-register SHA instructions

Implement the 3-register SHA instruction group from the optional
Crypto Extensions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-9-git-send-email-peter.maydell@linaro.org
---
 target-arm/translate-a64.c | 59 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 94b4642141..82d46fafe5 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -10609,7 +10609,64 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
  */
 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 3);
+    int rm = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    CryptoThreeOpEnvFn *genfn;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
+    int feature = ARM_FEATURE_V8_SHA256;
+
+    if (size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0: /* SHA1C */
+    case 1: /* SHA1P */
+    case 2: /* SHA1M */
+    case 3: /* SHA1SU0 */
+        genfn = NULL;
+        feature = ARM_FEATURE_V8_SHA1;
+        break;
+    case 4: /* SHA256H */
+        genfn = gen_helper_crypto_sha256h;
+        break;
+    case 5: /* SHA256H2 */
+        genfn = gen_helper_crypto_sha256h2;
+        break;
+    case 6: /* SHA256SU1 */
+        genfn = gen_helper_crypto_sha256su1;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+    tcg_rm_regno = tcg_const_i32(rm << 1);
+
+    if (genfn) {
+        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
+    } else {
+        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
+
+        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
+                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
+        tcg_temp_free_i32(tcg_opcode);
+    }
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
+    tcg_temp_free_i32(tcg_rm_regno);
 }
 
 /* C3.6.21 Crypto two-reg SHA

From f6fe04d566f1a1e3219b501487cd2d2d00d723a5 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:26 +0100
Subject: [PATCH 17/19] target-arm: A64: Implement two-register SHA
 instructions

Implement the two-register SHA instruction group from the optional
Crypto Extensions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401458125-27977-10-git-send-email-peter.maydell@linaro.org
---
 linux-user/elfload.c       |  2 ++
 target-arm/translate-a64.c | 45 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 396a80817c..68b9793649 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -541,6 +541,8 @@ static uint32_t get_elf_hwcap(void)
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
     GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
     GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
+    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
+    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
     GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
 #undef GET_FEATURE
 
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 82d46fafe5..63ad787e9f 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -85,6 +85,7 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 
 /* initialize TCG globals.  */
@@ -10677,7 +10678,49 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
  */
 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    CryptoTwoOpEnvFn *genfn;
+    int feature;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
+
+    if (size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0: /* SHA1H */
+        feature = ARM_FEATURE_V8_SHA1;
+        genfn = gen_helper_crypto_sha1h;
+        break;
+    case 1: /* SHA1SU1 */
+        feature = ARM_FEATURE_V8_SHA1;
+        genfn = gen_helper_crypto_sha1su1;
+        break;
+    case 2: /* SHA256SU0 */
+        feature = ARM_FEATURE_V8_SHA256;
+        genfn = gen_helper_crypto_sha256su0;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+
+    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
 }
 
 /* C3.6 Data processing - SIMD, inc Crypto

From d3afacc7269fee45d54d1501a46b51f12ea7bb15 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:26 +0100
Subject: [PATCH 18/19] target-arm: Fix errors in writes to generic timer
 control registers

The code for handling writes to the generic timer control registers
had several bugs:
 * ISTATUS (bit 2) is read-only but we forced it to zero on any write
 * the check for "was IMASK (bit 1) toggled?" incorrectly used '&' where
   it should be '^'
 * the handling of IMASK was inverted: we should set the IRQ if
   ISTATUS is set and IMASK is clear, not if both are set

The combination of these bugs meant that when running a Linux guest
that uses the generic timers we would fairly quickly end up either
forgetting that the timer output should be asserted, or failing to
set the IRQ when the timer was unmasked. The result is that the guest
never gets any more timer interrupts.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401803208-1281-1-git-send-email-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
---
 target-arm/helper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 177ed076c7..050c40981b 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1040,16 +1040,16 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
     int timeridx = ri->crm & 1;
     uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
 
-    env->cp15.c14_timer[timeridx].ctl = value & 3;
+    env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value);
     if ((oldval ^ value) & 1) {
         /* Enable toggled */
         gt_recalc_timer(cpu, timeridx);
-    } else if ((oldval & value) & 2) {
+    } else if ((oldval ^ value) & 2) {
         /* IMASK toggled: don't need to recalculate,
          * just set the interrupt line based on ISTATUS
          */
         qemu_set_irq(cpu->gt_timer_outputs[timeridx],
-                     (oldval & 4) && (value & 2));
+                     (oldval & 4) && !(value & 2));
     }
 }
 

From 3b1a41381254f6080b5cfeb149c28a9237d42a0b Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 9 Jun 2014 15:43:26 +0100
Subject: [PATCH 19/19] target-arm: Delete unused iwmmxt_msadb helper

The iwmmxt_msadb helper and its corresponding gen function are unused;
delete them. (This function appears to have never been used right back
to the initial implementation of iwMMXt; it is identical to iwmmxt_madduq,
and is presumably an accidental remnant from the initial development.)

Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401822125-1822-1-git-send-email-peter.maydell@linaro.org
---
 target-arm/helper.h        | 2 --
 target-arm/iwmmxt_helper.c | 9 ---------
 target-arm/translate.c     | 2 --
 3 files changed, 13 deletions(-)

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 0ef8fcac17..facfcd2c11 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -456,8 +456,6 @@ DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
 DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
 DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
 
-DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
-
 DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
 DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
 
diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index 398cbcbec7..a5069144d1 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -369,15 +369,6 @@ IWMMXT_OP_AVGW(1)
 #undef IWMMXT_OP_AVGW
 #undef AVGW
 
-uint64_t HELPER(iwmmxt_msadb)(uint64_t a, uint64_t b)
-{
-    a =  ((((a >> 0 ) & 0xffff) * ((b >> 0) & 0xffff) +
-           ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)) & 0xffffffff) |
-         ((((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) +
-           ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)) << 32);
-    return a;
-}
-
 uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n)
 {
     a >>= n << 3;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 351943f6f3..cf4e767ff8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -1382,8 +1382,6 @@ IWMMXT_OP_ENV(avgb1)
 IWMMXT_OP_ENV(avgw0)
 IWMMXT_OP_ENV(avgw1)
 
-IWMMXT_OP(msadb)
-
 IWMMXT_OP_ENV(packuw)
 IWMMXT_OP_ENV(packul)
 IWMMXT_OP_ENV(packuq)