Hexagon bug fixes and additional tests

Also includes a patch from Zongyuan Li <zongyuan.li@smartx.com> to remove an unused variable ******** Changes in v2 ******** Fix problems with build-user-hexagon CI job -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEENjXHiM5iuR/UxZq0ewJE+xLeRCIFAmIs/fkACgkQewJE+xLe RCJNTQf/ZfvVqC6QKYw4OLEl0OCo56EAAl85GYQd7yKD+CU2RbsQBKpEoU9psuJX zjCd2QW/ZkoB5kDMpSxyJTkdmcnWBfgHr6j5QKEI5QeuWxdlPzJ/UB0ZVWcnhJhV ubyCfwceUyK69YZFvd7ZnbUj00ZLjpJXrLf5biHHJNZ2Q590kpwcxSyMJn8IaUJI HSqjNt8xdxrNMLRJE048hNQoMp4GIoXYnQbLaDc4inZJRYStphV/Z+QmNtN2tqNs VyP+U0udOzc552DJ0XMBV0vFZJ36L+C1HtalaI+ctQnksc7ht+W1t9D45BjT++Nx AQgNkOVLHbDqKcVioLxgLyY4651ghg== =bndB -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20220312-1' into staging Hexagon bug fixes and additional tests Also includes a patch from Zongyuan Li <zongyuan.li@smartx.com> to remove an unused variable ******** Changes in v2 ******** Fix problems with build-user-hexagon CI job # gpg: Signature made Sat 12 Mar 2022 20:09:29 GMT # gpg: using RSA key 3635C788CE62B91FD4C59AB47B0244FB12DE4422 # gpg: Good signature from "Taylor Simpson (Rock on) <tsimpson@quicinc.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 3635 C788 CE62 B91F D4C5 9AB4 7B02 44FB 12DE 4422 * remotes/quic/tags/pull-hex-20220312-1: target/hexagon: remove unused variable Hexagon (target/hexagon) assignment to c4 should wait until packet commit Hexagon (target/hexagon) fix bug in conv_df2uw_chop Hexagon (tests/tcg/hexagon) fix inline asm in preg_alias.c Hexagon (tests/tcg/hexagon) update overflow test Hexagon (tests/tcg/hexagon) add floating point instructions to usr.c Hexagon (tests/tcg/hexagon) test instructions that might set bits in USR Hexagon (target/hexagon) properly handle NaN in dfmin/dfmax/sfmin/sfmax Hexagon (target/hexagon) properly handle denorm in arch_sf_recip_common Hexagon (target/hexagon) properly set FPINVF bit in sfcmp.uo and dfcmp.uo Hexagon HVX (target/hexagon) fix bug in HVX saturate instructions Hexagon (target/hexagon) fix bug in circular addressing Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2022-03-13 17:29:18 +00:00 · 2022-03-13 17:29:18 +00:00 · 15df33ceb7
parent 1416688c53 4a818b3767
commit 15df33ceb7
13 changed files with 1473 additions and 82 deletions
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -298,8 +298,8 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
    } else {
        PeV = 0x00;
        /* Basic checks passed */
-        n_exp = float32_getexp(RsV);
-        d_exp = float32_getexp(RtV);
+        n_exp = float32_getexp_raw(RsV);
+        d_exp = float32_getexp_raw(RtV);
        if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
            /* Near quotient underflow / inexact Q */
            PeV = 0x80;
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -24,6 +24,10 @@ static inline bool is_finite(float64 x)
 }

 int32_t float64_getexp(float64 f64);
+static inline uint32_t float32_getexp_raw(float32 f32)
+{
+    return extract32(f32, 23, 8);
+}
 int32_t float32_getexp(float32 f32);
 float32 infinite_float32(uint8_t sign);
 float32 internal_fmafx(float32 a, float32 b, float32 c,
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -210,11 +210,15 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num,
    }
 }

-static inline void gen_write_p3_0(TCGv control_reg)
+static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg)
 {
+    TCGv hex_p8 = tcg_temp_new();
    for (int i = 0; i < NUM_PREGS; i++) {
-        tcg_gen_extract_tl(hex_pred[i], control_reg, i * 8, 8);
+        tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8);
+        gen_log_pred_write(ctx, i, hex_p8);
+        ctx_log_pred_write(ctx, i);
    }
+    tcg_temp_free(hex_p8);
 }

 /*
@ -228,7 +232,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num,
                                      TCGv val)
 {
    if (reg_num == HEX_REG_P3_0) {
-        gen_write_p3_0(val);
+        gen_write_p3_0(ctx, val);
    } else {
        gen_log_reg_write(reg_num, val);
        ctx_log_reg_write(ctx, reg_num);
@ -250,7 +254,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
    if (reg_num == HEX_REG_P3_0) {
        TCGv val32 = tcg_temp_new();
        tcg_gen_extrl_i64_i32(val32, val);
-        gen_write_p3_0(val32);
+        gen_write_p3_0(ctx, val32);
        tcg_gen_extrh_i64_i32(val32, val);
        gen_log_reg_write(reg_num + 1, val32);
        tcg_temp_free(val32);
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -268,7 +268,7 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num)

 #define fVSATUVALN(N, VAL) \
    ({ \
-        (((int)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \
+        (((int64_t)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \
    })
 #define fSATUVALN(N, VAL) \
    ({ \
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -164,11 +164,9 @@
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        uintptr_t ra = GETPC(); \
-        int log_bank = 0; \
        int log_byte = 0; \
        for (i0 = 0; i0 < ELEMENT_SIZE; i0++) { \
            log_byte = ((va + i0) <= va_high) && QVAL; \
-            log_bank |= (log_byte << i0); \
            uint8_t B; \
            B = cpu_ldub_data_ra(env, EA + i0, ra); \
            env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0] = B; \
@ -243,11 +241,9 @@
        int i0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
-        int log_bank = 0; \
        int log_byte = 0; \
        for (i0 = 0; i0 < ELEM_SIZE; i0++) { \
            log_byte = ((va + i0) <= va_high) && QVAL; \
-            log_bank |= (log_byte << i0); \
            LOG_VTCM_BYTE(va + i0, log_byte, IN.ub[ELEM_SIZE * IDX + i0], \
                          ELEM_SIZE * IDX + i0); \
        } \
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -304,8 +304,8 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)

 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
 {
-    int32_t K_const = sextract32(M, 24, 4);
-    int32_t length = sextract32(M, 0, 17);
+    uint32_t K_const = extract32(M, 24, 4);
+    uint32_t length = extract32(M, 0, 17);
    uint32_t new_ptr = RxV + offset;
    uint32_t start_addr;
    uint32_t end_addr;
@ -829,7 +829,7 @@ uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
    uint32_t RdV;
    arch_fpop_start(env);
    /* Hexagon checks the sign before rounding */
-    if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) {
+    if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
        float_raise(float_flag_invalid, &env->fp_status);
        RdV = 0;
    } else {
@ -938,8 +938,7 @@ int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
 {
    int32_t PdV;
    arch_fpop_start(env);
-    PdV = f8BITSOF(float32_is_any_nan(RsV) ||
-                   float32_is_any_nan(RtV));
+    PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
    arch_fpop_end(env);
    return PdV;
 }
@ -948,7 +947,7 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
 {
    float32 RdV;
    arch_fpop_start(env);
-    RdV = float32_maxnum(RsV, RtV, &env->fp_status);
+    RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
    arch_fpop_end(env);
    return RdV;
 }
@ -957,7 +956,7 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
 {
    float32 RdV;
    arch_fpop_start(env);
-    RdV = float32_minnum(RsV, RtV, &env->fp_status);
+    RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
    arch_fpop_end(env);
    return RdV;
 }
@ -1041,10 +1040,7 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
 {
    float64 RddV;
    arch_fpop_start(env);
-    RddV = float64_maxnum(RssV, RttV, &env->fp_status);
-    if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
-        float_raise(float_flag_invalid, &env->fp_status);
-    }
+    RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
    arch_fpop_end(env);
    return RddV;
 }
@ -1053,10 +1049,7 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
 {
    float64 RddV;
    arch_fpop_start(env);
-    RddV = float64_minnum(RssV, RttV, &env->fp_status);
-    if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
-        float_raise(float_flag_invalid, &env->fp_status);
-    }
+    RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
    arch_fpop_end(env);
    return RddV;
 }
@ -1097,8 +1090,7 @@ int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
 {
    int32_t PdV;
    arch_fpop_start(env);
-    PdV = f8BITSOF(float64_is_any_nan(RssV) ||
-                   float64_is_any_nan(RttV));
+    PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
    arch_fpop_end(env);
    return PdV;
 }
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@ -1,5 +1,5 @@
 ##
-##  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@ -30,6 +30,7 @@ first: $(HEX_SRC)/first.S
 HEX_TESTS = first
 HEX_TESTS += hex_sigsegv
 HEX_TESTS += misc
+HEX_TESTS += usr
 HEX_TESTS += preg_alias
 HEX_TESTS += dual_stores
 HEX_TESTS += multi_result
@ -43,3 +44,8 @@ HEX_TESTS += fpstuff
 HEX_TESTS += overflow

 TESTS += $(HEX_TESTS)
+
+# This test has to be compiled for the -mv67t target
+usr: usr.c
+	$(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS)
+
--- a/tests/tcg/hexagon/circ.c
+++ b/tests/tcg/hexagon/circ.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -415,7 +415,8 @@ static void circ_test_v3(void)
 {
    int *p = wbuf;
    int size = 15;
-    int K = 4;      /* 64 bytes */
+    /* set high bit in K to test unsigned extract in fcirc */
+    int K = 8;      /* 1024 bytes */
    int element;
    int i;

--- a/tests/tcg/hexagon/fpstuff.c
+++ b/tests/tcg/hexagon/fpstuff.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -38,8 +38,11 @@ const int SF_NaN_special =                0x7f800001;
 const int SF_ANY =                        0x3f800000;
 const int SF_HEX_NAN =                    0xffffffff;
 const int SF_small_neg =                  0xab98fba8;
+const int SF_denorm =                     0x00000001;
+const int SF_random =                     0x346001d6;

-const long long DF_NaN =                  0x7ff8000000000000ULL;
+const long long DF_QNaN =                 0x7ff8000000000000ULL;
+const long long DF_SNaN =                 0x7ff7000000000000ULL;
 const long long DF_ANY =                  0x3f80000000000000ULL;
 const long long DF_HEX_NAN =              0xffffffffffffffffULL;
 const long long DF_small_neg =            0xbd731f7500000000ULL;
@ -126,7 +129,7 @@ static void check_compare_exception(void)
         "p0 = dfcmp.eq(%2, %3)\n\t"
         "%0 = p0\n\t"
         "%1 = usr\n\t"
-         : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+         : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
         : "r2", "p0", "usr");
    check32(cmp, 0);
    check_fpstatus(usr, 0);
@ -135,7 +138,7 @@ static void check_compare_exception(void)
         "p0 = dfcmp.gt(%2, %3)\n\t"
         "%0 = p0\n\t"
         "%1 = usr\n\t"
-         : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+         : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
         : "r2", "p0", "usr");
    check32(cmp, 0);
    check_fpstatus(usr, 0);
@ -144,7 +147,7 @@ static void check_compare_exception(void)
         "p0 = dfcmp.ge(%2, %3)\n\t"
         "%0 = p0\n\t"
         "%1 = usr\n\t"
-         : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+         : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
         : "r2", "p0", "usr");
    check32(cmp, 0);
    check_fpstatus(usr, 0);
@ -206,7 +209,7 @@ static void check_dfminmax(void)
    int usr;

    /*
-     * Execute dfmin/dfmax instructions with one operand as NaN
+     * Execute dfmin/dfmax instructions with one operand as SNaN
     * Check that
     *     Result is the other operand
     *     Invalid bit in USR is set
@ -214,7 +217,7 @@ static void check_dfminmax(void)
     asm (CLEAR_FPSTATUS
         "%0 = dfmin(%2, %3)\n\t"
         "%1 = usr\n\t"
-         : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+         : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY)
         : "r2", "usr");
    check64(minmax, DF_ANY);
    check_fpstatus(usr, FPINVF);
@ -222,13 +225,35 @@ static void check_dfminmax(void)
    asm (CLEAR_FPSTATUS
         "%0 = dfmax(%2, %3)\n\t"
         "%1 = usr\n\t"
-         : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+         : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY)
         : "r2", "usr");
    check64(minmax, DF_ANY);
    check_fpstatus(usr, FPINVF);

    /*
-     * Execute dfmin/dfmax instructions with both operands NaN
+     * Execute dfmin/dfmax instructions with one operand as QNaN
+     * Check that
+     *     Result is the other operand
+     *     No bit in USR is set
+     */
+     asm (CLEAR_FPSTATUS
+         "%0 = dfmin(%2, %3)\n\t"
+         "%1 = usr\n\t"
+         : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
+         : "r2", "usr");
+    check64(minmax, DF_ANY);
+    check_fpstatus(usr, 0);
+
+    asm (CLEAR_FPSTATUS
+         "%0 = dfmax(%2, %3)\n\t"
+         "%1 = usr\n\t"
+         : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
+         : "r2", "usr");
+    check64(minmax, DF_ANY);
+    check_fpstatus(usr, 0);
+
+    /*
+     * Execute dfmin/dfmax instructions with both operands SNaN
     * Check that
     *     Result is DF_HEX_NAN
     *     Invalid bit in USR is set
@ -236,7 +261,7 @@ static void check_dfminmax(void)
    asm (CLEAR_FPSTATUS
         "%0 = dfmin(%2, %3)\n\t"
         "%1 = usr\n\t"
-         : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN)
+         : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN)
         : "r2", "usr");
    check64(minmax, DF_HEX_NAN);
    check_fpstatus(usr, FPINVF);
@ -244,16 +269,39 @@ static void check_dfminmax(void)
    asm (CLEAR_FPSTATUS
         "%0 = dfmax(%2, %3)\n\t"
         "%1 = usr\n\t"
-         : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN)
+         : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN)
         : "r2", "usr");
    check64(minmax, DF_HEX_NAN);
    check_fpstatus(usr, FPINVF);
+
+    /*
+     * Execute dfmin/dfmax instructions with both operands QNaN
+     * Check that
+     *     Result is DF_HEX_NAN
+     *     No bit in USR is set
+     */
+    asm (CLEAR_FPSTATUS
+         "%0 = dfmin(%2, %3)\n\t"
+         "%1 = usr\n\t"
+         : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN)
+         : "r2", "usr");
+    check64(minmax, DF_HEX_NAN);
+    check_fpstatus(usr, 0);
+
+    asm (CLEAR_FPSTATUS
+         "%0 = dfmax(%2, %3)\n\t"
+         "%1 = usr\n\t"
+         : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN)
+         : "r2", "usr");
+    check64(minmax, DF_HEX_NAN);
+    check_fpstatus(usr, 0);
 }

-static void check_recip_exception(void)
+static void check_sfrecipa(void)
 {
    int result;
    int usr;
+    int pred;

    /*
     * Check that sfrecipa doesn't set status bits when
@ -329,6 +377,17 @@ static void check_recip_exception(void)
         : "r2", "p0", "usr");
    check32(result, 0x3f800000);
    check_fpstatus(usr, 0);
+
+    /*
+     * Check that sfrecipa properly handles denorm
+     */
+    asm (CLEAR_FPSTATUS
+         "%0,p0 = sfrecipa(%2, %3)\n\t"
+         "%1 = p0\n\t"
+         : "=r"(result), "=r"(pred) : "r"(SF_denorm), "r"(SF_random)
+         : "p0", "usr");
+    check32(result, 0x6a920001);
+    check32(pred, 0x80);
 }

 static void check_canonical_NaN(void)
@ -411,7 +470,7 @@ static void check_canonical_NaN(void)
    asm(CLEAR_FPSTATUS
        "%0 = convert_df2sf(%2)\n\t"
        "%1 = usr\n\t"
-        : "=r"(sf_result), "=r"(usr) : "r"(DF_NaN)
+        : "=r"(sf_result), "=r"(usr) : "r"(DF_QNaN)
        : "r2", "usr");
    check32(sf_result, SF_HEX_NAN);
    check_fpstatus(usr, 0);
@ -419,7 +478,7 @@ static void check_canonical_NaN(void)
    asm(CLEAR_FPSTATUS
        "%0 = dfadd(%2, %3)\n\t"
        "%1 = usr\n\t"
-        : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+        : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
        : "r2", "usr");
    check64(df_result, DF_HEX_NAN);
    check_fpstatus(usr, 0);
@ -427,7 +486,7 @@ static void check_canonical_NaN(void)
    asm(CLEAR_FPSTATUS
        "%0 = dfsub(%2, %3)\n\t"
        "%1 = usr\n\t"
-        : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY)
+        : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY)
        : "r2", "usr");
    check64(df_result, DF_HEX_NAN);
    check_fpstatus(usr, 0);
@ -455,6 +514,28 @@ static void check_invsqrta(void)
    check32(predval, 0x0);
 }

+static void check_sffixupn(void)
+{
+    int result;
+
+    /* Check that sffixupn properly deals with denorm */
+    asm volatile("%0 = sffixupn(%1, %2)\n\t"
+                 : "=r"(result)
+                 : "r"(SF_random), "r"(SF_denorm));
+    check32(result, 0x246001d6);
+}
+
+static void check_sffixupd(void)
+{
+    int result;
+
+    /* Check that sffixupd properly deals with denorm */
+    asm volatile("%0 = sffixupd(%1, %2)\n\t"
+                 : "=r"(result)
+                 : "r"(SF_denorm), "r"(SF_random));
+    check32(result, 0x146001d6);
+}
+
 static void check_float2int_convs()
 {
    int res32;
@ -567,7 +648,7 @@ static void check_float2int_convs()
    asm(CLEAR_FPSTATUS
        "%0 = convert_df2w(%2)\n\t"
        "%1 = usr\n\t"
-        : "=r"(res32), "=r"(usr) : "r"(DF_NaN)
+        : "=r"(res32), "=r"(usr) : "r"(DF_QNaN)
        : "r2", "usr");
    check32(res32, -1);
    check_fpstatus(usr, FPINVF);
@ -575,7 +656,7 @@ static void check_float2int_convs()
    asm(CLEAR_FPSTATUS
        "%0 = convert_df2w(%2):chop\n\t"
        "%1 = usr\n\t"
-        : "=r"(res32), "=r"(usr) : "r"(DF_NaN)
+        : "=r"(res32), "=r"(usr) : "r"(DF_QNaN)
        : "r2", "usr");
    check32(res32, -1);
    check_fpstatus(usr, FPINVF);
@ -583,7 +664,7 @@ static void check_float2int_convs()
    asm(CLEAR_FPSTATUS
        "%0 = convert_df2d(%2)\n\t"
        "%1 = usr\n\t"
-        : "=r"(res64), "=r"(usr) : "r"(DF_NaN)
+        : "=r"(res64), "=r"(usr) : "r"(DF_QNaN)
        : "r2", "usr");
    check64(res64, -1);
    check_fpstatus(usr, FPINVF);
@ -591,7 +672,7 @@ static void check_float2int_convs()
    asm(CLEAR_FPSTATUS
        "%0 = convert_df2d(%2):chop\n\t"
        "%1 = usr\n\t"
-        : "=r"(res64), "=r"(usr) : "r"(DF_NaN)
+        : "=r"(res64), "=r"(usr) : "r"(DF_QNaN)
        : "r2", "usr");
    check64(res64, -1);
    check_fpstatus(usr, FPINVF);
@ -602,9 +683,11 @@ int main()
    check_compare_exception();
    check_sfminmax();
    check_dfminmax();
-    check_recip_exception();
+    check_sfrecipa();
    check_canonical_NaN();
    check_invsqrta();
+    check_sffixupn();
+    check_sffixupd();
    check_float2int_convs();

    puts(err ? "FAIL" : "PASS");
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -19,6 +19,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <string.h>
+#include <limits.h>

 int err;

@ -432,6 +433,71 @@ TEST_PRED_OP2(pred_and, and, &, "")
 TEST_PRED_OP2(pred_and_n, and, &, "!")
 TEST_PRED_OP2(pred_xor, xor, ^, "")

+static void test_vadduwsat(void)
+{
+    /*
+     * Test for saturation by adding two numbers that add to more than UINT_MAX
+     * and make sure the result saturates to UINT_MAX
+     */
+    const uint32_t x = 0xffff0000;
+    const uint32_t y = 0x000fffff;
+
+    memset(expect, 0x12, sizeof(MMVector));
+    memset(output, 0x34, sizeof(MMVector));
+
+    asm volatile ("v10 = vsplat(%0)\n\t"
+                  "v11 = vsplat(%1)\n\t"
+                  "v21.uw = vadd(v11.uw, v10.uw):sat\n\t"
+                  "vmem(%2+#0) = v21\n\t"
+                  : /* no outputs */
+                  : "r"(x), "r"(y), "r"(output)
+                  : "v10", "v11", "v21", "memory");
+
+    for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+        expect[0].uw[j] = UINT_MAX;
+    }
+
+    check_output_w(__LINE__, 1);
+}
+
+static void test_vsubuwsat_dv(void)
+{
+    /*
+     * Test for saturation by subtracting two numbers where the result is
+     * negative and make sure the result saturates to zero
+     *
+     * vsubuwsat_dv operates on an HVX register pair, so we'll have a
+     * pair of subtractions
+     *     w - x < 0
+     *     y - z < 0
+     */
+    const uint32_t w = 0x000000b7;
+    const uint32_t x = 0xffffff4e;
+    const uint32_t y = 0x31fe88e7;
+    const uint32_t z = 0x7fffff79;
+
+    memset(expect, 0x12, sizeof(MMVector) * 2);
+    memset(output, 0x34, sizeof(MMVector) * 2);
+
+    asm volatile ("v16 = vsplat(%0)\n\t"
+                  "v17 = vsplat(%1)\n\t"
+                  "v26 = vsplat(%2)\n\t"
+                  "v27 = vsplat(%3)\n\t"
+                  "v25:24.uw = vsub(v17:16.uw, v27:26.uw):sat\n\t"
+                  "vmem(%4+#0) = v24\n\t"
+                  "vmem(%4+#1) = v25\n\t"
+                  : /* no outputs */
+                  : "r"(w), "r"(y), "r"(x), "r"(z), "r"(output)
+                  : "v16", "v17", "v24", "v25", "v26", "v27", "memory");
+
+    for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+        expect[0].uw[j] = 0x00000000;
+        expect[1].uw[j] = 0x00000000;
+    }
+
+    check_output_w(__LINE__, 2);
+}
+
 int main()
 {
    init_buffers();
@ -464,6 +530,9 @@ int main()
    test_pred_and_n(true);
    test_pred_xor(false);

+    test_vadduwsat();
+    test_vsubuwsat_dv();
+
    puts(err ? "FAIL" : "PASS");
    return err ? 1 : 0;
 }
--- a/tests/tcg/hexagon/overflow.c
+++ b/tests/tcg/hexagon/overflow.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -72,6 +72,20 @@ int read_usr_overflow(void)
    return result & 1;
 }

+int get_usr_overflow(int usr)
+{
+    return usr & 1;
+}
+
+int get_usr_fp_invalid(int usr)
+{
+    return (usr >> 1) & 1;
+}
+
+int get_usr_lpcfg(int usr)
+{
+    return (usr >> 8) & 0x3;
+}

 jmp_buf jmp_env;
 int usr_overflow;
@ -82,6 +96,49 @@ static void sig_segv(int sig, siginfo_t *info, void *puc)
    longjmp(jmp_env, 1);
 }

+static void test_packet(void)
+{
+    int convres;
+    int satres;
+    int usr;
+
+    asm("r2 = usr\n\t"
+        "r2 = clrbit(r2, #0)\n\t"        /* clear overflow bit */
+        "r2 = clrbit(r2, #1)\n\t"        /* clear FP invalid bit */
+        "usr = r2\n\t"
+        "{\n\t"
+        "    %0 = convert_sf2uw(%3):chop\n\t"
+        "    %1 = satb(%4)\n\t"
+        "}\n\t"
+        "%2 = usr\n\t"
+        : "=r"(convres), "=r"(satres), "=r"(usr)
+        : "r"(0x6a051b86), "r"(0x0410eec0)
+        : "r2", "usr");
+
+    check(convres, 0xffffffff);
+    check(satres, 0x7f);
+    check(get_usr_overflow(usr), 1);
+    check(get_usr_fp_invalid(usr), 1);
+
+    asm("r2 = usr\n\t"
+        "r2 = clrbit(r2, #0)\n\t"        /* clear overflow bit */
+        "usr = r2\n\t"
+        "%2 = r2\n\t"
+        "p3 = sp3loop0(1f, #1)\n\t"
+        "1:\n\t"
+        "{\n\t"
+        "    %0 = satb(%2)\n\t"
+        "}:endloop0\n\t"
+        "%1 = usr\n\t"
+        : "=r"(satres), "=r"(usr)
+        : "r"(0x0410eec0)
+        : "r2", "usr", "p3", "sa0", "lc0");
+
+    check(satres, 0x7f);
+    check(get_usr_overflow(usr), 1);
+    check(get_usr_lpcfg(usr), 2);
+}
+
 int main()
 {
    struct sigaction act;
@ -102,6 +159,8 @@ int main()

    check(usr_overflow, 0);

+    test_packet();
+
    puts(err ? "FAIL" : "PASS");
    return err ? EXIT_FAILURE : EXIT_SUCCESS;
 }
--- a/tests/tcg/hexagon/preg_alias.c
+++ b/tests/tcg/hexagon/preg_alias.c
@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@ -57,17 +57,15 @@ typedef union {

 static inline void creg_alias(int cval, PRegs *pregs)
 {
-  unsigned char val;
-  asm volatile("c4 = %0" : : "r"(cval));
-
-  asm volatile("%0 = p0" : "=r"(val));
-  pregs->pregs.p0 = val;
-  asm volatile("%0 = p1" : "=r"(val));
-  pregs->pregs.p1 = val;
-  asm volatile("%0 = p2" : "=r"(val));
-  pregs->pregs.p2 = val;
-  asm volatile("%0 = p3" : "=r"(val));
-  pregs->pregs.p3 = val;
+  asm("c4 = %4\n\t"
+      "%0 = p0\n\t"
+      "%1 = p1\n\t"
+      "%2 = p2\n\t"
+      "%3 = p3\n\t"
+      : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
+        "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3)
+      : "r"(cval)
+      : "p0", "p1", "p2", "p3");
 }

 int err;
@ -83,22 +81,58 @@ static void check(int val, int expect)
 static inline void creg_alias_pair(unsigned int cval, PRegs *pregs)
 {
  unsigned long long cval_pair = (0xdeadbeefULL << 32) | cval;
-  unsigned char val;
  int c5;
-  asm volatile("c5:4 = %0" : : "r"(cval_pair));

-  asm volatile("%0 = p0" : "=r"(val));
-  pregs->pregs.p0 = val;
-  asm volatile("%0 = p1" : "=r"(val));
-  pregs->pregs.p1 = val;
-  asm volatile("%0 = p2" : "=r"(val));
-  pregs->pregs.p2 = val;
-  asm volatile("%0 = p3" : "=r"(val));
-  pregs->pregs.p3 = val;
-  asm volatile("%0 = c5" : "=r"(c5));
+  asm ("c5:4 = %5\n\t"
+       "%0 = p0\n\t"
+       "%1 = p1\n\t"
+       "%2 = p2\n\t"
+       "%3 = p3\n\t"
+       "%4 = c5\n\t"
+       : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
+         "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5)
+       : "r"(cval_pair)
+       : "p0", "p1", "p2", "p3");
+
  check(c5, 0xdeadbeef);
 }

+static void test_packet(void)
+{
+    /*
+     * Test that setting c4 inside a packet doesn't impact the predicates
+     * that are read during the packet.
+     */
+
+    int result;
+    int old_val = 0x0000001c;
+
+    /* Test a predicated register transfer */
+    result = old_val;
+    asm (
+         "c4 = %1\n\t"
+         "{\n\t"
+         "    c4 = %2\n\t"
+         "    if (!p2) %0 = %3\n\t"
+         "}\n\t"
+         : "+r"(result)
+         : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653)
+         : "p0", "p1", "p2", "p3");
+    check(result, old_val);
+
+    /* Test a predicated store */
+    result = 0xffffffff;
+    asm ("c4 = %0\n\t"
+         "{\n\t"
+         "    c4 = %1\n\t"
+         "    if (!p2) memw(%2) = #0\n\t"
+         "}\n\t"
+         :
+         : "r"(0), "r"(0xffffffff), "r"(&result)
+         : "p0", "p1", "p2", "p3", "memory");
+    check(result, 0x0);
+}
+
 int main()
 {
    int c4;
@ -164,6 +198,8 @@ int main()
    creg_alias_pair(0xffffffff, &pregs);
    check(pregs.creg, 0xffffffff);

+    test_packet();
+
    puts(err ? "FAIL" : "PASS");
    return err;
 }
--- a/tests/tcg/hexagon/usr.c
+++ b/tests/tcg/hexagon/usr.c