diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 7b34cc98af..d477479bb1 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -317,10 +317,12 @@ void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); - for (i = 0; i < opr_sz / 2; i += 16 / 2) { + for (i = 0; i < elements; i += 16 / 2) { int16_t mm = m[i]; - for (j = 0; j < 16 / 2; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); } } @@ -333,10 +335,12 @@ void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + intptr_t elements = opr_sz / 2; + intptr_t eltspersegment = MIN(16 / 2, elements); - for (i = 0; i < opr_sz / 2; i += 16 / 2) { + for (i = 0; i < elements; i += 16 / 2) { int16_t mm = m[i]; - for (j = 0; j < 16 / 2; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); } } @@ -512,10 +516,12 @@ void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); - for (i = 0; i < opr_sz / 4; i += 16 / 4) { + for (i = 0; i < elements; i += 16 / 4) { int32_t mm = m[i]; - for (j = 0; j < 16 / 4; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); } } @@ -528,10 +534,12 @@ void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, intptr_t i, j, opr_sz = simd_oprsz(desc); int idx = simd_data(desc); int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); - for (i = 0; i < opr_sz / 4; i += 16 / 4) { + for (i = 0; i < elements; i += 16 / 4) { int32_t mm = m[i]; - for (j = 0; j < 16 / 4; ++j) { + for (j = 0; j < eltspersegment; ++j) { d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); } }