mirror of https://github.com/xemu-project/xemu.git
arm/translate-a64: add FP16 x2 ops for simd_indexed
A bunch of the vectorised bitwise operations just operate on larger chunks at a time. We can do the same for the new half-precision operations by introducing some TWOHALFOP helpers which perform two half-precision operations at once, one on each 16-bit half of a 32-bit value. Hopefully all this hoop jumping will get simpler once we have generically vectorised helpers here. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20180227143852.11175-16-alex.bennee@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
5d265064cf
commit
6089030c73
|
@ -629,8 +629,32 @@ ADVSIMD_HALFOP(max)
|
|||
ADVSIMD_HALFOP(minnum)
|
||||
ADVSIMD_HALFOP(maxnum)
|
||||
|
||||
/*
 * ADVSIMD_TWOHALFOP(name): define a helper that applies float16_<name>
 * to two pairs of half-precision values in a single call.
 *
 * The generated function is named via ADVSIMD_HELPER(name, 2h); the
 * helper declarations later in this change suggest this yields
 * advsimd_<name>2h, but the ADVSIMD_HELPER macro itself is not visible
 * here -- confirm against its definition.
 *
 * Parameters of the generated helper:
 *   two_a, two_b - 32-bit values, each split into two 16-bit fields:
 *                  a1/b1 from extract32(x, 0, 16) and a2/b2 from
 *                  extract32(x, 16, 16).
 *   fpstp        - opaque pointer used as the float_status for both
 *                  float16_<name> calls.
 *
 * Returns: deposit32(r1, 16, 16, r2), where r1 = float16_<name>(a1, b1)
 * and r2 = float16_<name>(a2, b2).  Assuming the usual QEMU
 * extract32/deposit32 (value, start, length[, fieldval]) semantics, r1
 * occupies bits 0..15 of the result and r2 bits 16..31.
 *
 * The first (field 0) operation is evaluated before the second, and both
 * share the same float_status.
 */
#define ADVSIMD_TWOHALFOP(name) \
|
||||
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
|
||||
{ \
|
||||
float16 a1, a2, b1, b2; \
|
||||
uint32_t r1, r2; \
|
||||
float_status *fpst = fpstp; \
|
||||
a1 = extract32(two_a, 0, 16); \
|
||||
a2 = extract32(two_a, 16, 16); \
|
||||
b1 = extract32(two_b, 0, 16); \
|
||||
b2 = extract32(two_b, 16, 16); \
|
||||
r1 = float16_ ## name(a1, b1, fpst); \
|
||||
r2 = float16_ ## name(a2, b2, fpst); \
|
||||
return deposit32(r1, 16, 16, r2); \
|
||||
}
|
||||
|
||||
ADVSIMD_TWOHALFOP(add)
|
||||
ADVSIMD_TWOHALFOP(sub)
|
||||
ADVSIMD_TWOHALFOP(mul)
|
||||
ADVSIMD_TWOHALFOP(div)
|
||||
ADVSIMD_TWOHALFOP(min)
|
||||
ADVSIMD_TWOHALFOP(max)
|
||||
ADVSIMD_TWOHALFOP(minnum)
|
||||
ADVSIMD_TWOHALFOP(maxnum)
|
||||
|
||||
/* Data processing - scalar floating-point and advanced SIMD */
|
||||
float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
|
||||
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
|
||||
{
|
||||
float_status *fpst = fpstp;
|
||||
|
||||
|
@ -646,6 +670,9 @@ float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
|
|||
return float16_mul(a, b, fpst);
|
||||
}
|
||||
|
||||
ADVSIMD_HALFOP(mulx)
|
||||
ADVSIMD_TWOHALFOP(mulx)
|
||||
|
||||
/* fused multiply-accumulate */
|
||||
float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
|
||||
{
|
||||
|
@ -653,6 +680,23 @@ float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
|
|||
return float16_muladd(a, b, c, 0, fpst);
|
||||
}
|
||||
|
||||
/*
 * Paired half-precision multiply-add: performs two float16_muladd
 * operations, one per 16-bit field of the 32-bit inputs.
 *
 * two_a, two_b, two_c - 32-bit values each split into two 16-bit fields
 *                       (field 1 from extract32(x, 0, 16), field 2 from
 *                       extract32(x, 16, 16)).
 * fpstp               - opaque pointer used as the float_status for both
 *                       operations.
 *
 * Returns deposit32(r1, 16, 16, r2) with
 *   r1 = float16_muladd(a1, b1, c1, 0, fpst)
 *   r2 = float16_muladd(a2, b2, c2, 0, fpst)
 * Assuming the usual QEMU deposit32(value, start, length, fieldval)
 * semantics, r1 ends up in bits 0..15 and r2 in bits 16..31.
 *
 * NOTE(review): the literal 0 is float16_muladd's fourth argument,
 * presumably the softfloat flags word with no flags set -- confirm
 * against the softfloat API.
 */
uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
|
||||
uint32_t two_c, void *fpstp)
|
||||
{
|
||||
float_status *fpst = fpstp;
|
||||
float16 a1, a2, b1, b2, c1, c2;
|
||||
uint32_t r1, r2;
|
||||
a1 = extract32(two_a, 0, 16);
|
||||
a2 = extract32(two_a, 16, 16);
|
||||
b1 = extract32(two_b, 0, 16);
|
||||
b2 = extract32(two_b, 16, 16);
|
||||
c1 = extract32(two_c, 0, 16);
|
||||
c2 = extract32(two_c, 16, 16);
|
||||
r1 = float16_muladd(a1, b1, c1, 0, fpst);
|
||||
r2 = float16_muladd(a2, b2, c2, 0, fpst);
|
||||
return deposit32(r1, 16, 16, r2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Floating point comparisons produce an integer result. Softfloat
|
||||
* routines return float_relation types which we convert to the 0/-1
|
||||
|
|
|
@ -65,3 +65,13 @@ DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, ptr)
|
|||
DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, ptr)
|
||||
DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr)
|
||||
DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr)
|
||||
/*
 * Declarations for the paired ("2h") half-precision helpers.  Each one
 * returns an i32 and takes i32 operands plus a trailing ptr argument,
 * matching the uint32_t / void *fpstp signatures of the corresponding
 * helper definitions (two 16-bit values carried in each 32-bit operand).
 * The muladd variant takes a third i32 operand, hence DEF_HELPER_4.
 */
DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
|
||||
DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
|
||||
|
|
|
@ -11417,8 +11417,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
* multiply-add */
|
||||
tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
|
||||
}
|
||||
gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
|
||||
tcg_res, fpst);
|
||||
if (is_scalar) {
|
||||
gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
|
||||
tcg_res, fpst);
|
||||
} else {
|
||||
gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
|
||||
tcg_res, fpst);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (opcode == 0x5) {
|
||||
|
@ -11437,10 +11442,21 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
|||
switch (size) {
|
||||
case 1:
|
||||
if (u) {
|
||||
gen_helper_advsimd_mulxh(tcg_res, tcg_op, tcg_idx,
|
||||
fpst);
|
||||
if (is_scalar) {
|
||||
gen_helper_advsimd_mulxh(tcg_res, tcg_op,
|
||||
tcg_idx, fpst);
|
||||
} else {
|
||||
gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
|
||||
tcg_idx, fpst);
|
||||
}
|
||||
} else {
|
||||
g_assert_not_reached();
|
||||
if (is_scalar) {
|
||||
gen_helper_advsimd_mulh(tcg_res, tcg_op,
|
||||
tcg_idx, fpst);
|
||||
} else {
|
||||
gen_helper_advsimd_mul2h(tcg_res, tcg_op,
|
||||
tcg_idx, fpst);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
|
|
Loading…
Reference in New Issue