mirror of https://github.com/xemu-project/xemu.git
target/sparc: Fix FMULD8*X16
Not only do these instructions have f32 inputs, they also do not perform rounding. Since they are relatively simple, implement them properly inline.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20240502165528.244004-6-richard.henderson@linaro.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
This commit is contained in:
parent
a859602c74
commit
be8998e046
|
@ -99,8 +99,6 @@ DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i32, i64)
|
||||||
DEF_HELPER_FLAGS_2(fmul8x16a, TCG_CALL_NO_RWG_SE, i64, i32, s32)
|
DEF_HELPER_FLAGS_2(fmul8x16a, TCG_CALL_NO_RWG_SE, i64, i32, s32)
|
||||||
DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
|
||||||
DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
|
||||||
DEF_HELPER_FLAGS_1(fexpand, TCG_CALL_NO_RWG_SE, i64, i32)
|
DEF_HELPER_FLAGS_1(fexpand, TCG_CALL_NO_RWG_SE, i64, i32)
|
||||||
DEF_HELPER_FLAGS_3(pdist, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
DEF_HELPER_FLAGS_3(pdist, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64)
|
DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64)
|
||||||
|
|
|
@ -74,8 +74,6 @@
|
||||||
# define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; })
|
||||||
# define gen_helper_fmul8ulx16 ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fmul8ulx16 ({ qemu_build_not_reached(); NULL; })
|
||||||
# define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; })
|
||||||
# define gen_helper_fmuld8sux16 ({ qemu_build_not_reached(); NULL; })
|
|
||||||
# define gen_helper_fmuld8ulx16 ({ qemu_build_not_reached(); NULL; })
|
|
||||||
# define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; })
|
||||||
# define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; })
|
||||||
# define gen_helper_fstox ({ qemu_build_not_reached(); NULL; })
|
# define gen_helper_fstox ({ qemu_build_not_reached(); NULL; })
|
||||||
|
@ -730,6 +728,48 @@ static void gen_op_fmul8x16au(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2)
|
||||||
gen_helper_fmul8x16a(dst, src1, src2);
|
gen_helper_fmul8x16a(dst, src1, src2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit inline TCG ops for FMULD8ULx16.
 *
 * dst:  64-bit result; receives two 32-bit products.
 * src1: 32-bit operand; one unsigned byte is taken from each 16-bit half.
 * src2: 32-bit operand; each 16-bit half is used as a signed value.
 *
 * Each product is a plain 32-bit multiply; no rounding step is emitted.
 */
static void gen_op_fmuld8ulx16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2)
|
||||||
|
{
|
||||||
|
TCGv_i32 t0 = tcg_temp_new_i32();
|
||||||
|
TCGv_i32 t1 = tcg_temp_new_i32();
|
||||||
|
TCGv_i32 t2 = tcg_temp_new_i32();
|
||||||
|
|
||||||
|
/* Low lane: zero-extended src1[7:0] times sign-extended src2[15:0]. */
tcg_gen_ext8u_i32(t0, src1);
|
||||||
|
tcg_gen_ext16s_i32(t1, src2);
|
||||||
|
tcg_gen_mul_i32(t0, t0, t1);
|
||||||
|
|
||||||
|
/* High lane: unsigned src1[23:16] times signed src2[31:16]. */
tcg_gen_extract_i32(t1, src1, 16, 8);
|
||||||
|
tcg_gen_sextract_i32(t2, src2, 16, 16);
|
||||||
|
tcg_gen_mul_i32(t1, t1, t2);
|
||||||
|
|
||||||
|
/* Pack the results: t0 becomes the low 32 bits of dst, t1 the high 32. */
tcg_gen_concat_i32_i64(dst, t0, t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Emit inline TCG ops for FMULD8SUx16.
 *
 * dst:  64-bit result; receives two 32-bit products.
 * src1: 32-bit operand; each 16-bit half is sign-extended and has its
 *       low 8 bits cleared before the multiply.
 * src2: 32-bit operand; each 16-bit half is used as a signed value.
 *
 * Each product is a plain 32-bit multiply; no rounding step is emitted.
 */
static void gen_op_fmuld8sux16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2)
|
||||||
|
{
|
||||||
|
TCGv_i32 t0 = tcg_temp_new_i32();
|
||||||
|
TCGv_i32 t1 = tcg_temp_new_i32();
|
||||||
|
TCGv_i32 t2 = tcg_temp_new_i32();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The insn description talks about extracting the upper 8 bits
|
||||||
|
* of the signed 16-bit input rs1, performing the multiply, then
|
||||||
|
* shifting left by 8 bits. Instead, zap the lower 8 bits of
|
||||||
|
* the rs1 input, which avoids the need for two shifts.
|
||||||
|
*/
|
||||||
|
tcg_gen_ext16s_i32(t0, src1);
|
||||||
|
tcg_gen_andi_i32(t0, t0, ~0xff);
|
||||||
|
tcg_gen_ext16s_i32(t1, src2);
|
||||||
|
tcg_gen_mul_i32(t0, t0, t1);
|
||||||
|
|
||||||
|
/* Same computation on the upper 16-bit halves of src1 and src2. */
tcg_gen_sextract_i32(t1, src1, 16, 16);
|
||||||
|
tcg_gen_andi_i32(t1, t1, ~0xff);
|
||||||
|
tcg_gen_sextract_i32(t2, src2, 16, 16);
|
||||||
|
tcg_gen_mul_i32(t1, t1, t2);
|
||||||
|
|
||||||
|
/* Pack the results: t0 becomes the low 32 bits of dst, t1 the high 32. */
tcg_gen_concat_i32_i64(dst, t0, t1);
|
||||||
|
}
|
||||||
|
|
||||||
static void finishing_insn(DisasContext *dc)
|
static void finishing_insn(DisasContext *dc)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -4614,6 +4654,8 @@ static bool do_dff(DisasContext *dc, arg_r_r_r *a,
|
||||||
|
|
||||||
TRANS(FMUL8x16AU, VIS1, do_dff, a, gen_op_fmul8x16au)
|
TRANS(FMUL8x16AU, VIS1, do_dff, a, gen_op_fmul8x16au)
|
||||||
TRANS(FMUL8x16AL, VIS1, do_dff, a, gen_op_fmul8x16al)
|
TRANS(FMUL8x16AL, VIS1, do_dff, a, gen_op_fmul8x16al)
|
||||||
|
TRANS(FMULD8SUx16, VIS1, do_dff, a, gen_op_fmuld8sux16)
|
||||||
|
TRANS(FMULD8ULx16, VIS1, do_dff, a, gen_op_fmuld8ulx16)
|
||||||
|
|
||||||
static bool do_dfd(DisasContext *dc, arg_r_r_r *a,
|
static bool do_dfd(DisasContext *dc, arg_r_r_r *a,
|
||||||
void (*func)(TCGv_i64, TCGv_i32, TCGv_i64))
|
void (*func)(TCGv_i64, TCGv_i32, TCGv_i64))
|
||||||
|
@ -4654,8 +4696,6 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a,
|
||||||
|
|
||||||
TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16)
|
TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16)
|
||||||
TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16)
|
TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16)
|
||||||
TRANS(FMULD8SUx16, VIS1, do_ddd, a, gen_helper_fmuld8sux16)
|
|
||||||
TRANS(FMULD8ULx16, VIS1, do_ddd, a, gen_helper_fmuld8ulx16)
|
|
||||||
TRANS(FPMERGE, VIS1, do_ddd, a, gen_helper_fpmerge)
|
TRANS(FPMERGE, VIS1, do_ddd, a, gen_helper_fpmerge)
|
||||||
|
|
||||||
TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64)
|
TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64)
|
||||||
|
|
|
@ -194,52 +194,6 @@ uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
|
||||||
return d.ll;
|
return d.ll;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Out-of-line helper for FMULD8SUx16 (the old implementation; this is the
 * removed side of the diff).
 *
 * Takes both operands as 64-bit values, copies src1 into s and src2 into d,
 * runs PMUL on lane 1 and then lane 0, and returns d.ll.
 *
 * NOTE(review): VIS64, VIS_SW64 and VIS_L64 are defined elsewhere;
 * presumably they select signed 16-bit and 32-bit lanes -- confirm.
 * Per the commit message, the instruction has f32 inputs and does not
 * round, which is why this helper is replaced by inline code.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
|
|
||||||
{
|
|
||||||
VIS64 s, d;
|
|
||||||
uint32_t tmp;
|
|
||||||
|
|
||||||
s.ll = src1;
|
|
||||||
d.ll = src2;
|
|
||||||
|
|
||||||
/* PMUL(r): d.VIS_SW64(r) times (s.VIS_SW64(r) >> 8); adds 0x100 when the product's low byte exceeds 0x7f; stores into d.VIS_L64(r). */
#define PMUL(r) \
|
|
||||||
tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
|
|
||||||
if ((tmp & 0xff) > 0x7f) { \
|
|
||||||
tmp += 0x100; \
|
|
||||||
} \
|
|
||||||
d.VIS_L64(r) = tmp;
|
|
||||||
|
|
||||||
/* Reverse calculation order to handle overlap */
|
|
||||||
PMUL(1);
|
|
||||||
PMUL(0);
|
|
||||||
#undef PMUL
|
|
||||||
|
|
||||||
return d.ll;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Out-of-line helper for FMULD8ULx16 (the old implementation; this is the
 * removed side of the diff).
 *
 * Takes both operands as 64-bit values, copies src1 into s and src2 into d,
 * runs PMUL on lane 1 and then lane 0, and returns d.ll.
 *
 * NOTE(review): VIS64, VIS_SW64, VIS_B64 and VIS_L64 are defined elsewhere;
 * presumably they select signed 16-bit, byte and 32-bit lanes -- confirm.
 * Per the commit message, the instruction has f32 inputs and does not
 * round, which is why this helper is replaced by inline code.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
|
|
||||||
{
|
|
||||||
VIS64 s, d;
|
|
||||||
uint32_t tmp;
|
|
||||||
|
|
||||||
s.ll = src1;
|
|
||||||
d.ll = src2;
|
|
||||||
|
|
||||||
/* PMUL(r): d.VIS_SW64(r) times s.VIS_B64(r * 2) as unsigned; adds 0x100 when the product's low byte exceeds 0x7f; stores into d.VIS_L64(r). */
#define PMUL(r) \
|
|
||||||
tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
|
|
||||||
if ((tmp & 0xff) > 0x7f) { \
|
|
||||||
tmp += 0x100; \
|
|
||||||
} \
|
|
||||||
d.VIS_L64(r) = tmp;
|
|
||||||
|
|
||||||
/* Reverse calculation order to handle overlap */
|
|
||||||
PMUL(1);
|
|
||||||
PMUL(0);
|
|
||||||
#undef PMUL
|
|
||||||
|
|
||||||
return d.ll;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t helper_fexpand(uint32_t src2)
|
uint64_t helper_fexpand(uint32_t src2)
|
||||||
{
|
{
|
||||||
VIS32 s;
|
VIS32 s;
|
||||||
|
|
Loading…
Reference in New Issue