diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index f573014532..8f3b97d9bf 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -447,6 +447,17 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, } else { return 1; } +#elif defined(TARGET_SPARC) + /* Prefer SNaN over QNaN, order B then A. */ + if (is_snan(b_cls)) { + return 1; + } else if (is_snan(a_cls)) { + return 0; + } else if (is_qnan(b_cls)) { + return 1; + } else { + return 0; + } #elif defined(TARGET_XTENSA) /* * Xtensa has two NaN propagation modes. @@ -624,6 +635,26 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, float_raise(float_flag_invalid | float_flag_invalid_imz, status); } return 3; /* default NaN */ +#elif defined(TARGET_SPARC) + /* For (inf,0,nan) return c. */ + if (infzero) { + float_raise(float_flag_invalid | float_flag_invalid_imz, status); + return 2; + } + /* Prefer SNaN over QNaN, order C, B, A. */ + if (is_snan(c_cls)) { + return 2; + } else if (is_snan(b_cls)) { + return 1; + } else if (is_snan(a_cls)) { + return 0; + } else if (is_qnan(c_cls)) { + return 2; + } else if (is_qnan(b_cls)) { + return 1; + } else { + return 0; + } #elif defined(TARGET_XTENSA) /* * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c1e1511ff2..0d4dc1f6d1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1003,6 +1003,9 @@ static uint32_t get_elf_hwcap(void) r |= features & CPU_FEATURE_FSMULD ? HWCAP_SPARC_FSMULD : 0; r |= features & CPU_FEATURE_VIS1 ? HWCAP_SPARC_VIS : 0; r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0; + r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0; + r |= features & CPU_FEATURE_VIS3 ? HWCAP_SPARC_VIS3 : 0; + r |= features & CPU_FEATURE_IMA ? HWCAP_SPARC_IMA : 0; #endif return r; diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index d508d0c04a..3b41128fd7 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -102,6 +102,7 @@ IOCTL(BLKRAGET, IOC_R, MK_PTR(TYPE_LONG)) IOCTL(BLKSSZGET, IOC_R, MK_PTR(TYPE_INT)) IOCTL(BLKBSZGET, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(BLKBSZSET, IOC_W, MK_PTR(TYPE_INT)) IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg, MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg))) diff --git a/target/sparc/asi.h b/target/sparc/asi.h index a66829674b..14ffaa3842 100644 --- a/target/sparc/asi.h +++ b/target/sparc/asi.h @@ -144,6 +144,8 @@ * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4 * and later ASIs. 
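+ * "(VIS4)" designates ASIs introduced with the VIS 4 extension.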
*/ +#define ASI_MON_AIUP 0x12 /* (VIS4) Primary, user, monitor */ +#define ASI_MON_AIUS 0x13 /* (VIS4) Secondary, user, monitor */ #define ASI_REAL 0x14 /* Real address, cacheable */ #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cacheable */ #define ASI_REAL_IO 0x15 /* Real address, non-cacheable */ @@ -257,6 +259,8 @@ #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ +#define ASI_MON_P 0x84 /* (VIS4) Primary, monitor */ +#define ASI_MON_S 0x85 /* (VIS4) Secondary, monitor */ #define ASI_PIC 0xb0 /* (NG4) PIC registers */ #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc index d800f18c4e..be81005237 100644 --- a/target/sparc/cpu-feature.h.inc +++ b/target/sparc/cpu-feature.h.inc @@ -12,3 +12,7 @@ FEATURE(ASR17) FEATURE(CACHE_CTRL) FEATURE(POWERDOWN) FEATURE(CASA) +FEATURE(FMAF) +FEATURE(VIS3) +FEATURE(IMA) +FEATURE(VIS4) diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 5be1592e66..9bacfb68cb 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -549,6 +549,10 @@ static const char * const feature_name[] = { [CPU_FEATURE_BIT_HYPV] = "hypv", [CPU_FEATURE_BIT_VIS1] = "vis1", [CPU_FEATURE_BIT_VIS2] = "vis2", + [CPU_FEATURE_BIT_FMAF] = "fmaf", + [CPU_FEATURE_BIT_VIS3] = "vis3", + [CPU_FEATURE_BIT_IMA] = "ima", + [CPU_FEATURE_BIT_VIS4] = "vis4", #else [CPU_FEATURE_BIT_MUL] = "mul", [CPU_FEATURE_BIT_DIV] = "div", @@ -877,6 +881,14 @@ static Property sparc_cpu_properties[] = { CPU_FEATURE_BIT_VIS1, false), DEFINE_PROP_BIT("vis2", SPARCCPU, env.def.features, CPU_FEATURE_BIT_VIS2, false), + DEFINE_PROP_BIT("fmaf", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_FMAF, false), + DEFINE_PROP_BIT("vis3", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_VIS3, false), + DEFINE_PROP_BIT("ima", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_IMA, false), + DEFINE_PROP_BIT("vis4", SPARCCPU, env.def.features, + CPU_FEATURE_BIT_VIS4, false), #else DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features, CPU_FEATURE_BIT_MUL, false), diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index 1205a599ef..0b30665b51 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -343,6 +343,90 @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src) return f128_ret(ret); } +float32 helper_fmadds(CPUSPARCState *env, float32 s1, + float32 s2, float32 s3, uint32_t op) +{ + float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status); + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fmaddd(CPUSPARCState *env, float64 s1, + float64 s2, float64 s3, uint32_t op) +{ + float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status); + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float32 helper_fnadds(CPUSPARCState *env, float32 src1, float32 src2) +{ + float32 ret = float32_add(src1, src2, &env->fp_status); + + /* + * NaN inputs or result do not get a sign change. + * Nor, apparently, does zero: on hardware, -(x + -x) yields +0. + */ + if (!float32_is_any_nan(ret) && !float32_is_zero(ret)) { + ret = float32_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float32 helper_fnmuls(CPUSPARCState *env, float32 src1, float32 src2) +{ + float32 ret = float32_mul(src1, src2, &env->fp_status); + + /* NaN inputs or result do not get a sign change. 
*/ + if (!float32_is_any_nan(ret)) { + ret = float32_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnaddd(CPUSPARCState *env, float64 src1, float64 src2) +{ + float64 ret = float64_add(src1, src2, &env->fp_status); + + /* + * NaN inputs or result do not get a sign change. + * Nor, apparently, does zero: on hardware, -(x + -x) yields +0. + */ + if (!float64_is_any_nan(ret) && !float64_is_zero(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnmuld(CPUSPARCState *env, float64 src1, float64 src2) +{ + float64 ret = float64_mul(src1, src2, &env->fp_status); + + /* NaN inputs or result do not get a sign change. */ + if (!float64_is_any_nan(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + +float64 helper_fnsmuld(CPUSPARCState *env, float32 src1, float32 src2) +{ + float64 ret = float64_mul(float32_to_float64(src1, &env->fp_status), + float32_to_float64(src2, &env->fp_status), + &env->fp_status); + + /* NaN inputs or result do not get a sign change. */ + if (!float64_is_any_nan(ret)) { + ret = float64_chs(ret); + } + check_ieee_exceptions(env, GETPC()); + return ret; +} + static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) { check_ieee_exceptions(env, ra); @@ -406,6 +490,52 @@ uint32_t helper_fcmpeq(CPUSPARCState *env, Int128 src1, Int128 src2) return finish_fcmp(env, r, GETPC()); } +uint32_t helper_flcmps(float32 src1, float32 src2) +{ + /* + * FLCMP never raises an exception nor modifies any FSR fields. + * Perform the comparison with a dummy fp environment. + */ + float_status discard = { }; + FloatRelation r = float32_compare_quiet(src1, src2, &discard); + + switch (r) { + case float_relation_equal: + if (src2 == float32_zero && src1 != float32_zero) { + return 1; /* -0.0 < +0.0 */ + } + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 0; + case float_relation_unordered: + return float32_is_any_nan(src2) ? 3 : 2; + } + g_assert_not_reached(); +} + +uint32_t helper_flcmpd(float64 src1, float64 src2) +{ + float_status discard = { }; + FloatRelation r = float64_compare_quiet(src1, src2, &discard); + + switch (r) { + case float_relation_equal: + if (src2 == float64_zero && src1 != float64_zero) { + return 1; /* -0.0 < +0.0 */ + } + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 0; + case float_relation_unordered: + return float64_is_any_nan(src2) ? 
3 : 2; + } + g_assert_not_reached(); +} + target_ulong cpu_get_fsr(CPUSPARCState *env) { target_ulong fsr = env->fsr | env->fsr_cexc_ftt; @@ -472,3 +602,9 @@ void helper_set_fsr_nofcc_noftt(CPUSPARCState *env, uint32_t fsr) env->fsr_cexc_ftt |= fsr & FSR_CEXC_MASK; set_fsr_nonsplit(env, fsr); } + +void helper_set_fsr_nofcc(CPUSPARCState *env, uint32_t fsr) +{ + env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK); + set_fsr_nonsplit(env, fsr); +} diff --git a/target/sparc/helper.h b/target/sparc/helper.h index 97fbf6f66c..134e519a37 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -40,6 +40,7 @@ DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32) DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32) #endif DEF_HELPER_FLAGS_1(get_fsr, TCG_CALL_NO_WG_SE, tl, env) +DEF_HELPER_FLAGS_2(set_fsr_nofcc, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_2(set_fsr_nofcc_noftt, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_WG, f32, env, f32) DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_WG, f64, env, f64) @@ -50,12 +51,17 @@ DEF_HELPER_FLAGS_3(fcmpd, TCG_CALL_NO_WG, i32, env, f64, f64) DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64) DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128) DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128) +DEF_HELPER_FLAGS_2(flcmps, TCG_CALL_NO_RWG_SE, i32, f32, f32) +DEF_HELPER_FLAGS_2(flcmpd, TCG_CALL_NO_RWG_SE, i32, f64, f64) DEF_HELPER_2(raise_exception, noreturn, env, int) DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32) +DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64) +DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128) @@ -66,8 +72,12 @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32) +DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32) +DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32) +DEF_HELPER_FLAGS_3(fnsmuld, TCG_CALL_NO_WG, f64, env, f32, f32) DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64) DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_WG, f64, env, s32) @@ -105,15 +115,28 @@ DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -#define VIS_CMPHELPER(name) \ +DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fslas16, TCG_CALL_NO_RWG_SE, i64, i64, i64) 
+DEF_HELPER_FLAGS_2(fslas32, TCG_CALL_NO_RWG_SE, i64, i64, i64) +#define VIS_CMPHELPER(name) \ + DEF_HELPER_FLAGS_2(f##name##8, TCG_CALL_NO_RWG_SE, \ + i64, i64, i64) \ DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \ - i64, i64, i64) \ + i64, i64, i64) \ DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_NO_RWG_SE, \ i64, i64, i64) VIS_CMPHELPER(cmpgt) VIS_CMPHELPER(cmpeq) VIS_CMPHELPER(cmple) VIS_CMPHELPER(cmpne) +VIS_CMPHELPER(cmpugt) +VIS_CMPHELPER(cmpule) +DEF_HELPER_FLAGS_2(xmulx, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(xmulxhi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #endif #undef VIS_HELPER #undef VIS_CMPHELPER diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index e2d8a07dc4..fbcb4f7aef 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -26,6 +26,15 @@ CALL 01 i:s30 ## Major Opcode 10 -- integer, floating-point, vis, and system insns. ## +%dfp_rd 25:5 !function=extract_dfpreg +%dfp_rs1 14:5 !function=extract_dfpreg +%dfp_rs2 0:5 !function=extract_dfpreg +%dfp_rs3 9:5 !function=extract_dfpreg + +%qfp_rd 25:5 !function=extract_qfpreg +%qfp_rs1 14:5 !function=extract_qfpreg +%qfp_rs2 0:5 !function=extract_qfpreg + &r_r_ri rd rs1 rs2_or_imm imm:bool @n_r_ri .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri rd=0 @r_r_ri .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri @@ -37,11 +46,45 @@ CALL 01 i:s30 &r_r_r rd rs1 rs2 @r_r_r .. rd:5 ...... rs1:5 . ........ rs2:5 &r_r_r +@d_r_r .. ..... ...... rs1:5 . ........ rs2:5 \ + &r_r_r rd=%dfp_rd +@r_d_d .. rd:5 ...... ..... . ........ ..... \ + &r_r_r rs1=%dfp_rs1 rs2=%dfp_rs2 +@d_r_d .. ..... ...... rs1:5 . ........ ..... \ + &r_r_r rd=%dfp_rd rs2=%dfp_rs2 +@d_d_d .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 +@q_q_q .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%qfp_rd rs1=%qfp_rs1 rs2=%qfp_rs2 +@q_d_d .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%qfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 + @r_r_r_swap .. rd:5 ...... rs2:5 . ........ rs1:5 &r_r_r +@d_d_d_swap .. ..... ...... ..... . ........ ..... \ + &r_r_r rd=%dfp_rd rs1=%dfp_rs2 rs2=%dfp_rs1 &r_r rd rs @r_r1 .. rd:5 ...... rs:5 . ........ ..... &r_r @r_r2 .. rd:5 ...... ..... . ........ rs:5 &r_r +@r_d2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%dfp_rs2 +@r_q2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%qfp_rs2 +@d_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%dfp_rd +@q_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%qfp_rd +@d_d1 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%dfp_rs1 +@d_d2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%dfp_rs2 +@d_q2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%dfp_rd rs=%qfp_rs2 +@q_q2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%qfp_rd rs=%qfp_rs2 +@q_d2 .. ..... ...... ..... . ........ ..... \ + &r_r rd=%qfp_rd rs=%dfp_rs2 + +&r_r_r_r rd rs1 rs2 rs3 +@r_r_r_r .. rd:5 ...... rs1:5 rs3:5 .... rs2:5 &r_r_r_r +@d_d_d_d .. ..... ...... ..... ..... .... ..... \ + &r_r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 rs3=%dfp_rs3 { [ @@ -81,6 +124,7 @@ CALL 01 i:s30 WRTICK_CMPR 10 10111 110000 ..... . ............. @n_r_ri WRSTICK 10 11000 110000 ..... . ............. @n_r_ri WRSTICK_CMPR 10 11001 110000 ..... . ............. @n_r_ri + WRMWAIT 10 11100 110000 ..... . ............. @n_r_ri ] # Before v8, rs1==0 was WRY, and the rest executed as nop. [ @@ -241,68 +285,89 @@ DONE 10 00000 111110 00000 0 0000000000000 RETRY 10 00001 111110 00000 0 0000000000000 FMOVs 10 ..... 110100 00000 0 0000 0001 ..... 
@r_r2 -FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @r_r2 -FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @r_r2 +FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @d_d2 +FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @q_q2 FNEGs 10 ..... 110100 00000 0 0000 0101 ..... @r_r2 -FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @r_r2 -FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @r_r2 +FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @d_d2 +FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @q_q2 FABSs 10 ..... 110100 00000 0 0000 1001 ..... @r_r2 -FABSd 10 ..... 110100 00000 0 0000 1010 ..... @r_r2 -FABSq 10 ..... 110100 00000 0 0000 1011 ..... @r_r2 +FABSd 10 ..... 110100 00000 0 0000 1010 ..... @d_d2 +FABSq 10 ..... 110100 00000 0 0000 1011 ..... @q_q2 FSQRTs 10 ..... 110100 00000 0 0010 1001 ..... @r_r2 -FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @r_r2 -FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @r_r2 +FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @d_d2 +FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @q_q2 FADDs 10 ..... 110100 ..... 0 0100 0001 ..... @r_r_r -FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @r_r_r -FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @r_r_r +FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @d_d_d +FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @q_q_q FSUBs 10 ..... 110100 ..... 0 0100 0101 ..... @r_r_r -FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @r_r_r -FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @r_r_r +FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @d_d_d +FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @q_q_q FMULs 10 ..... 110100 ..... 0 0100 1001 ..... @r_r_r -FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @r_r_r -FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @r_r_r +FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @d_d_d +FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @q_q_q FDIVs 10 ..... 110100 ..... 0 0100 1101 ..... @r_r_r -FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @r_r_r -FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @r_r_r -FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @r_r_r -FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @r_r_r +FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @d_d_d +FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @q_q_q +FNADDs 10 ..... 110100 ..... 0 0101 0001 ..... @r_r_r +FNADDd 10 ..... 110100 ..... 0 0101 0010 ..... @d_d_d +FNMULs 10 ..... 110100 ..... 0 0101 1001 ..... @r_r_r +FNMULd 10 ..... 110100 ..... 0 0101 1010 ..... @d_d_d +FHADDs 10 ..... 110100 ..... 0 0110 0001 ..... @r_r_r +FHADDd 10 ..... 110100 ..... 0 0110 0010 ..... @d_d_d +FHSUBs 10 ..... 110100 ..... 0 0110 0101 ..... @r_r_r +FHSUBd 10 ..... 110100 ..... 0 0110 0110 ..... @d_d_d +FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @d_r_r +FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @q_d_d +FNHADDs 10 ..... 110100 ..... 0 0111 0001 ..... @r_r_r +FNHADDd 10 ..... 110100 ..... 0 0111 0010 ..... @d_d_d +FNsMULd 10 ..... 110100 ..... 0 0111 1001 ..... @d_r_r FsTOx 10 ..... 110100 00000 0 1000 0001 ..... @r_r2 -FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_r2 -FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_r2 +FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_d2 +FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_q2 FxTOs 10 ..... 110100 00000 0 1000 0100 ..... @r_r2 -FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @r_r2 -FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @r_r2 +FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @d_r2 +FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @q_r2 FiTOs 10 ..... 110100 00000 0 1100 0100 ..... @r_r2 -FdTOs 10 ..... 110100 00000 0 1100 0110 ..... 
@r_r2 -FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_r2 -FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @r_r2 -FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @r_r2 -FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @r_r2 -FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @r_r2 -FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @r_r2 -FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @r_r2 +FdTOs 10 ..... 110100 00000 0 1100 0110 ..... @r_d2 +FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_q2 +FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @d_r2 +FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @d_r2 +FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @d_q2 +FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @q_r2 +FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @q_r2 +FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @q_d2 FsTOi 10 ..... 110100 00000 0 1101 0001 ..... @r_r2 -FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_r2 -FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_r2 +FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_d2 +FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_q2 FMOVscc 10 rd:5 110101 0 cond:4 1 cc:1 0 000001 rs2:5 -FMOVdcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000010 rs2:5 -FMOVqcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000011 rs2:5 +FMOVdcc 10 ..... 110101 0 cond:4 1 cc:1 0 000010 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVqcc 10 ..... 110101 0 cond:4 1 cc:1 0 000011 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FMOVsfcc 10 rd:5 110101 0 cond:4 0 cc:2 000001 rs2:5 -FMOVdfcc 10 rd:5 110101 0 cond:4 0 cc:2 000010 rs2:5 -FMOVqfcc 10 rd:5 110101 0 cond:4 0 cc:2 000011 rs2:5 +FMOVdfcc 10 ..... 110101 0 cond:4 0 cc:2 000010 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVqfcc 10 ..... 110101 0 cond:4 0 cc:2 000011 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FMOVRs 10 rd:5 110101 rs1:5 0 cond:3 00101 rs2:5 -FMOVRd 10 rd:5 110101 rs1:5 0 cond:3 00110 rs2:5 -FMOVRq 10 rd:5 110101 rs1:5 0 cond:3 00111 rs2:5 +FMOVRd 10 ..... 110101 rs1:5 0 cond:3 00110 ..... \ + rd=%dfp_rd rs2=%dfp_rs2 +FMOVRq 10 ..... 110101 rs1:5 0 cond:3 00111 ..... \ + rd=%qfp_rd rs2=%qfp_rs2 FCMPs 10 000 cc:2 110101 rs1:5 0 0101 0001 rs2:5 -FCMPd 10 000 cc:2 110101 rs1:5 0 0101 0010 rs2:5 -FCMPq 10 000 cc:2 110101 rs1:5 0 0101 0011 rs2:5 +FCMPd 10 000 cc:2 110101 ..... 0 0101 0010 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 +FCMPq 10 000 cc:2 110101 ..... 0 0101 0011 ..... \ + rs1=%qfp_rs1 rs2=%qfp_rs2 FCMPEs 10 000 cc:2 110101 rs1:5 0 0101 0101 rs2:5 -FCMPEd 10 000 cc:2 110101 rs1:5 0 0101 0110 rs2:5 -FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5 +FCMPEd 10 000 cc:2 110101 ..... 0 0101 0110 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 +FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \ + rs1=%qfp_rs1 rs2=%qfp_rs2 { [ @@ -323,93 +388,187 @@ FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5 ARRAY16 10 ..... 110110 ..... 0 0001 0010 ..... @r_r_r ARRAY32 10 ..... 110110 ..... 0 0001 0100 ..... @r_r_r + ADDXC 10 ..... 110110 ..... 0 0001 0001 ..... @r_r_r + ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r + UMULXHI 10 ..... 110110 ..... 0 0001 0110 ..... @r_r_r + LZCNT 10 ..... 110110 00000 0 0001 0111 ..... @r_r2 + XMULX 10 ..... 110110 ..... 1 0001 0101 ..... @r_r_r + XMULXHI 10 ..... 110110 ..... 1 0001 0110 ..... @r_r_r + ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r ALIGNADDRL 10 ..... 110110 ..... 0 0001 1010 ..... @r_r_r BMASK 10 ..... 110110 ..... 0 0001 1001 ..... @r_r_r - FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_r_r - FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_r_r - FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_r_r - FPCMPEQ16 10 ..... 
110110 ..... 0 0010 1010 ..... @r_r_r - FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_r_r - FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_r_r - FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_r_r - FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_r_r + CMASK8 10 00000 110110 00000 0 0001 1011 rs2:5 + CMASK16 10 00000 110110 00000 0 0001 1101 rs2:5 + CMASK32 10 00000 110110 00000 0 0001 1111 rs2:5 - FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @r_r_r - FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @r_r_r - FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @r_r_r - FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @r_r_r - FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @r_r_r - FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @r_r_r - FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @r_r_r - FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @r_r_r - FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_r2 - FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_r2 - PDIST 10 ..... 110110 ..... 0 0011 1110 ..... @r_r_r + FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_d_d + FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_d_d + FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_d_d + FPCMPEQ16 10 ..... 110110 ..... 0 0010 1010 ..... @r_d_d + FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_d_d + FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_d_d + FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_d_d + FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_d_d - FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @r_r_r - FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @r_r_r - BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @r_r_r - FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @r_r2 + FSLL16 10 ..... 110110 ..... 0 0010 0001 ..... @d_d_d + FSRL16 10 ..... 110110 ..... 0 0010 0011 ..... @d_d_d + FSLAS16 10 ..... 110110 ..... 0 0010 1001 ..... @d_d_d + FSRA16 10 ..... 110110 ..... 0 0010 1011 ..... @d_d_d + FSLL32 10 ..... 110110 ..... 0 0010 0101 ..... @d_d_d + FSRL32 10 ..... 110110 ..... 0 0010 0111 ..... @d_d_d + FSLAS32 10 ..... 110110 ..... 0 0010 1101 ..... @d_d_d + FSRA32 10 ..... 110110 ..... 0 0010 1111 ..... @d_d_d - FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @r_r1 # FSRC1d + FPCMPULE8 10 ..... 110110 ..... 1 0010 0000 ..... @r_d_d + FPCMPUGT8 10 ..... 110110 ..... 1 0010 1000 ..... @r_d_d + FPCMPNE8 10 ..... 110110 ..... 1 0010 0010 ..... @r_d_d + FPCMPEQ8 10 ..... 110110 ..... 1 0010 1010 ..... @r_d_d + FPCMPLE8 10 ..... 110110 ..... 0 0011 0100 ..... @r_d_d + FPCMPGT8 10 ..... 110110 ..... 0 0011 1100 ..... @r_d_d + FPCMPULE16 10 ..... 110110 ..... 1 0010 1110 ..... @r_d_d + FPCMPUGT16 10 ..... 110110 ..... 1 0010 1011 ..... @r_d_d + FPCMPULE32 10 ..... 110110 ..... 1 0010 1111 ..... @r_d_d + FPCMPUGT32 10 ..... 110110 ..... 1 0010 1100 ..... @r_d_d + + FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @d_r_d + FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @d_r_r + FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @d_r_r + FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @d_d_d + FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @d_d_d + FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @d_r_r + FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @d_r_r + FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @d_d_d + FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_d2 + FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_d2 + PDIST 10 ..... 110110 ..... 0 0011 1110 ..... 
\ + &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2 + PDISTN 10 ..... 110110 ..... 0 0011 1111 ..... @r_d_d + + FMEAN16 10 ..... 110110 ..... 0 0100 0000 ..... @d_d_d + SUBXC 10 ..... 110110 ..... 0 0100 0001 ..... @r_r_r + SUBXCcc 10 ..... 110110 ..... 0 0100 0011 ..... @r_r_r + FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d + FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d + FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r + BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @d_d_d + FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @d_r2 + FALIGNDATAi 10 ..... 110110 ..... 0 0100 1001 ..... @d_r_d + + FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @d_d1 # FSRC1d FSRCs 10 ..... 110110 ..... 0 0111 0101 00000 @r_r1 # FSRC1s - FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @r_r2 # FSRC2d + FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @d_d2 # FSRC2d FSRCs 10 ..... 110110 00000 0 0111 1001 ..... @r_r2 # FSRC2s - FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @r_r1 # FNOT1d + FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @d_d1 # FNOT1d FNOTs 10 ..... 110110 ..... 0 0110 1011 00000 @r_r1 # FNOT1s - FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @r_r2 # FNOT2d + FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @d_d2 # FNOT2d FNOTs 10 ..... 110110 00000 0 0110 0111 ..... @r_r2 # FNOT2s - FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @r_r_r + FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @d_d_d FPADD16s 10 ..... 110110 ..... 0 0101 0001 ..... @r_r_r - FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @r_r_r + FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @d_d_d FPADD32s 10 ..... 110110 ..... 0 0101 0011 ..... @r_r_r - FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @r_r_r + FPADD64 10 ..... 110110 ..... 0 0100 0010 ..... @d_d_d + FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @d_d_d FPSUB16s 10 ..... 110110 ..... 0 0101 0101 ..... @r_r_r - FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @r_r_r + FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @d_d_d FPSUB32s 10 ..... 110110 ..... 0 0101 0111 ..... @r_r_r + FPSUB64 10 ..... 110110 ..... 0 0100 0110 ..... @d_d_d - FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @r_r_r + FPADDS16 10 ..... 110110 ..... 0 0101 1000 ..... @d_d_d + FPADDS16s 10 ..... 110110 ..... 0 0101 1001 ..... @r_r_r + FPADDS32 10 ..... 110110 ..... 0 0101 1010 ..... @d_d_d + FPADDS32s 10 ..... 110110 ..... 0 0101 1011 ..... @r_r_r + FPSUBS16 10 ..... 110110 ..... 0 0101 1100 ..... @d_d_d + FPSUBS16s 10 ..... 110110 ..... 0 0101 1101 ..... @r_r_r + FPSUBS32 10 ..... 110110 ..... 0 0101 1110 ..... @d_d_d + FPSUBS32s 10 ..... 110110 ..... 0 0101 1111 ..... @r_r_r + + FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @d_d_d FNORs 10 ..... 110110 ..... 0 0110 0011 ..... @r_r_r - FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @r_r_r # FANDNOT2d + FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @d_d_d # FANDNOT2d FANDNOTs 10 ..... 110110 ..... 0 0110 0101 ..... @r_r_r # FANDNOT2s - FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @r_r_r_swap # ... 1d + FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @d_d_d_swap # ... 1d FANDNOTs 10 ..... 110110 ..... 0 0110 1001 ..... @r_r_r_swap # ... 1s - FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @r_r_r + FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @d_d_d FXORs 10 ..... 110110 ..... 0 0110 1101 ..... @r_r_r - FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @r_r_r + FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @d_d_d FNANDs 10 ..... 110110 ..... 0 0110 1111 ..... @r_r_r - FANDd 10 ..... 110110 ..... 0 0111 0000 ..... 
@r_r_r + FANDd 10 ..... 110110 ..... 0 0111 0000 ..... @d_d_d FANDs 10 ..... 110110 ..... 0 0111 0001 ..... @r_r_r - FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @r_r_r + FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @d_d_d FXNORs 10 ..... 110110 ..... 0 0111 0011 ..... @r_r_r - FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @r_r_r # FORNOT2d + FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @d_d_d # FORNOT2d FORNOTs 10 ..... 110110 ..... 0 0111 0111 ..... @r_r_r # FORNOT2s - FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @r_r_r_swap # ... 1d + FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @d_d_d_swap # ... 1d FORNOTs 10 ..... 110110 ..... 0 0111 1011 ..... @r_r_r_swap # ... 1s - FORd 10 ..... 110110 ..... 0 0111 1100 ..... @r_r_r + FORd 10 ..... 110110 ..... 0 0111 1100 ..... @d_d_d FORs 10 ..... 110110 ..... 0 0111 1101 ..... @r_r_r - FZEROd 10 rd:5 110110 00000 0 0110 0000 00000 + FZEROd 10 ..... 110110 00000 0 0110 0000 00000 rd=%dfp_rd FZEROs 10 rd:5 110110 00000 0 0110 0001 00000 - FONEd 10 rd:5 110110 00000 0 0111 1110 00000 + FONEd 10 ..... 110110 00000 0 0111 1110 00000 rd=%dfp_rd FONEs 10 rd:5 110110 00000 0 0111 1111 00000 + + MOVsTOuw 10 ..... 110110 00000 1 0001 0001 ..... @r_r2 + MOVsTOsw 10 ..... 110110 00000 1 0001 0011 ..... @r_r2 + MOVwTOs 10 ..... 110110 00000 1 0001 1001 ..... @r_r2 + MOVdTOx 10 ..... 110110 00000 1 0001 0000 ..... @r_d2 + MOVxTOd 10 ..... 110110 00000 1 0001 1000 ..... @d_r2 + + FPADD8 10 ..... 110110 ..... 1 0010 0100 ..... @d_d_d + FPADDS8 10 ..... 110110 ..... 1 0010 0110 ..... @d_d_d + FPADDUS8 10 ..... 110110 ..... 1 0010 0111 ..... @d_d_d + FPADDUS16 10 ..... 110110 ..... 1 0010 0011 ..... @d_d_d + FPSUB8 10 ..... 110110 ..... 1 0101 0100 ..... @d_d_d + FPSUBS8 10 ..... 110110 ..... 1 0101 0110 ..... @d_d_d + FPSUBUS8 10 ..... 110110 ..... 1 0101 0111 ..... @d_d_d + FPSUBUS16 10 ..... 110110 ..... 1 0101 0011 ..... @d_d_d + + FPMIN8 10 ..... 110110 ..... 1 0001 1010 ..... @d_d_d + FPMIN16 10 ..... 110110 ..... 1 0001 1011 ..... @d_d_d + FPMIN32 10 ..... 110110 ..... 1 0001 1100 ..... @d_d_d + FPMINU8 10 ..... 110110 ..... 1 0101 1010 ..... @d_d_d + FPMINU16 10 ..... 110110 ..... 1 0101 1011 ..... @d_d_d + FPMINU32 10 ..... 110110 ..... 1 0101 1100 ..... @d_d_d + + FPMAX8 10 ..... 110110 ..... 1 0001 1101 ..... @d_d_d + FPMAX16 10 ..... 110110 ..... 1 0001 1110 ..... @d_d_d + FPMAX32 10 ..... 110110 ..... 1 0001 1111 ..... @d_d_d + FPMAXU8 10 ..... 110110 ..... 1 0101 1101 ..... @d_d_d + FPMAXU16 10 ..... 110110 ..... 1 0101 1110 ..... @d_d_d + FPMAXU32 10 ..... 110110 ..... 1 0101 1111 ..... @d_d_d + + FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5 + FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \ + rs1=%dfp_rs1 rs2=%dfp_rs2 ] NCP 10 ----- 110110 ----- --------- ----- # v8 CPop1 } -NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 +{ + [ + FMADDs 10 ..... 110111 ..... ..... 0001 ..... @r_r_r_r + FMADDd 10 ..... 110111 ..... ..... 0010 ..... @d_d_d_d + FMSUBs 10 ..... 110111 ..... ..... 0101 ..... @r_r_r_r + FMSUBd 10 ..... 110111 ..... ..... 0110 ..... @d_d_d_d + FNMSUBs 10 ..... 110111 ..... ..... 1001 ..... @r_r_r_r + FNMSUBd 10 ..... 110111 ..... ..... 1010 ..... @d_d_d_d + FNMADDs 10 ..... 110111 ..... ..... 1101 ..... @r_r_r_r + FNMADDd 10 ..... 110111 ..... ..... 1110 ..... @d_d_d_d + + FPMADDX 10 ..... 110111 ..... ..... 0000 ..... @d_d_d_d + FPMADDXHI 10 ..... 110111 ..... ..... 0100 ..... 
@d_d_d_d + ] + NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2 +} ## ## Major Opcode 11 -- load and store instructions ## -%dfp_rd 25:5 !function=extract_dfpreg -%qfp_rd 25:5 !function=extract_qfpreg - &r_r_ri_asi rd rs1 rs2_or_imm asi imm:bool @r_r_ri_na .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri_asi asi=-1 @d_r_ri_na .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 \ @@ -477,6 +636,7 @@ STX 11 ..... 011110 ..... . ............. @r_r_i_asi # STXA LDF 11 ..... 100000 ..... . ............. @r_r_ri_na LDFSR 11 00000 100001 ..... . ............. @n_r_ri LDXFSR 11 00001 100001 ..... . ............. @n_r_ri +LDXEFSR 11 00011 100001 ..... . ............. @n_r_ri LDQF 11 ..... 100010 ..... . ............. @q_r_ri_na LDDF 11 ..... 100011 ..... . ............. @d_r_ri_na diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index 7bdf99e0c0..2d48e98bf4 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -1395,6 +1395,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, case ASI_TWINX_PL: /* Primary, twinx, LE */ case ASI_TWINX_S: /* Secondary, twinx */ case ASI_TWINX_SL: /* Secondary, twinx, LE */ + case ASI_MON_P: + case ASI_MON_S: + case ASI_MON_AIUP: + case ASI_MON_AIUS: /* These are always handled inline. */ g_assert_not_reached(); diff --git a/target/sparc/translate.c b/target/sparc/translate.c index dca072888a..113639083b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -28,6 +28,7 @@ #include "exec/helper-gen.h" #include "exec/translator.h" #include "exec/log.h" +#include "fpu/softfloat.h" #include "asi.h" #define HELPER_H "helper.h" @@ -60,14 +61,27 @@ # define gen_helper_write_softint(E, S) qemu_build_not_reached() # define gen_helper_wrpil(E, S) qemu_build_not_reached() # define gen_helper_wrpstate(E, S) qemu_build_not_reached() +# define gen_helper_cmask8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_cmask16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_cmask32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpeq8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpgt8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpgt32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmple8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmple32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpne8 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fcmpne32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpule32 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt8 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt16 ({ qemu_build_not_reached(); NULL; }) +# define gen_helper_fcmpugt32 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fdtox ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) @@ -75,11 +89,15 @@ # define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; }) # define 
gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas16 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas32 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fstox ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtod ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtoq ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtos ({ qemu_build_not_reached(); NULL; })
# define gen_helper_pdist ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulx ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulxhi ({ qemu_build_not_reached(); NULL; })
# define MAXTL_MASK 0
#endif
@@ -123,8 +141,7 @@ static TCGv cpu_gsr;
#define cpu_xcc_C ({ qemu_build_not_reached(); NULL; })
#endif
-/* Floating point registers */
-static TCGv_i64 cpu_fpr[TARGET_DPREGS];
+/* Floating point comparison registers */
static TCGv_i32 cpu_fcc[TARGET_FCCREGS];
#define env_field_offsetof(X) offsetof(CPUSPARCState, X)
@@ -190,14 +207,6 @@ typedef struct DisasContext {
#define GET_FIELDs(x,a,b) sign_extend (GET_FIELD(x,a,b), (b) - (a) + 1)
#define GET_FIELD_SPs(x,a,b) sign_extend (GET_FIELD_SP(x,a,b), ((b) - (a) + 1))
-#ifdef TARGET_SPARC64
-#define DFPREG(r) (((r & 1) << 5) | (r & 0x1e))
-#define QFPREG(r) (((r & 1) << 5) | (r & 0x1c))
-#else
-#define DFPREG(r) (r & 0x1e)
-#define QFPREG(r) (r & 0x1c)
-#endif
-
#define UA2005_HTRAP_MASK 0xff
#define V8_TRAP_MASK 0x7f
@@ -217,59 +226,72 @@ static void gen_update_fprs_dirty(DisasContext *dc, int rd)
}
/* floating point registers moves */
+
+static int gen_offset_fpr_F(unsigned int reg)
+{
+    int ret;
+
+    tcg_debug_assert(reg < 32);
+    ret = offsetof(CPUSPARCState, fpr[reg / 2]);
+    if (reg & 1) {
+        ret += offsetof(CPU_DoubleU, l.lower);
+    } else {
+        ret += offsetof(CPU_DoubleU, l.upper);
+    }
+    return ret;
+}
+
static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
{
    TCGv_i32 ret = tcg_temp_new_i32();
-    if (src & 1) {
-        tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]);
-    } else {
-        tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]);
-    }
+    tcg_gen_ld_i32(ret, tcg_env, gen_offset_fpr_F(src));
    return ret;
}
static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(t, v);
-    tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
-                        (dst & 1 ?
0 : 32), 32); + tcg_gen_st_i32(v, tcg_env, gen_offset_fpr_F(dst)); gen_update_fprs_dirty(dc, dst); } +static int gen_offset_fpr_D(unsigned int reg) +{ + tcg_debug_assert(reg < 64); + tcg_debug_assert(reg % 2 == 0); + return offsetof(CPUSPARCState, fpr[reg / 2]); +} + static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src) { - src = DFPREG(src); - return cpu_fpr[src / 2]; + TCGv_i64 ret = tcg_temp_new_i64(); + tcg_gen_ld_i64(ret, tcg_env, gen_offset_fpr_D(src)); + return ret; } static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v) { - dst = DFPREG(dst); - tcg_gen_mov_i64(cpu_fpr[dst / 2], v); + tcg_gen_st_i64(v, tcg_env, gen_offset_fpr_D(dst)); gen_update_fprs_dirty(dc, dst); } -static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst) -{ - return cpu_fpr[DFPREG(dst) / 2]; -} - static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src) { TCGv_i128 ret = tcg_temp_new_i128(); + TCGv_i64 h = gen_load_fpr_D(dc, src); + TCGv_i64 l = gen_load_fpr_D(dc, src + 2); - src = QFPREG(src); - tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]); + tcg_gen_concat_i64_i128(ret, l, h); return ret; } static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v) { - dst = DFPREG(dst); - tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v); - gen_update_fprs_dirty(dc, dst); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 l = tcg_temp_new_i64(); + + tcg_gen_extr_i128_i64(l, h, v); + gen_store_fpr_D(dc, dst, h); + gen_store_fpr_D(dc, dst + 2, l); } /* moves */ @@ -428,6 +450,17 @@ static void gen_op_addccc(TCGv dst, TCGv src1, TCGv src2) gen_op_addcc_int(dst, src1, src2, gen_carry32()); } +static void gen_op_addxc(TCGv dst, TCGv src1, TCGv src2) +{ + tcg_gen_add_tl(dst, src1, src2); + tcg_gen_add_tl(dst, dst, cpu_cc_C); +} + +static void gen_op_addxccc(TCGv dst, TCGv src1, TCGv src2) +{ + gen_op_addcc_int(dst, src1, src2, cpu_cc_C); +} + static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) { TCGv z = tcg_constant_tl(0); @@ -482,6 +515,17 @@ static void gen_op_subccc(TCGv dst, TCGv src1, TCGv src2) gen_op_subcc_int(dst, src1, src2, gen_carry32()); } +static void gen_op_subxc(TCGv dst, TCGv src1, TCGv src2) +{ + tcg_gen_sub_tl(dst, src1, src2); + tcg_gen_sub_tl(dst, dst, cpu_cc_C); +} + +static void gen_op_subxccc(TCGv dst, TCGv src1, TCGv src2) +{ + gen_op_subcc_int(dst, src1, src2, cpu_cc_C); +} + static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2) { TCGv zero = tcg_constant_tl(0); @@ -556,6 +600,32 @@ static void gen_op_smul(TCGv dst, TCGv src1, TCGv src2) gen_op_multiply(dst, src1, src2, 1); } +static void gen_op_umulxhi(TCGv dst, TCGv src1, TCGv src2) +{ + TCGv discard = tcg_temp_new(); + tcg_gen_mulu2_tl(discard, dst, src1, src2); +} + +static void gen_op_fpmaddx(TCGv_i64 dst, TCGv_i64 src1, + TCGv_i64 src2, TCGv_i64 src3) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t, src1, src2); + tcg_gen_add_i64(dst, src3, t); +} + +static void gen_op_fpmaddxhi(TCGv_i64 dst, TCGv_i64 src1, + TCGv_i64 src2, TCGv_i64 src3) +{ + TCGv_i64 l = tcg_temp_new_i64(); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 z = tcg_constant_i64(0); + + tcg_gen_mulu2_i64(l, h, src1, src2); + tcg_gen_add2_i64(l, dst, l, h, src3, z); +} + static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2) { #ifdef TARGET_SPARC64 @@ -633,6 +703,11 @@ static void gen_op_popc(TCGv dst, TCGv src1, TCGv src2) tcg_gen_ctpop_tl(dst, src2); } +static void gen_op_lzcnt(TCGv dst, TCGv src) +{ + tcg_gen_clzi_tl(dst, src, TARGET_LONG_BITS); 
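+    /*
+     * Note: tcg_gen_clzi_tl returns its final argument for a zero
+     * input, so a zero source yields TARGET_LONG_BITS (64 here),
+     * which is the result LZCNT is expected to produce for a zero
+     * operand.
+     */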
+} + #ifndef TARGET_SPARC64 static void gen_helper_array8(TCGv dst, TCGv src1, TCGv src2) { @@ -679,7 +754,80 @@ static void gen_op_fpack32(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } -static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) +static void gen_op_fpadds16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t[2]; + + for (int i = 0; i < 2; i++) { + TCGv_i32 u = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_sextract_i32(u, src1, i * 16, 16); + tcg_gen_sextract_i32(v, src2, i * 16, 16); + tcg_gen_add_i32(u, u, v); + tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN)); + tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX)); + t[i] = u; + } + tcg_gen_deposit_i32(d, t[0], t[1], 16, 16); +} + +static void gen_op_fpsubs16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t[2]; + + for (int i = 0; i < 2; i++) { + TCGv_i32 u = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_sextract_i32(u, src1, i * 16, 16); + tcg_gen_sextract_i32(v, src2, i * 16, 16); + tcg_gen_sub_i32(u, u, v); + tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN)); + tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX)); + t[i] = u; + } + tcg_gen_deposit_i32(d, t[0], t[1], 16, 16); +} + +static void gen_op_fpadds32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 r = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + TCGv_i32 z = tcg_constant_i32(0); + + tcg_gen_add_i32(r, src1, src2); + tcg_gen_xor_i32(t, src1, src2); + tcg_gen_xor_i32(v, r, src2); + tcg_gen_andc_i32(v, v, t); + + tcg_gen_setcond_i32(TCG_COND_GE, t, r, z); + tcg_gen_addi_i32(t, t, INT32_MAX); + + tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r); +} + +static void gen_op_fpsubs32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 r = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); + TCGv_i32 v = tcg_temp_new_i32(); + TCGv_i32 z = tcg_constant_i32(0); + + tcg_gen_sub_i32(r, src1, src2); + tcg_gen_xor_i32(t, src1, src2); + tcg_gen_xor_i32(v, r, src1); + tcg_gen_and_i32(v, v, t); + + tcg_gen_setcond_i32(TCG_COND_GE, t, r, z); + tcg_gen_addi_i32(t, t, INT32_MAX); + + tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r); +} + +static void gen_op_faligndata_i(TCGv_i64 dst, TCGv_i64 s1, + TCGv_i64 s2, TCGv gsr) { #ifdef TARGET_SPARC64 TCGv t1, t2, shift; @@ -688,7 +836,7 @@ static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) t2 = tcg_temp_new(); shift = tcg_temp_new(); - tcg_gen_andi_tl(shift, cpu_gsr, 7); + tcg_gen_andi_tl(shift, gsr, 7); tcg_gen_shli_tl(shift, shift, 3); tcg_gen_shl_tl(t1, s1, shift); @@ -706,6 +854,11 @@ static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) #endif } +static void gen_op_faligndata_g(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2) +{ + gen_op_faligndata_i(dst, s1, s2, cpu_gsr); +} + static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) { #ifdef TARGET_SPARC64 @@ -715,6 +868,15 @@ static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } +static void gen_op_pdistn(TCGv dst, TCGv_i64 src1, TCGv_i64 src2) +{ +#ifdef TARGET_SPARC64 + gen_helper_pdist(dst, tcg_constant_i64(0), src1, src2); +#else + g_assert_not_reached(); +#endif +} + static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) { tcg_gen_ext16s_i32(src2, src2); @@ -769,6 +931,66 @@ static void gen_op_fmuld8sux16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) tcg_gen_concat_i32_i64(dst, t0, t1); } +#ifdef TARGET_SPARC64 +static void gen_vec_fchksm16(unsigned 
vece, TCGv_vec dst, + TCGv_vec src1, TCGv_vec src2) +{ + TCGv_vec a = tcg_temp_new_vec_matching(dst); + TCGv_vec c = tcg_temp_new_vec_matching(dst); + + tcg_gen_add_vec(vece, a, src1, src2); + tcg_gen_cmp_vec(TCG_COND_LTU, vece, c, a, src1); + /* Vector cmp produces -1 for true, so subtract to add carry. */ + tcg_gen_sub_vec(vece, dst, a, c); +} + +static void gen_op_fchksm16(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, INDEX_op_add_vec, INDEX_op_sub_vec, + }; + static const GVecGen3 op = { + .fni8 = gen_helper_fchksm16, + .fniv = gen_vec_fchksm16, + .opt_opc = vecop_list, + .vece = MO_16, + }; + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op); +} + +static void gen_vec_fmean16(unsigned vece, TCGv_vec dst, + TCGv_vec src1, TCGv_vec src2) +{ + TCGv_vec t = tcg_temp_new_vec_matching(dst); + + tcg_gen_or_vec(vece, t, src1, src2); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(dst, vece, 1)); + tcg_gen_sari_vec(vece, src1, src1, 1); + tcg_gen_sari_vec(vece, src2, src2, 1); + tcg_gen_add_vec(vece, dst, src1, src2); + tcg_gen_add_vec(vece, dst, dst, t); +} + +static void gen_op_fmean16(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, INDEX_op_sari_vec, + }; + static const GVecGen3 op = { + .fni8 = gen_helper_fmean16, + .fniv = gen_vec_fmean16, + .opt_opc = vecop_list, + .vece = MO_16, + }; + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op); +} +#else +#define gen_op_fchksm16 ({ qemu_build_not_reached(); NULL; }) +#define gen_op_fmean16 ({ qemu_build_not_reached(); NULL; }) +#endif + static void finishing_insn(DisasContext *dc) { /* @@ -1138,6 +1360,97 @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src) tcg_gen_concat_i64_i128(dst, l, h); } +static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0)); +} + +static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0)); +} + +static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_c; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_c; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_c | float_muladd_negate_result; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_c | float_muladd_negate_result; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3) +{ + int op = float_muladd_negate_result; + gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3) +{ + int op = float_muladd_negate_result; + gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op)); +} + +/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. 
*/ +static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */ +static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_negate_c | float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_negate_c | float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */ +static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2) +{ + TCGv_i32 one = tcg_constant_i32(float32_one); + int op = float_muladd_negate_result | float_muladd_halve_result; + gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + +static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2) +{ + TCGv_i64 one = tcg_constant_i64(float64_one); + int op = float_muladd_negate_result | float_muladd_halve_result; + gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op)); +} + static void gen_op_fpexception_im(DisasContext *dc, int ftt) { /* @@ -1294,6 +1607,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_BLK_AIUP_L_4V: case ASI_BLK_AIUP: case ASI_BLK_AIUPL: + case ASI_MON_AIUP: mem_idx = MMU_USER_IDX; break; case ASI_AIUS: /* As if user secondary */ @@ -1304,6 +1618,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_BLK_AIUS_L_4V: case ASI_BLK_AIUS: case ASI_BLK_AIUSL: + case ASI_MON_AIUS: mem_idx = MMU_USER_SECONDARY_IDX; break; case ASI_S: /* Secondary */ @@ -1317,6 +1632,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_FL8_SL: case ASI_FL16_S: case ASI_FL16_SL: + case ASI_MON_S: if (mem_idx == MMU_USER_IDX) { mem_idx = MMU_USER_SECONDARY_IDX; } else if (mem_idx == MMU_KERNEL_IDX) { @@ -1334,6 +1650,7 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_FL8_PL: case ASI_FL16_P: case ASI_FL16_PL: + case ASI_MON_P: break; } switch (asi) { @@ -1351,6 +1668,10 @@ static DisasASI resolve_asi(DisasContext *dc, int asi, MemOp memop) case ASI_SL: case ASI_P: case ASI_PL: + case ASI_MON_P: + case ASI_MON_S: + case ASI_MON_AIUP: + case ASI_MON_AIUS: type = GET_ASI_DIRECT; break; case ASI_TWINX_REAL: @@ -1627,7 +1948,7 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, MemOp memop = da->memop; MemOp size = memop & MO_SIZE; TCGv_i32 d32; - TCGv_i64 d64; + TCGv_i64 d64, l64; TCGv addr_tmp; /* TODO: Use 128-bit load/store below. 
*/ @@ -1649,16 +1970,20 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, break; case MO_64: - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, memop); + d64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop); + gen_store_fpr_D(dc, rd, d64); break; case MO_128: d64 = tcg_temp_new_i64(); + l64 = tcg_temp_new_i64(); tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop); - tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + tcg_gen_qemu_ld_i64(l64, addr_tmp, da->mem_idx, memop); + gen_store_fpr_D(dc, rd, d64); + gen_store_fpr_D(dc, rd + 2, l64); break; default: g_assert_not_reached(); @@ -1670,9 +1995,11 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, if (orig_size == MO_64 && (rd & 7) == 0) { /* The first operation checks required alignment. */ addr_tmp = tcg_temp_new(); + d64 = tcg_temp_new_i64(); for (int i = 0; ; ++i) { - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx, + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | (i == 0 ? MO_ALIGN_64 : 0)); + gen_store_fpr_D(dc, rd + 2 * i, d64); if (i == 7) { break; } @@ -1687,8 +2014,9 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, case GET_ASI_SHORT: /* Valid for lddfa only. */ if (orig_size == MO_64) { - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN); + d64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | MO_ALIGN); + gen_store_fpr_D(dc, rd, d64); } else { gen_exception(dc, TT_ILL_INSN); } @@ -1713,17 +2041,19 @@ static void gen_ldf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, gen_store_fpr_F(dc, rd, d32); break; case MO_64: - gen_helper_ld_asi(cpu_fpr[rd / 2], tcg_env, addr, - r_asi, r_mop); + d64 = tcg_temp_new_i64(); + gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop); + gen_store_fpr_D(dc, rd, d64); break; case MO_128: d64 = tcg_temp_new_i64(); + l64 = tcg_temp_new_i64(); gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - gen_helper_ld_asi(cpu_fpr[rd / 2 + 1], tcg_env, addr_tmp, - r_asi, r_mop); - tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + gen_helper_ld_asi(l64, tcg_env, addr_tmp, r_asi, r_mop); + gen_store_fpr_D(dc, rd, d64); + gen_store_fpr_D(dc, rd + 2, l64); break; default: g_assert_not_reached(); @@ -1739,6 +2069,7 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, MemOp memop = da->memop; MemOp size = memop & MO_SIZE; TCGv_i32 d32; + TCGv_i64 d64; TCGv addr_tmp; /* TODO: Use 128-bit load/store below. */ @@ -1758,8 +2089,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, tcg_gen_qemu_st_i32(d32, addr, da->mem_idx, memop | MO_ALIGN); break; case MO_64: - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN_4); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_4); break; case MO_128: /* Only 4-byte alignment required. However, it is legal for the @@ -1767,11 +2098,12 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, required to fix it up. Requiring 16-byte alignment here avoids having to probe the second page before performing the first write. 
*/ - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN_16); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_16); addr_tmp = tcg_temp_new(); tcg_gen_addi_tl(addr_tmp, addr, 8); - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop); + d64 = gen_load_fpr_D(dc, rd + 2); + tcg_gen_qemu_st_i64(d64, addr_tmp, da->mem_idx, memop); break; default: g_assert_not_reached(); @@ -1784,7 +2116,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, /* The first operation checks required alignment. */ addr_tmp = tcg_temp_new(); for (int i = 0; ; ++i) { - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx, + d64 = gen_load_fpr_D(dc, rd + 2 * i); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | (i == 0 ? MO_ALIGN_64 : 0)); if (i == 7) { break; @@ -1800,8 +2133,8 @@ static void gen_stf_asi(DisasContext *dc, DisasASI *da, MemOp orig_size, case GET_ASI_SHORT: /* Valid for stdfa only. */ if (orig_size == MO_64) { - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx, - memop | MO_ALIGN); + d64 = gen_load_fpr_D(dc, rd); + tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN); } else { gen_exception(dc, TT_ILL_INSN); } @@ -2032,7 +2365,7 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs) static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 - TCGv_i64 dst = gen_dest_fpr_D(dc, rd); + TCGv_i64 dst = tcg_temp_new_i64(); tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2), gen_load_fpr_D(dc, rs), gen_load_fpr_D(dc, rd)); @@ -2045,16 +2378,18 @@ static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs) static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs) { #ifdef TARGET_SPARC64 - int qd = QFPREG(rd); - int qs = QFPREG(rs); TCGv c2 = tcg_constant_tl(cmp->c2); + TCGv_i64 h = tcg_temp_new_i64(); + TCGv_i64 l = tcg_temp_new_i64(); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2, - cpu_fpr[qs / 2], cpu_fpr[qd / 2]); - tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2, - cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]); - - gen_update_fprs_dirty(dc, qd); + tcg_gen_movcond_i64(cmp->cond, h, cmp->c1, c2, + gen_load_fpr_D(dc, rs), + gen_load_fpr_D(dc, rd)); + tcg_gen_movcond_i64(cmp->cond, l, cmp->c1, c2, + gen_load_fpr_D(dc, rs + 2), + gen_load_fpr_D(dc, rd + 2)); + gen_store_fpr_D(dc, rd, h); + gen_store_fpr_D(dc, rd + 2, l); #else qemu_build_not_reached(); #endif @@ -2086,12 +2421,20 @@ static void gen_load_trap_state_at_tl(TCGv_ptr r_tsptr) static int extract_dfpreg(DisasContext *dc, int x) { - return DFPREG(x); + int r = x & 0x1e; +#ifdef TARGET_SPARC64 + r |= (x & 1) << 5; +#endif + return r; } static int extract_qfpreg(DisasContext *dc, int x) { - return QFPREG(x); + int r = x & 0x1c; +#ifdef TARGET_SPARC64 + r |= (x & 1) << 5; +#endif + return r; } /* Include the auto-generated decoder. 
*/ @@ -2110,10 +2453,15 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_MUL(C) true # define avail_POWERDOWN(C) false # define avail_64(C) true +# define avail_FMAF(C) ((C)->def->features & CPU_FEATURE_FMAF) # define avail_GL(C) ((C)->def->features & CPU_FEATURE_GL) # define avail_HYPV(C) ((C)->def->features & CPU_FEATURE_HYPV) +# define avail_IMA(C) ((C)->def->features & CPU_FEATURE_IMA) # define avail_VIS1(C) ((C)->def->features & CPU_FEATURE_VIS1) # define avail_VIS2(C) ((C)->def->features & CPU_FEATURE_VIS2) +# define avail_VIS3(C) ((C)->def->features & CPU_FEATURE_VIS3) +# define avail_VIS3B(C) avail_VIS3(C) +# define avail_VIS4(C) ((C)->def->features & CPU_FEATURE_VIS4) #else # define avail_32(C) true # define avail_ASR17(C) ((C)->def->features & CPU_FEATURE_ASR17) @@ -2122,10 +2470,15 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_MUL(C) ((C)->def->features & CPU_FEATURE_MUL) # define avail_POWERDOWN(C) ((C)->def->features & CPU_FEATURE_POWERDOWN) # define avail_64(C) false +# define avail_FMAF(C) false # define avail_GL(C) false # define avail_HYPV(C) false +# define avail_IMA(C) false # define avail_VIS1(C) false # define avail_VIS2(C) false +# define avail_VIS3(C) false +# define avail_VIS3B(C) false +# define avail_VIS4(C) false #endif /* Default case for non jump instructions. */ @@ -2999,6 +3352,17 @@ static void do_wrpowerdown(DisasContext *dc, TCGv src) TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown) +static void do_wrmwait(DisasContext *dc, TCGv src) +{ + /* + * TODO: This is a stub version of mwait, which merely recognizes + * interrupts immediately and does not wait. + */ + dc->base.is_jmp = DISAS_EXIT; +} + +TRANS(WRMWAIT, VIS4, do_wr_special, a, true, do_wrmwait) + static void do_wrpsr(DisasContext *dc, TCGv src) { gen_helper_wrpsr(tcg_env, src); @@ -3519,11 +3883,10 @@ static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) } static bool gen_edge(DisasContext *dc, arg_r_r_r *a, - int width, bool cc, bool left) + int width, bool cc, bool little_endian) { - TCGv dst, s1, s2, lo1, lo2; - uint64_t amask, tabl, tabr; - int shift, imask, omask; + TCGv dst, s1, s2, l, r, t, m; + uint64_t amask = address_mask_i(dc, -8); dst = gen_dest_gpr(dc, a->rd); s1 = gen_load_gpr(dc, a->rs1); @@ -3533,75 +3896,52 @@ static bool gen_edge(DisasContext *dc, arg_r_r_r *a, gen_op_subcc(cpu_cc_N, s1, s2); } - /* - * Theory of operation: there are two tables, left and right (not to - * be confused with the left and right versions of the opcode). These - * are indexed by the low 3 bits of the inputs. To make things "easy", - * these tables are loaded into two constants, TABL and TABR below. - * The operation index = (input & imask) << shift calculates the index - * into the constant, while val = (table >> index) & omask calculates - * the value we're looking for. 
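
   (The replacement below drops the packed lookup tables entirely and
   computes both edge masks arithmetically from the low address bits,
   then selects between them with a single TCG_COND_TSTEQ movcond.  A
   host-side model of the big-endian EDGE8 case, illustrative only:

       static unsigned edge8_be(uint64_t s1, uint64_t s2)
       {
           unsigned l = 0xff >> (s1 & 7);                 // left-edge mask
           unsigned r = (0xff << ((s2 & 7) ^ 7)) & 0xff;  // right-edge mask
           // both edges apply only when s1 and s2 address the same
           // 8-byte word; otherwise only the left edge does
           return ((s1 ^ s2) & ~(uint64_t)7) ? l : (l & r);
       }
   )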
- */ + l = tcg_temp_new(); + r = tcg_temp_new(); + t = tcg_temp_new(); + switch (width) { case 8: - imask = 0x7; - shift = 3; - omask = 0xff; - if (left) { - tabl = 0x80c0e0f0f8fcfeffULL; - tabr = 0xff7f3f1f0f070301ULL; - } else { - tabl = 0x0103070f1f3f7fffULL; - tabr = 0xfffefcf8f0e0c080ULL; - } + tcg_gen_andi_tl(l, s1, 7); + tcg_gen_andi_tl(r, s2, 7); + tcg_gen_xori_tl(r, r, 7); + m = tcg_constant_tl(0xff); break; case 16: - imask = 0x6; - shift = 1; - omask = 0xf; - if (left) { - tabl = 0x8cef; - tabr = 0xf731; - } else { - tabl = 0x137f; - tabr = 0xfec8; - } + tcg_gen_extract_tl(l, s1, 1, 2); + tcg_gen_extract_tl(r, s2, 1, 2); + tcg_gen_xori_tl(r, r, 3); + m = tcg_constant_tl(0xf); break; case 32: - imask = 0x4; - shift = 0; - omask = 0x3; - if (left) { - tabl = (2 << 2) | 3; - tabr = (3 << 2) | 1; - } else { - tabl = (1 << 2) | 3; - tabr = (3 << 2) | 2; - } + tcg_gen_extract_tl(l, s1, 2, 1); + tcg_gen_extract_tl(r, s2, 2, 1); + tcg_gen_xori_tl(r, r, 1); + m = tcg_constant_tl(0x3); break; default: abort(); } - lo1 = tcg_temp_new(); - lo2 = tcg_temp_new(); - tcg_gen_andi_tl(lo1, s1, imask); - tcg_gen_andi_tl(lo2, s2, imask); - tcg_gen_shli_tl(lo1, lo1, shift); - tcg_gen_shli_tl(lo2, lo2, shift); + /* Compute Left Edge */ + if (little_endian) { + tcg_gen_shl_tl(l, m, l); + tcg_gen_and_tl(l, l, m); + } else { + tcg_gen_shr_tl(l, m, l); + } + /* Compute Right Edge */ + if (little_endian) { + tcg_gen_shr_tl(r, m, r); + } else { + tcg_gen_shl_tl(r, m, r); + tcg_gen_and_tl(r, r, m); + } - tcg_gen_shr_tl(lo1, tcg_constant_tl(tabl), lo1); - tcg_gen_shr_tl(lo2, tcg_constant_tl(tabr), lo2); - tcg_gen_andi_tl(lo1, lo1, omask); - tcg_gen_andi_tl(lo2, lo2, omask); - - amask = address_mask_i(dc, -8); - tcg_gen_andi_tl(s1, s1, amask); - tcg_gen_andi_tl(s2, s2, amask); - - /* Compute dst = (s1 == s2 ? lo1 : lo1 & lo2). */ - tcg_gen_and_tl(lo2, lo2, lo1); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, s1, s2, lo1, lo2); + /* Compute dst = (s1 == s2 under amask ? 
l : l & r) */ + tcg_gen_xor_tl(t, s1, s2); + tcg_gen_and_tl(r, r, l); + tcg_gen_movcond_tl(TCG_COND_TSTEQ, dst, t, tcg_constant_tl(amask), r, l); gen_store_gpr(dc, a->rd, dst); return advance_pc(dc); @@ -3621,6 +3961,19 @@ TRANS(EDGE16LN, VIS2, gen_edge, a, 16, 0, 1) TRANS(EDGE32N, VIS2, gen_edge, a, 32, 0, 0) TRANS(EDGE32LN, VIS2, gen_edge, a, 32, 0, 1) +static bool do_rr(DisasContext *dc, arg_r_r *a, + void (*func)(TCGv, TCGv)) +{ + TCGv dst = gen_dest_gpr(dc, a->rd); + TCGv src = gen_load_gpr(dc, a->rs); + + func(dst, src); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(LZCNT, VIS3, do_rr, a, gen_op_lzcnt) + static bool do_rrr(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv, TCGv)) { @@ -3637,6 +3990,14 @@ TRANS(ARRAY8, VIS1, do_rrr, a, gen_helper_array8) TRANS(ARRAY16, VIS1, do_rrr, a, gen_op_array16) TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32) +TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc) +TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc) + +TRANS(SUBXC, VIS4, do_rrr, a, gen_op_subxc) +TRANS(SUBXCcc, VIS4, do_rrr, a, gen_op_subxccc) + +TRANS(UMULXHI, VIS3, do_rrr, a, gen_op_umulxhi) + static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2) { #ifdef TARGET_SPARC64 @@ -3679,6 +4040,16 @@ static void gen_op_bmask(TCGv dst, TCGv s1, TCGv s2) TRANS(BMASK, VIS2, do_rrr, a, gen_op_bmask) +static bool do_cmask(DisasContext *dc, int rs2, void (*func)(TCGv, TCGv, TCGv)) +{ + func(cpu_gsr, cpu_gsr, gen_load_gpr(dc, rs2)); + return true; +} + +TRANS(CMASK8, VIS3, do_cmask, a->rs2, gen_helper_cmask8) +TRANS(CMASK16, VIS3, do_cmask, a->rs2, gen_helper_cmask16) +TRANS(CMASK32, VIS3, do_cmask, a->rs2, gen_helper_cmask32) + static bool do_shift_r(DisasContext *dc, arg_shiftr *a, bool l, bool u) { TCGv dst, src1, src2; @@ -4193,7 +4564,7 @@ static bool trans_LDFSR(DisasContext *dc, arg_r_r_ri *a) return advance_pc(dc); } -static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a) +static bool do_ldxfsr(DisasContext *dc, arg_r_r_ri *a, bool entire) { #ifdef TARGET_SPARC64 TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm); @@ -4218,13 +4589,20 @@ static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a) tcg_gen_extract_i32(cpu_fcc[2], hi, FSR_FCC2_SHIFT - 32, 2); tcg_gen_extract_i32(cpu_fcc[3], hi, FSR_FCC3_SHIFT - 32, 2); - gen_helper_set_fsr_nofcc_noftt(tcg_env, lo); + if (entire) { + gen_helper_set_fsr_nofcc(tcg_env, lo); + } else { + gen_helper_set_fsr_nofcc_noftt(tcg_env, lo); + } return advance_pc(dc); #else return false; #endif } +TRANS(LDXFSR, 64, do_ldxfsr, a, false) +TRANS(LDXEFSR, VIS3B, do_ldxfsr, a, true) + static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop) { TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm); @@ -4246,39 +4624,24 @@ static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop) TRANS(STFSR, ALL, do_stfsr, a, MO_TEUL) TRANS(STXFSR, 64, do_stfsr, a, MO_TEUQ) -static bool do_fc(DisasContext *dc, int rd, bool c) +static bool do_fc(DisasContext *dc, int rd, int32_t c) { - uint64_t mask; - if (gen_trap_ifnofpu(dc)) { return true; } - - if (rd & 1) { - mask = MAKE_64BIT_MASK(0, 32); - } else { - mask = MAKE_64BIT_MASK(32, 32); - } - if (c) { - tcg_gen_ori_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], mask); - } else { - tcg_gen_andi_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], ~mask); - } - gen_update_fprs_dirty(dc, rd); + gen_store_fpr_F(dc, rd, tcg_constant_i32(c)); return advance_pc(dc); } TRANS(FZEROs, VIS1, do_fc, a->rd, 0) -TRANS(FONEs, VIS1, do_fc, a->rd, 1) +TRANS(FONEs, VIS1, do_fc, a->rd, -1) static bool 
do_dc(DisasContext *dc, int rd, int64_t c) { if (gen_trap_ifnofpu(dc)) { return true; } - - tcg_gen_movi_i64(cpu_fpr[rd / 2], c); - gen_update_fprs_dirty(dc, rd); + gen_store_fpr_D(dc, rd, tcg_constant_i64(c)); return advance_pc(dc); } @@ -4375,7 +4738,7 @@ static bool do_dd(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_D(dc, a->rs); func(dst, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4397,7 +4760,7 @@ static bool do_env_dd(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_D(dc, a->rs); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4437,7 +4800,7 @@ static bool do_env_df(DisasContext *dc, arg_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src = gen_load_fpr_F(dc, a->rs); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); @@ -4528,7 +4891,7 @@ static bool do_env_dq(DisasContext *dc, arg_r_r *a, } src = gen_load_fpr_Q(dc, a->rs); - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); func(dst, tcg_env, src); gen_store_fpr_D(dc, a->rd, dst); return advance_pc(dc); @@ -4612,6 +4975,15 @@ TRANS(FXNORs, VIS1, do_fff, a, tcg_gen_eqv_i32) TRANS(FORNOTs, VIS1, do_fff, a, tcg_gen_orc_i32) TRANS(FORs, VIS1, do_fff, a, tcg_gen_or_i32) +TRANS(FHADDs, VIS3, do_fff, a, gen_op_fhadds) +TRANS(FHSUBs, VIS3, do_fff, a, gen_op_fhsubs) +TRANS(FNHADDs, VIS3, do_fff, a, gen_op_fnhadds) + +TRANS(FPADDS16s, VIS3, do_fff, a, gen_op_fpadds16s) +TRANS(FPSUBS16s, VIS3, do_fff, a, gen_op_fpsubs16s) +TRANS(FPADDS32s, VIS3, do_fff, a, gen_op_fpadds32s) +TRANS(FPSUBS32s, VIS3, do_fff, a, gen_op_fpsubs32s) + static bool do_env_fff(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32)) { @@ -4632,6 +5004,8 @@ TRANS(FADDs, ALL, do_env_fff, a, gen_helper_fadds) TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs) TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls) TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs) +TRANS(FNADDs, VIS3, do_env_fff, a, gen_helper_fnadds) +TRANS(FNMULs, VIS3, do_env_fff, a, gen_helper_fnmuls) static bool do_dff(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i32, TCGv_i32)) @@ -4643,7 +5017,7 @@ static bool do_dff(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_F(dc, a->rs2); func(dst, src1, src2); @@ -4667,7 +5041,7 @@ static bool do_dfd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, src1, src2); @@ -4677,6 +5051,63 @@ static bool do_dfd(DisasContext *dc, arg_r_r_r *a, TRANS(FMUL8x16, VIS1, do_dfd, a, gen_helper_fmul8x16) +static bool do_gvec_ddd(DisasContext *dc, arg_r_r_r *a, MemOp vece, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t)) +{ + if (gen_trap_ifnofpu(dc)) { + return true; + } + + func(vece, gen_offset_fpr_D(a->rd), gen_offset_fpr_D(a->rs1), + gen_offset_fpr_D(a->rs2), 8, 8); + return advance_pc(dc); +} + +TRANS(FPADD8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_add) +TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add) +TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add) + +TRANS(FPSUB8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sub) +TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub) +TRANS(FPSUB32, VIS1, 
do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub) + +TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16) +TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16) + +TRANS(FPADDS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ssadd) +TRANS(FPADDS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ssadd) +TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd) +TRANS(FPADDUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_usadd) +TRANS(FPADDUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_usadd) + +TRANS(FPSUBS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sssub) +TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub) +TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub) +TRANS(FPSUBUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ussub) +TRANS(FPSUBUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ussub) + +TRANS(FSLL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shlv) +TRANS(FSLL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shlv) +TRANS(FSRL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shrv) +TRANS(FSRL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shrv) +TRANS(FSRA16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sarv) +TRANS(FSRA32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sarv) + +TRANS(FPMIN8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smin) +TRANS(FPMIN16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smin) +TRANS(FPMIN32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smin) +TRANS(FPMINU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umin) +TRANS(FPMINU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umin) +TRANS(FPMINU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umin) + +TRANS(FPMAX8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smax) +TRANS(FPMAX16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smax) +TRANS(FPMAX32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smax) +TRANS(FPMAXU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umax) +TRANS(FPMAXU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umax) +TRANS(FPMAXU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umax) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { @@ -4686,7 +5117,7 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, src1, src2); @@ -4697,10 +5128,6 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16) TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16) -TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64) -TRANS(FPADD32, VIS1, do_ddd, a, tcg_gen_vec_add32_i64) -TRANS(FPSUB16, VIS1, do_ddd, a, tcg_gen_vec_sub16_i64) -TRANS(FPSUB32, VIS1, do_ddd, a, tcg_gen_vec_sub32_i64) TRANS(FNORd, VIS1, do_ddd, a, tcg_gen_nor_i64) TRANS(FANDNOTd, VIS1, do_ddd, a, tcg_gen_andc_i64) TRANS(FXORd, VIS1, do_ddd, a, tcg_gen_xor_i64) @@ -4711,9 +5138,18 @@ TRANS(FORNOTd, VIS1, do_ddd, a, tcg_gen_orc_i64) TRANS(FORd, VIS1, do_ddd, a, tcg_gen_or_i64) TRANS(FPACK32, VIS1, do_ddd, a, gen_op_fpack32) -TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata) +TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata_g) TRANS(BSHUFFLE, VIS2, do_ddd, a, gen_op_bshuffle) +TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd) +TRANS(FHSUBd, VIS3, do_ddd, a, gen_op_fhsubd) +TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd) + +TRANS(FPADD64, VIS3B, do_ddd, a, tcg_gen_add_i64) +TRANS(FPSUB64, VIS3B, do_ddd, a, tcg_gen_sub_i64) +TRANS(FSLAS16, VIS3, do_ddd, a, gen_helper_fslas16) +TRANS(FSLAS32, VIS3, do_ddd, a, 
gen_helper_fslas32) + static bool do_rdd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv, TCGv_i64, TCGv_i64)) { @@ -4736,11 +5172,26 @@ TRANS(FPCMPLE16, VIS1, do_rdd, a, gen_helper_fcmple16) TRANS(FPCMPNE16, VIS1, do_rdd, a, gen_helper_fcmpne16) TRANS(FPCMPGT16, VIS1, do_rdd, a, gen_helper_fcmpgt16) TRANS(FPCMPEQ16, VIS1, do_rdd, a, gen_helper_fcmpeq16) +TRANS(FPCMPULE16, VIS4, do_rdd, a, gen_helper_fcmpule16) +TRANS(FPCMPUGT16, VIS4, do_rdd, a, gen_helper_fcmpugt16) TRANS(FPCMPLE32, VIS1, do_rdd, a, gen_helper_fcmple32) TRANS(FPCMPNE32, VIS1, do_rdd, a, gen_helper_fcmpne32) TRANS(FPCMPGT32, VIS1, do_rdd, a, gen_helper_fcmpgt32) TRANS(FPCMPEQ32, VIS1, do_rdd, a, gen_helper_fcmpeq32) +TRANS(FPCMPULE32, VIS4, do_rdd, a, gen_helper_fcmpule32) +TRANS(FPCMPUGT32, VIS4, do_rdd, a, gen_helper_fcmpugt32) + +TRANS(FPCMPEQ8, VIS3B, do_rdd, a, gen_helper_fcmpeq8) +TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8) +TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8) +TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8) +TRANS(FPCMPLE8, VIS4, do_rdd, a, gen_helper_fcmple8) +TRANS(FPCMPGT8, VIS4, do_rdd, a, gen_helper_fcmpgt8) + +TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn) +TRANS(XMULX, VIS3, do_rrr, a, gen_helper_xmulx) +TRANS(XMULXHI, VIS3, do_rrr, a, gen_helper_xmulxhi) static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64)) @@ -4751,7 +5202,7 @@ static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a, return true; } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); func(dst, tcg_env, src1, src2); @@ -4763,6 +5214,8 @@ TRANS(FADDd, ALL, do_env_ddd, a, gen_helper_faddd) TRANS(FSUBd, ALL, do_env_ddd, a, gen_helper_fsubd) TRANS(FMULd, ALL, do_env_ddd, a, gen_helper_fmuld) TRANS(FDIVd, ALL, do_env_ddd, a, gen_helper_fdivd) +TRANS(FNADDd, VIS3, do_env_ddd, a, gen_helper_fnaddd) +TRANS(FNMULd, VIS3, do_env_ddd, a, gen_helper_fnmuld) static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) { @@ -4776,7 +5229,7 @@ static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) return raise_unimpfpop(dc); } - dst = gen_dest_fpr_D(dc, a->rd); + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_F(dc, a->rs1); src2 = gen_load_fpr_F(dc, a->rs2); gen_helper_fsmuld(dst, tcg_env, src1, src2); @@ -4784,25 +5237,94 @@ static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a) return advance_pc(dc); } -static bool do_dddd(DisasContext *dc, arg_r_r_r *a, - void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +static bool trans_FNsMULd(DisasContext *dc, arg_r_r_r *a) { - TCGv_i64 dst, src0, src1, src2; + TCGv_i64 dst; + TCGv_i32 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + dst = tcg_temp_new_i64(); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + gen_helper_fnsmuld(dst, tcg_env, src1, src2); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + +static bool do_ffff(DisasContext *dc, arg_r_r_r_r *a, + void (*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 dst, src1, src2, src3; if (gen_trap_ifnofpu(dc)) { return true; } - dst = gen_dest_fpr_D(dc, a->rd); - src0 = gen_load_fpr_D(dc, a->rd); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + src3 = gen_load_fpr_F(dc, a->rs3); + dst = tcg_temp_new_i32(); + func(dst, src1, src2, src3); + gen_store_fpr_F(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FMADDs, FMAF, do_ffff, a, gen_op_fmadds) 
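
/*
 * The gen_op_f[n]m{add,sub}{s,d} wrappers named in these TRANS lines are
 * presumably thin forwarders that pass a softfloat muladd flag to the
 * helper_fmadds/helper_fmaddd helpers added earlier in this patch.  A
 * sketch of one of them, assuming FMSUBs computes rs1 * rs2 - rs3
 * (i.e. a negated addend):
 *
 *     static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1,
 *                               TCGv_i32 s2, TCGv_i32 s3)
 *     {
 *         int op = float_muladd_negate_c;   // negate the addend only
 *         gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
 *     }
 */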
+TRANS(FMSUBs, FMAF, do_ffff, a, gen_op_fmsubs) +TRANS(FNMSUBs, FMAF, do_ffff, a, gen_op_fnmsubs) +TRANS(FNMADDs, FMAF, do_ffff, a, gen_op_fnmadds) + +static bool do_dddd(DisasContext *dc, arg_r_r_r_r *a, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 dst, src1, src2, src3; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = tcg_temp_new_i64(); src1 = gen_load_fpr_D(dc, a->rs1); src2 = gen_load_fpr_D(dc, a->rs2); - func(dst, src0, src1, src2); + src3 = gen_load_fpr_D(dc, a->rs3); + func(dst, src1, src2, src3); gen_store_fpr_D(dc, a->rd, dst); return advance_pc(dc); } TRANS(PDIST, VIS1, do_dddd, a, gen_helper_pdist) +TRANS(FMADDd, FMAF, do_dddd, a, gen_op_fmaddd) +TRANS(FMSUBd, FMAF, do_dddd, a, gen_op_fmsubd) +TRANS(FNMSUBd, FMAF, do_dddd, a, gen_op_fnmsubd) +TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd) +TRANS(FPMADDX, IMA, do_dddd, a, gen_op_fpmaddx) +TRANS(FPMADDXHI, IMA, do_dddd, a, gen_op_fpmaddxhi) + +static bool trans_FALIGNDATAi(DisasContext *dc, arg_r_r_r *a) +{ + TCGv_i64 dst, src1, src2; + TCGv src3; + + if (!avail_VIS4(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = tcg_temp_new_i64(); + src1 = gen_load_fpr_D(dc, a->rd); + src2 = gen_load_fpr_D(dc, a->rs2); + src3 = gen_load_gpr(dc, a->rs1); + gen_op_faligndata_i(dst, src1, src2, src3); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128)) @@ -4991,6 +5513,76 @@ static bool do_fcmpq(DisasContext *dc, arg_FCMPq *a, bool e) TRANS(FCMPq, ALL, do_fcmpq, a, false) TRANS(FCMPEq, ALL, do_fcmpq, a, true) +static bool trans_FLCMPs(DisasContext *dc, arg_FLCMPs *a) +{ + TCGv_i32 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + gen_helper_flcmps(cpu_fcc[a->cc], src1, src2); + return advance_pc(dc); +} + +static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a) +{ + TCGv_i64 src1, src2; + + if (!avail_VIS3(dc)) { + return false; + } + if (gen_trap_ifnofpu(dc)) { + return true; + } + + src1 = gen_load_fpr_D(dc, a->rs1); + src2 = gen_load_fpr_D(dc, a->rs2); + gen_helper_flcmpd(cpu_fcc[a->cc], src1, src2); + return advance_pc(dc); +} + +static bool do_movf2r(DisasContext *dc, arg_r_r *a, + int (*offset)(unsigned int), + void (*load)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv dst; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + dst = gen_dest_gpr(dc, a->rd); + load(dst, tcg_env, offset(a->rs)); + gen_store_gpr(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(MOVsTOsw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32s_tl) +TRANS(MOVsTOuw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32u_tl) +TRANS(MOVdTOx, VIS3B, do_movf2r, a, gen_offset_fpr_D, tcg_gen_ld_tl) + +static bool do_movr2f(DisasContext *dc, arg_r_r *a, + int (*offset)(unsigned int), + void (*store)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv src; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + src = gen_load_gpr(dc, a->rs); + store(src, tcg_env, offset(a->rd)); + return advance_pc(dc); +} + +TRANS(MOVwTOs, VIS3B, do_movr2f, a, gen_offset_fpr_F, tcg_gen_st32_tl) +TRANS(MOVxTOd, VIS3B, do_movr2f, a, gen_offset_fpr_D, tcg_gen_st_tl) + static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); @@ -5172,12 +5764,6 @@ void sparc_tcg_init(void) "l0", "l1", 
"l2", "l3", "l4", "l5", "l6", "l7", "i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7", }; - static const char fregnames[32][4] = { - "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14", - "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30", - "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", - "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", - }; static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = { #ifdef TARGET_SPARC64 @@ -5234,12 +5820,6 @@ void sparc_tcg_init(void) (i - 8) * sizeof(target_ulong), gregnames[i]); } - - for (i = 0; i < TARGET_DPREGS; i++) { - cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env, - offsetof(CPUSPARCState, fpr[i]), - fregnames[i]); - } } void sparc_restore_state_to_opc(CPUState *cs, diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index e15c6bb34e..371f5445a1 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -20,26 +20,44 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "crypto/clmul.h" -/* This function uses non-native bit order */ -#define GET_FIELD(X, FROM, TO) \ - ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) - -/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ -#define GET_FIELD_SP(X, FROM, TO) \ - GET_FIELD(X, 63 - (TO), 63 - (FROM)) - -target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) +target_ulong helper_array8(target_ulong rs1, target_ulong rs2) { - return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | - (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | - (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | - (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | - (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | - (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | - (((pixel_addr >> 55) & 1) << 4) | - (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | - GET_FIELD_SP(pixel_addr, 11, 12); + /* + * From Oracle SPARC Architecture 2015: + * Architecturally, an illegal R[rs2] value (>5) causes the array + * instructions to produce undefined results. For historic reference, + * past implementations of these instructions have ignored R[rs2]{63:3} + * and have treated R[rs2] values of 6 and 7 as if they were 5. 
+ */ + target_ulong n = MIN(rs2 & 7, 5); + + target_ulong x_int = (rs1 >> 11) & 0x7ff; + target_ulong y_int = (rs1 >> 33) & 0x7ff; + target_ulong z_int = rs1 >> 55; + + target_ulong lower_x = x_int & 3; + target_ulong lower_y = y_int & 3; + target_ulong lower_z = z_int & 1; + + target_ulong middle_x = (x_int >> 2) & 15; + target_ulong middle_y = (y_int >> 2) & 15; + target_ulong middle_z = (z_int >> 1) & 15; + + target_ulong upper_x = (x_int >> 6) & ((1 << n) - 1); + target_ulong upper_y = (y_int >> 6) & ((1 << n) - 1); + target_ulong upper_z = z_int >> 5; + + return (upper_z << (17 + 2 * n)) + | (upper_y << (17 + n)) + | (upper_x << 17) + | (middle_z << 13) + | (middle_y << 9) + | (middle_x << 5) + | (lower_z << 4) + | (lower_y << 2) + | lower_x; } #if HOST_BIG_ENDIAN @@ -48,6 +66,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #define VIS_W64(n) w[3 - (n)] #define VIS_SW64(n) sw[3 - (n)] #define VIS_L64(n) l[1 - (n)] +#define VIS_SL64(n) sl[1 - (n)] #define VIS_B32(n) b[3 - (n)] #define VIS_W32(n) w[1 - (n)] #else @@ -56,6 +75,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #define VIS_W64(n) w[n] #define VIS_SW64(n) sw[n] #define VIS_L64(n) l[n] +#define VIS_SL64(n) sl[n] #define VIS_B32(n) b[n] #define VIS_W32(n) w[n] #endif @@ -66,6 +86,7 @@ typedef union { uint16_t w[4]; int16_t sw[4]; uint32_t l[2]; + int32_t sl[2]; uint64_t ll; float64 d; } VIS64; @@ -157,10 +178,10 @@ uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) s.ll = src1; d.ll = src2; - d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); - d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); - d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); - d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); + d.VIS_W64(0) = (s.VIS_B64(0) * d.VIS_SW64(0) + 0x8000) >> 16; + d.VIS_W64(1) = (s.VIS_B64(2) * d.VIS_SW64(1) + 0x8000) >> 16; + d.VIS_W64(2) = (s.VIS_B64(4) * d.VIS_SW64(2) + 0x8000) >> 16; + d.VIS_W64(3) = (s.VIS_B64(6) * d.VIS_SW64(3) + 0x8000) >> 16; return d.ll; } @@ -180,46 +201,171 @@ uint64_t helper_fexpand(uint32_t src2) return d.ll; } -#define VIS_CMPHELPER(name, F) \ - uint64_t name##16(uint64_t src1, uint64_t src2) \ - { \ - VIS64 s, d; \ - \ - s.ll = src1; \ - d.ll = src2; \ - \ - d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ - d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ - d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ - \ - return d.ll; \ - } \ - \ - uint64_t name##32(uint64_t src1, uint64_t src2) \ - { \ - VIS64 s, d; \ - \ - s.ll = src1; \ - d.ll = src2; \ - \ - d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ - d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ - d.VIS_L64(1) = 0; \ - \ - return d.ll; \ +uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + uint64_t m = 0x7f7f7f7f7f7f7f7fULL; + uint64_t c = ~(((a & m) + m) | a | m); + + /* a.......b.......c.......d.......e.......f.......g.......h....... */ + c |= c << 7; + /* ab......bc......cd......de......ef......fg......gh......h....... */ + c |= c << 14; + /* abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... */ + c |= c << 28; + /* abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... 
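
   (The expression for c above is the classic zero-byte trick: for each
   byte x of a = src1 ^ src2, bit 7 of ~(((x & 0x7f) + 0x7f) | x | 0x7f)
   is set iff x == 0, i.e. iff the corresponding bytes of src1 and src2
   compare equal; the three shift/OR steps then funnel all eight MSBs
   into the top byte, as the diagram shows.  A host-side sanity check of
   the first step, illustrative only:

       uint64_t a = 0;                         // src1 == src2, all bytes
       uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
       uint64_t c = ~(((a & m) + m) | a | m);  // yields 0x8080808080808080
   )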
*/ + return c >> 56; +} + +uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2) +{ + return helper_fcmpeq8(src1, src2) ^ 0xff; +} + +uint64_t helper_fcmple8(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 8; ++i) { + r |= (s1.VIS_SB64(i) <= s2.VIS_SB64(i)) << i; } + return r; +} -#define FCMPGT(a, b) ((a) > (b)) -#define FCMPEQ(a, b) ((a) == (b)) -#define FCMPLE(a, b) ((a) <= (b)) -#define FCMPNE(a, b) ((a) != (b)) +uint64_t helper_fcmpgt8(uint64_t src1, uint64_t src2) +{ + return helper_fcmple8(src1, src2) ^ 0xff; +} -VIS_CMPHELPER(helper_fcmpgt, FCMPGT) -VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) -VIS_CMPHELPER(helper_fcmple, FCMPLE) -VIS_CMPHELPER(helper_fcmpne, FCMPNE) +uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 8; ++i) { + r |= (s1.VIS_B64(i) <= s2.VIS_B64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule8(src1, src2) ^ 0xff; +} + +uint64_t helper_fcmpeq16(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + uint64_t m = 0x7fff7fff7fff7fffULL; + uint64_t c = ~(((a & m) + m) | a | m); + + /* a...............b...............c...............d............... */ + c |= c << 15; + /* ab..............bc..............cd..............d............... */ + c |= c << 30; + /* abcd............bcd.............cd..............d............... */ + return c >> 60; +} + +uint64_t helper_fcmpne16(uint64_t src1, uint64_t src2) +{ + return helper_fcmpeq16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmple16(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 4; ++i) { + r |= (s1.VIS_SW64(i) <= s2.VIS_SW64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpgt16(uint64_t src1, uint64_t src2) +{ + return helper_fcmple16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmpule16(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 4; ++i) { + r |= (s1.VIS_W64(i) <= s2.VIS_W64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt16(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule16(src1, src2) ^ 0xf; +} + +uint64_t helper_fcmpeq32(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + return ((uint32_t)a == 0) | (a >> 32 ? 0 : 2); +} + +uint64_t helper_fcmpne32(uint64_t src1, uint64_t src2) +{ + uint64_t a = src1 ^ src2; + return ((uint32_t)a != 0) | (a >> 32 ? 2 : 0); +} + +uint64_t helper_fcmple32(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 2; ++i) { + r |= (s1.VIS_SL64(i) <= s2.VIS_SL64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpgt32(uint64_t src1, uint64_t src2) +{ + return helper_fcmple32(src1, src2) ^ 3; +} + +uint64_t helper_fcmpule32(uint64_t src1, uint64_t src2) +{ + VIS64 s1, s2; + uint64_t r = 0; + + s1.ll = src1; + s2.ll = src2; + + for (int i = 0; i < 2; ++i) { + r |= (s1.VIS_L64(i) <= s2.VIS_L64(i)) << i; + } + return r; +} + +uint64_t helper_fcmpugt32(uint64_t src1, uint64_t src2) +{ + return helper_fcmpule32(src1, src2) ^ 3; +} uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) { @@ -334,3 +480,131 @@ uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) return r.ll; } + +uint64_t helper_cmask8(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x01 ? 
0x00000007 : 0x0000000f); + mask |= (src & 0x02 ? 0x00000060 : 0x000000e0); + mask |= (src & 0x04 ? 0x00000500 : 0x00000d00); + mask |= (src & 0x08 ? 0x00004000 : 0x0000c000); + mask |= (src & 0x10 ? 0x00030000 : 0x000b0000); + mask |= (src & 0x20 ? 0x00200000 : 0x00a00000); + mask |= (src & 0x40 ? 0x01000000 : 0x09000000); + mask |= (src & 0x80 ? 0x00000000 : 0x80000000); + + return deposit64(gsr, 32, 32, mask); +} + +uint64_t helper_cmask16(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x1 ? 0x00000067 : 0x000000ef); + mask |= (src & 0x2 ? 0x00004500 : 0x0000cd00); + mask |= (src & 0x4 ? 0x00230000 : 0x00ab0000); + mask |= (src & 0x8 ? 0x01000000 : 0x89000000); + + return deposit64(gsr, 32, 32, mask); +} + +uint64_t helper_cmask32(uint64_t gsr, uint64_t src) +{ + uint32_t mask = 0; + + mask |= (src & 0x1 ? 0x00004567 : 0x0000cdef); + mask |= (src & 0x2 ? 0x01230000 : 0x89ab0000); + + return deposit64(gsr, 32, 32, mask); +} + +static inline uint16_t do_fchksm16(uint16_t src1, uint16_t src2) +{ + uint16_t a = src1 + src2; + uint16_t c = a < src1; + return a + c; +} + +uint64_t helper_fchksm16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + r.VIS_W64(0) = do_fchksm16(s1.VIS_W64(0), s2.VIS_W64(0)); + r.VIS_W64(1) = do_fchksm16(s1.VIS_W64(1), s2.VIS_W64(1)); + r.VIS_W64(2) = do_fchksm16(s1.VIS_W64(2), s2.VIS_W64(2)); + r.VIS_W64(3) = do_fchksm16(s1.VIS_W64(3), s2.VIS_W64(3)); + + return r.ll; +} + +static inline int16_t do_fmean16(int16_t src1, int16_t src2) +{ + return (src1 + src2 + 1) / 2; +} + +uint64_t helper_fmean16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + r.VIS_SW64(0) = do_fmean16(s1.VIS_SW64(0), s2.VIS_SW64(0)); + r.VIS_SW64(1) = do_fmean16(s1.VIS_SW64(1), s2.VIS_SW64(1)); + r.VIS_SW64(2) = do_fmean16(s1.VIS_SW64(2), s2.VIS_SW64(2)); + r.VIS_SW64(3) = do_fmean16(s1.VIS_SW64(3), s2.VIS_SW64(3)); + + return r.ll; +} + +uint64_t helper_fslas16(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + for (int i = 0; i < 4; ++i) { + int t = s1.VIS_SW64(i) << (s2.VIS_W64(i) % 16); + t = MIN(t, INT16_MAX); + t = MAX(t, INT16_MIN); + r.VIS_SW64(i) = t; + } + + return r.ll; +} + +uint64_t helper_fslas32(uint64_t src1, uint64_t src2) +{ + VIS64 r, s1, s2; + + s1.ll = src1; + s2.ll = src2; + r.ll = 0; + + for (int i = 0; i < 2; ++i) { + int64_t t = (int64_t)(int32_t)s1.VIS_L64(i) << (s2.VIS_L64(i) % 32); + t = MIN(t, INT32_MAX); + t = MAX(t, INT32_MIN); + r.VIS_L64(i) = t; + } + + return r.ll; +} + +uint64_t helper_xmulx(uint64_t src1, uint64_t src2) +{ + return int128_getlo(clmul_64(src1, src2)); +} + +uint64_t helper_xmulxhi(uint64_t src1, uint64_t src2) +{ + return int128_gethi(clmul_64(src1, src2)); +}
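
A few carry-less multiplication sanity checks for the two helpers above
(host-side and illustrative only; clmul_64() from crypto/clmul.h returns
the full 128-bit polynomial product over GF(2)):

    #include <assert.h>

    static void xmulx_selftest(void)
    {
        // 0b11 clmul 0b11 = 0b101: x^2 + 1, with no carry out of bit 1
        assert(helper_xmulx(3, 3) == 5);
        // multiplying by 2 is a plain left shift by one bit
        assert(helper_xmulx(~0ull, 2) == ~0ull << 1);
        // the bit shifted out of the low word lands in the high word
        assert(helper_xmulxhi(1ull << 63, 2) == 1);
    }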