From 7df6a1ffdbdcaf98fa57747dc79216ac089e6215 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:32 +0100 Subject: [PATCH 01/45] target/arm: Add isar feature check functions for MVE Add the isar feature check functions we will need for v8.1M MVE: * a check for MVE present: this corresponds to the pseudocode's CheckDecodeFaults(ExtType_Mve) * a check for the optional floating-point part of MVE: this corresponds to CheckDecodeFaults(ExtType_MveFp) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-2-peter.maydell@linaro.org --- target/arm/cpu.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 04f8be35bf..f1bd7d787c 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3817,6 +3817,28 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) } } +static inline bool isar_feature_aa32_mve(const ARMISARegisters *id) +{ + /* + * Return true if MVE is supported (either integer or floating point). + * We must check for M-profile as the MVFR1 field means something + * else for A-profile. + */ + return isar_feature_aa32_mprofile(id) && + FIELD_EX32(id->mvfr1, MVFR1, MVE) > 0; +} + +static inline bool isar_feature_aa32_mve_fp(const ARMISARegisters *id) +{ + /* + * Return true if MVE with floating point is supported. + * We must check for M-profile as the MVFR1 field means something + * else for A-profile. + */ + return isar_feature_aa32_mprofile(id) && + FIELD_EX32(id->mvfr1, MVFR1, MVE) >= 2; +} + static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id) { /* From 69049bcf5114f7af379b4f3cccfb0b213b30f88a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:33 +0100 Subject: [PATCH 02/45] target/arm: Update feature checks for insns which are "MVE or FP" Some v8M instructions are present if either the floating point extension or MVE is implemented. Update our implementation of them to check for MVE as well as for FP. This is all the insns which use CheckDecodeFaults(ExtType_MveOrFp) or CheckDecodeFaults(ExtType_MveOrDpFp) in their pseudocode, which are essentially the loads and stores, moves and sysreg accesses, except for VMOV_reg_sp and VMOV_reg_dp, which we handle in subsequent patches because they need a refactor to provide a place to put the new MVE check. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-3-peter.maydell@linaro.org --- target/arm/translate-vfp.c | 48 +++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 3da84f30a0..2202f8985d 100--- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -543,11 +543,16 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) /* VMOV scalar to general purpose register */ TCGv_i32 tmp; - /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */ - if (a->size == MO_32 - ? !dc_isar_feature(aa32_fpsp_v2, s) - : !arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; + /* + * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has + * all sizes, whether the CPU has fp or not. + */ + if (!dc_isar_feature(aa32_mve, s)) { + if (a->size == MO_32 + ?
!dc_isar_feature(aa32_fpsp_v2, s) + : !arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } } /* UNDEF accesses to D16-D31 if they don't exist */ @@ -571,11 +576,16 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) /* VMOV general purpose register to scalar */ TCGv_i32 tmp; - /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */ - if (a->size == MO_32 - ? !dc_isar_feature(aa32_fpsp_v2, s) - : !arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; + /* + * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has + * all sizes, whether the CPU has fp or not. + */ + if (!dc_isar_feature(aa32_mve, s)) { + if (a->size == MO_32 + ? !dc_isar_feature(aa32_fpsp_v2, s) + : !arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } } /* UNDEF accesses to D16-D31 if they don't exist */ @@ -671,7 +681,7 @@ typedef enum FPSysRegCheckResult { static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno) { - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return FPSysRegCheckFailed; } @@ -1254,7 +1264,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) { TCGv_i32 tmp; - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1287,7 +1297,7 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) { TCGv_i32 tmp; - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1329,7 +1339,7 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) * floating point register. Note that this does not require support * for double precision arithmetic. */ - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1368,7 +1378,7 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) uint32_t offset; TCGv_i32 addr, tmp; - if (!dc_isar_feature(aa32_fp16_arith, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1403,7 +1413,7 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) uint32_t offset; TCGv_i32 addr, tmp; - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1439,7 +1449,7 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) TCGv_i64 tmp; /* Note that this does not require support for double arithmetic. */ - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1479,7 +1489,7 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) TCGv_i32 addr, tmp; int i, n; - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } @@ -1557,7 +1567,7 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) int i, n; /* Note that this does not require support for double arithmetic. 
*/ - if (!dc_isar_feature(aa32_fpsp_v2, s)) { + if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { return false; } From 70b305d4f0f0e024b6c1adeb0630be8491bbae6a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:34 +0100 Subject: [PATCH 03/45] target/arm: Move fpsp/fpdp isar check into callers of do_vfp_2op_sp/dp The do_vfp_2op_sp() and do_vfp_2op_dp() functions currently check whether floating point is supported via the aa32_fpdp_v2 and aa32_fpsp_v2 isar checks. For v8.1M MVE support, the VMOV_reg trans functions (but not any of the others) need to update this to also allow the insn if MVE is implemented. Move the check out of the do_ function and into its callsites (which are all implemented via the DO_VFP_2OP macro), so we have a place to change the check for the VMOV insns. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-4-peter.maydell@linaro.org --- target/arm/translate-vfp.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 2202f8985d..89246a284a 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -1925,9 +1925,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) int veclen = s->vec_len; TCGv_i32 f0, fd; - if (!dc_isar_feature(aa32_fpsp_v2, s)) { - return false; - } + /* Note that the caller must check the aa32_fpsp_v2 feature. */ if (!dc_isar_feature(aa32_fpshvec, s) && (veclen != 0 || s->vec_stride != 0)) { @@ -2002,6 +2000,8 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) */ TCGv_i32 f0; + /* Note that the caller must check the aa32_fp16_arith feature */ + if (!dc_isar_feature(aa32_fp16_arith, s)) { return false; } @@ -2030,9 +2030,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) int veclen = s->vec_len; TCGv_i64 f0, fd; - if (!dc_isar_feature(aa32_fpdp_v2, s)) { - return false; - } + /* Note that the caller must check the aa32_fpdp_v2 feature. 
*/ /* UNDEF accesses to D16-D31 if they don't exist */ if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) { @@ -2810,23 +2808,26 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) return true; } -#define DO_VFP_2OP(INSN, PREC, FN) \ +#define DO_VFP_2OP(INSN, PREC, FN, CHECK) \ static bool trans_##INSN##_##PREC(DisasContext *s, \ arg_##INSN##_##PREC *a) \ { \ + if (!dc_isar_feature(CHECK, s)) { \ + return false; \ + } \ return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ } -DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32) -DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64) +DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32, aa32_fpsp_v2) +DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64, aa32_fpdp_v2) -DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh) -DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss) -DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd) +DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) +DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) +DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2) -DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh) -DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs) -DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd) +DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith) +DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2) +DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { @@ -2843,9 +2844,9 @@ static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) gen_helper_vfp_sqrtd(vd, vm, cpu_env); } -DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp) -DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp) -DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp) +DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) +DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2) +DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2) static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) { From 7e435b9ea645b370aa32364fa22f8e4cd9e7d9ec Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:35 +0100 Subject: [PATCH 04/45] target/arm: Add MVE check to VMOV_reg_sp and VMOV_reg_dp Split out the handling of VMOV_reg_sp and VMOV_reg_dp so that we can permit the insns if either FP or MVE are present. 
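As an illustration (an expansion sketch, not an extra hunk in the diff below), DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) produces roughly:

    static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
    {
        /* The insn is permitted if either single-precision FP or MVE exists */
        if (!dc_isar_feature(aa32_fpsp_v2, s) &&
            !dc_isar_feature(aa32_mve, s)) {
            return false;
        }
        return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
    }

The dp variant differs only in the precision suffix and in testing aa32_fpdp_v2.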
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-5-peter.maydell@linaro.org --- target/arm/translate-vfp.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 89246a284a..ac5832a4ed 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -2818,8 +2818,19 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ } -DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32, aa32_fpsp_v2) -DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64, aa32_fpdp_v2) +#define DO_VFP_VMOV(INSN, PREC, FN) \ + static bool trans_##INSN##_##PREC(DisasContext *s, \ + arg_##INSN##_##PREC *a) \ + { \ + if (!dc_isar_feature(aa32_fp##PREC##_v2, s) && \ + !dc_isar_feature(aa32_mve, s)) { \ + return false; \ + } \ + return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ + } + +DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) +DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64) DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) From 300137965dbacec02eb2e26b3c6763b491d1f1b2 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:36 +0100 Subject: [PATCH 05/45] target/arm: Fix return values in fp_sysreg_checks() The fp_sysreg_checks() function is supposed to be returning an FPSysRegCheckResult, which is an enum with three possible values. However, three places in the function "return false" (a hangover from a previous iteration of the design where the function just returned a bool). Make these return FPSysRegCheckFailed instead (for no functional change, since both false and FPSysRegCheckFailed are zero). Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-6-peter.maydell@linaro.org --- target/arm/translate-vfp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index ac5832a4ed..791c4f5f70 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -691,16 +691,16 @@ static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno) break; case ARM_VFP_FPSCR_NZCVQC: if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { - return false; + return FPSysRegCheckFailed; } break; case ARM_VFP_FPCXT_S: case ARM_VFP_FPCXT_NS: if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { - return false; + return FPSysRegCheckFailed; } if (!s->v8m_secure) { - return false; + return FPSysRegCheckFailed; } break; default: From 7c3d47dab908ac1770726e68cf72e47bb5a9cbcb Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:37 +0100 Subject: [PATCH 06/45] target/arm: Implement M-profile VPR register If MVE is implemented for an M-profile CPU then it has a VPR register, which tracks predication information. Implement the read and write handling of this register, and the migration of its state. 
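For reference, the FIELD definitions added below give VPR this layout (a sketch derived from those definitions):

    /*
     *  31      24 23    20 19    16 15             0
     * +----------+--------+--------+----------------+
     * | reserved | MASK23 | MASK01 |       P0       |
     * +----------+--------+--------+----------------+
     */

so the P0 system register alias reads and writes only VPR[15:0], leaving the MASK fields untouched, which is what the extract/deposit pairs in the translate code implement.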
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-7-peter.maydell@linaro.org --- target/arm/cpu.h | 6 ++++++ target/arm/machine.c | 19 +++++++++++++++++++ target/arm/translate-vfp.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f1bd7d787c..df2f189c49 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -564,6 +564,7 @@ typedef struct CPUARMState { uint32_t cpacr[M_REG_NUM_BANKS]; uint32_t nsacr; int ltpsize; + uint32_t vpr; } v7m; /* Information associated with an exception about to be taken: @@ -1761,6 +1762,11 @@ FIELD(V7M_FPCCR, ASPEN, 31, 1) R_V7M_FPCCR_UFRDY_MASK | \ R_V7M_FPCCR_ASPEN_MASK) +/* v7M VPR bits */ +FIELD(V7M_VPR, P0, 0, 16) +FIELD(V7M_VPR, MASK01, 16, 4) +FIELD(V7M_VPR, MASK23, 20, 4) + /* * System register ID fields. */ diff --git a/target/arm/machine.c b/target/arm/machine.c index 6ad1d306b1..62a71a3b64 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -318,6 +318,24 @@ static const VMStateDescription vmstate_m_fp = { } }; +static bool mve_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + return cpu_isar_feature(aa32_mve, cpu); +} + +static const VMStateDescription vmstate_m_mve = { + .name = "cpu/m/mve", + .version_id = 1, + .minimum_version_id = 1, + .needed = mve_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.v7m.vpr, ARMCPU), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_m = { .name = "cpu/m", .version_id = 4, @@ -344,6 +362,7 @@ static const VMStateDescription vmstate_m = { &vmstate_m_other_sp, &vmstate_m_v8m, &vmstate_m_fp, + &vmstate_m_mve, NULL } }; diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 791c4f5f70..2316e105ac 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -703,6 +703,12 @@ static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno) return FPSysRegCheckFailed; } break; + case ARM_VFP_VPR: + case ARM_VFP_P0: + if (!dc_isar_feature(aa32_mve, s)) { + return FPSysRegCheckFailed; + } + break; default: return FPSysRegCheckFailed; } @@ -817,6 +823,25 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, tcg_temp_free_i32(sfpa); break; } + case ARM_VFP_VPR: + /* Behaves as NOP if not privileged */ + if (IS_USER(s)) { + break; + } + tmp = loadfn(s, opaque); + store_cpu_field(tmp, v7m.vpr); + break; + case ARM_VFP_P0: + { + TCGv_i32 vpr; + tmp = loadfn(s, opaque); + vpr = load_cpu_field(v7m.vpr); + tcg_gen_deposit_i32(vpr, vpr, tmp, + R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH); + store_cpu_field(vpr, v7m.vpr); + tcg_temp_free_i32(tmp); + break; + } default: g_assert_not_reached(); } @@ -935,6 +960,19 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, tcg_temp_free_i32(fpscr); break; } + case ARM_VFP_VPR: + /* Behaves as NOP if not privileged */ + if (IS_USER(s)) { + break; + } + tmp = load_cpu_field(v7m.vpr); + storefn(s, opaque, tmp); + break; + case ARM_VFP_P0: + tmp = load_cpu_field(v7m.vpr); + tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH); + storefn(s, opaque, tmp); + break; default: g_assert_not_reached(); } From b26b5629c0be4a9539833de4189184a224590d14 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:38 +0100 Subject: [PATCH 07/45] target/arm: Make FPSCR.LTPSIZE writable for MVE The M-profile FPSCR has an LTPSIZE field, but if MVE is not implemented it is read-only and always reads as 4; this is how QEMU currently handles it. 
Make the field writable when MVE is implemented. We can safely add the field to the MVE migration struct because currently no CPUs enable MVE and so the migration struct is never used. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-8-peter.maydell@linaro.org --- target/arm/cpu.h | 3 ++- target/arm/machine.c | 1 + target/arm/vfp_helper.c | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index df2f189c49..c389b1e969 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -563,7 +563,7 @@ typedef struct CPUARMState { uint32_t fpdscr[M_REG_NUM_BANKS]; uint32_t cpacr[M_REG_NUM_BANKS]; uint32_t nsacr; - int ltpsize; + uint32_t ltpsize; uint32_t vpr; } v7m; @@ -1562,6 +1562,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); #define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */ #define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT) +#define FPCR_LTPSIZE_LENGTH 3 #define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V) #define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC) diff --git a/target/arm/machine.c b/target/arm/machine.c index 62a71a3b64..81e30de824 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -332,6 +332,7 @@ static const VMStateDescription vmstate_m_mve = { .needed = mve_needed, .fields = (VMStateField[]) { VMSTATE_UINT32(env.v7m.vpr, ARMCPU), + VMSTATE_UINT32(env.v7m.ltpsize, ARMCPU), VMSTATE_END_OF_LIST() }, }; diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 01b9d8557f..e0886ab5a5 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -195,8 +195,10 @@ uint32_t vfp_get_fpscr(CPUARMState *env) void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) { + ARMCPU *cpu = env_archcpu(env); + /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ - if (!cpu_isar_feature(any_fp16, env_archcpu(env))) { + if (!cpu_isar_feature(any_fp16, cpu)) { val &= ~FPCR_FZ16; } @@ -210,11 +212,12 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) * because in v7A no-short-vector-support cores still had to * allow Stride/Len to be written with the only effect that * some insns are required to UNDEF if the guest sets them. - * - * TODO: if M-profile MVE implemented, set LTPSIZE. */ env->vfp.vec_len = extract32(val, 16, 3); env->vfp.vec_stride = extract32(val, 20, 2); + } else if (cpu_isar_feature(aa32_mve, cpu)) { + env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT, + FPCR_LTPSIZE_LENGTH); } if (arm_feature(env, ARM_FEATURE_NEON)) { From 7cda2149521f25b7caedd464f2e35104d9990315 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 May 2021 16:28:40 +0100 Subject: [PATCH 08/45] target/arm: Allow board models to specify initial NS VTOR Currently we allow board models to specify the initial value of the Secure VTOR register, using an init-svtor property on the TYPE_ARMV7M object which is plumbed through to the CPU. Allow board models to also specify the initial value of the Non-secure VTOR via a similar init-nsvtor property. 
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210520152840.24453-10-peter.maydell@linaro.org --- hw/arm/armv7m.c | 7 +++++++ include/hw/arm/armv7m.h | 2 ++ target/arm/cpu.c | 10 ++++++++++ target/arm/cpu.h | 2 ++ 4 files changed, 21 insertions(+) diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index af0d935bf7..9ce5c30cd5 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -176,6 +176,12 @@ static void armv7m_realize(DeviceState *dev, Error **errp) return; } } + if (object_property_find(OBJECT(s->cpu), "init-nsvtor")) { + if (!object_property_set_uint(OBJECT(s->cpu), "init-nsvtor", + s->init_nsvtor, errp)) { + return; + } + } if (object_property_find(OBJECT(s->cpu), "start-powered-off")) { if (!object_property_set_bool(OBJECT(s->cpu), "start-powered-off", s->start_powered_off, errp)) { @@ -254,6 +260,7 @@ static Property armv7m_properties[] = { MemoryRegion *), DEFINE_PROP_LINK("idau", ARMv7MState, idau, TYPE_IDAU_INTERFACE, Object *), DEFINE_PROP_UINT32("init-svtor", ARMv7MState, init_svtor, 0), + DEFINE_PROP_UINT32("init-nsvtor", ARMv7MState, init_nsvtor, 0), DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false), DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off, false), diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h index 189b23a8ce..bc6733c518 100644 --- a/include/hw/arm/armv7m.h +++ b/include/hw/arm/armv7m.h @@ -46,6 +46,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M) * devices will be automatically layered on top of this view.) * + Property "idau": IDAU interface (forwarded to CPU object) * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object) + * + Property "init-nsvtor": non-secure VTOR reset value (forwarded to CPU object) * + Property "vfp": enable VFP (forwarded to CPU object) * + Property "dsp": enable DSP (forwarded to CPU object) * + Property "enable-bitband": expose bitbanded IO @@ -69,6 +70,7 @@ struct ARMv7MState { MemoryRegion *board_memory; Object *idau; uint32_t init_svtor; + uint32_t init_nsvtor; bool enable_bitband; bool start_powered_off; bool vfp; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index ad65b60b04..9ad6f5911b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -327,6 +327,7 @@ static void arm_cpu_reset(DeviceState *dev) env->regs[14] = 0xffffffff; env->v7m.vecbase[M_REG_S] = cpu->init_svtor & 0xffffff80; + env->v7m.vecbase[M_REG_NS] = cpu->init_nsvtor & 0xffffff80; /* Load the initial SP and PC from offset 0 and 4 in the vector table */ vecbase = env->v7m.vecbase[env->v7m.secure]; @@ -1272,6 +1273,15 @@ void arm_cpu_post_init(Object *obj) &cpu->init_svtor, OBJ_PROP_FLAG_READWRITE); } + if (arm_feature(&cpu->env, ARM_FEATURE_M)) { + /* + * Initial value of the NS VTOR (for cores without the Security + * extension, this is the only VTOR) + */ + object_property_add_uint32_ptr(obj, "init-nsvtor", + &cpu->init_nsvtor, + OBJ_PROP_FLAG_READWRITE); + } qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index c389b1e969..5f234834c0 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -869,6 +869,8 @@ struct ARMCPU { /* For v8M, initial value of the Secure VTOR */ uint32_t init_svtor; + /* For v8M, initial value of the Non-secure VTOR */ + uint32_t init_nsvtor; /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type. 
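A board model can then set the new property before realizing the ARMv7M container, in the usual qdev way; a minimal sketch (the device pointer and the vector table address are made up for illustration):

    /* Hypothetical board code: put the NS vector table at offset 0x40000 */
    qdev_prop_set_uint32(DEVICE(&s->armv7m), "init-nsvtor", 0x00040000);

As with init-svtor, the value is masked with 0xffffff80 at CPU reset, since VTOR bits [6:0] are reserved.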
From f548f20176cb5f440693120f9bde911dd3639748 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 27 May 2021 10:51:52 +0100 Subject: [PATCH 09/45] arm: Consistently use "Cortex-Axx", not "Cortex Axx" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The official punctuation for Arm CPU names uses a hyphen, like "Cortex-A9". We mostly follow this, but in a few places usage without the hyphen has crept in. Fix those so we consistently use the same way of writing the CPU name. This commit was created with: git grep -z -l 'Cortex ' | xargs -0 sed -i 's/Cortex /Cortex-/' Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20210527095152.10968-1-peter.maydell@linaro.org --- docs/system/arm/aspeed.rst | 4 ++-- docs/system/arm/nuvoton.rst | 6 +++--- docs/system/arm/sabrelite.rst | 2 +- hw/arm/aspeed.c | 6 +++--- hw/arm/mcimx6ul-evk.c | 2 +- hw/arm/mcimx7d-sabre.c | 2 +- hw/arm/npcm7xx_boards.c | 4 ++-- hw/arm/sabrelite.c | 2 +- hw/misc/npcm7xx_clk.c | 2 +- include/hw/arm/allwinner-h3.h | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index a1911f9403..57ee2bd94f 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -5,7 +5,7 @@ The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. They are based on different releases of the Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600 -with dual cores ARM Cortex A7 CPUs (1.2GHz). +with dual cores ARM Cortex-A7 CPUs (1.2GHz). The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C, etc. @@ -24,7 +24,7 @@ AST2500 SoC based machines : AST2600 SoC based machines : -- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7) +- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex-A7) - ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC Supported devices diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst index d3cf2d9cd7..ca011bd479 100644 --- a/docs/system/arm/nuvoton.rst +++ b/docs/system/arm/nuvoton.rst @@ -3,19 +3,19 @@ Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``) The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are designed to be used as Baseboard Management Controllers (BMCs) in various -servers. They all feature one or two ARM Cortex A9 CPU cores, as well as an +servers. They all feature one or two ARM Cortex-A9 CPU cores, as well as an assortment of peripherals targeted for either Enterprise or Data Center / Hyperscale applications. The former is a superset of the latter, so NPCM750 has all the peripherals of NPCM730 and more. .. _Nuvoton iBMC: https://www.nuvoton.com/products/cloud-computing/ibmc/ -The NPCM750 SoC has two Cortex A9 cores and is targeted for the Enterprise +The NPCM750 SoC has two Cortex-A9 cores and is targeted for the Enterprise segment. The following machines are based on this chip : - ``npcm750-evb`` Nuvoton NPCM750 Evaluation board -The NPCM730 SoC has two Cortex A9 cores and is targeted for Data Center and +The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and Hyperscale applications. 
The following machines are based on this chip : - ``quanta-gsj`` Quanta GSJ server BMC diff --git a/docs/system/arm/sabrelite.rst b/docs/system/arm/sabrelite.rst index 71713310e3..4ccb0560af 100644 --- a/docs/system/arm/sabrelite.rst +++ b/docs/system/arm/sabrelite.rst @@ -10,7 +10,7 @@ Supported devices The SABRE Lite machine supports the following devices: - * Up to 4 Cortex A9 cores + * Up to 4 Cortex-A9 cores * Generic Interrupt Controller * 1 Clock Controller Module * 1 System Reset Controller diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 3fe6c55744..0eafc79154 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -947,7 +947,7 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "Aspeed AST2600 EVB (Cortex A7)"; + mc->desc = "Aspeed AST2600 EVB (Cortex-A7)"; amc->soc_name = "ast2600-a1"; amc->hw_strap1 = AST2600_EVB_HW_STRAP1; amc->hw_strap2 = AST2600_EVB_HW_STRAP2; @@ -966,7 +966,7 @@ static void aspeed_machine_tacoma_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "OpenPOWER Tacoma BMC (Cortex A7)"; + mc->desc = "OpenPOWER Tacoma BMC (Cortex-A7)"; amc->soc_name = "ast2600-a1"; amc->hw_strap1 = TACOMA_BMC_HW_STRAP1; amc->hw_strap2 = TACOMA_BMC_HW_STRAP2; @@ -1003,7 +1003,7 @@ static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "IBM Rainier BMC (Cortex A7)"; + mc->desc = "IBM Rainier BMC (Cortex-A7)"; amc->soc_name = "ast2600-a1"; amc->hw_strap1 = RAINIER_BMC_HW_STRAP1; amc->hw_strap2 = RAINIER_BMC_HW_STRAP2; diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c index ce16b6b317..77fae874b1 100644 --- a/hw/arm/mcimx6ul-evk.c +++ b/hw/arm/mcimx6ul-evk.c @@ -67,7 +67,7 @@ static void mcimx6ul_evk_init(MachineState *machine) static void mcimx6ul_evk_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex A7)"; + mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex-A7)"; mc->init = mcimx6ul_evk_init; mc->max_cpus = FSL_IMX6UL_NUM_CPUS; mc->default_ram_id = "mcimx6ul-evk.ram"; diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c index e896222c34..935d4b0f1c 100644 --- a/hw/arm/mcimx7d-sabre.c +++ b/hw/arm/mcimx7d-sabre.c @@ -67,7 +67,7 @@ static void mcimx7d_sabre_init(MachineState *machine) static void mcimx7d_sabre_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex A7)"; + mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex-A7)"; mc->init = mcimx7d_sabre_init; mc->max_cpus = FSL_IMX7_NUM_CPUS; mc->default_ram_id = "mcimx7d-sabre.ram"; diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c index d4553e3786..698be46d30 100644 --- a/hw/arm/npcm7xx_boards.c +++ b/hw/arm/npcm7xx_boards.c @@ -299,7 +299,7 @@ static void npcm750_evb_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM750); - mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex A9)"; + mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex-A9)"; mc->init = npcm750_evb_init; mc->default_ram_size = 512 * MiB; }; @@ -311,7 +311,7 @@ static void gsj_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM730); - mc->desc = "Quanta GSJ (Cortex A9)"; + mc->desc = "Quanta GSJ (Cortex-A9)"; mc->init = quanta_gsj_init; mc->default_ram_size = 512 * 
MiB; }; diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c index 42348e5cb1..29fc777b61 100644 --- a/hw/arm/sabrelite.c +++ b/hw/arm/sabrelite.c @@ -105,7 +105,7 @@ static void sabrelite_init(MachineState *machine) static void sabrelite_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex A9)"; + mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex-A9)"; mc->init = sabrelite_init; mc->max_cpus = FSL_IMX6_NUM_CPUS; mc->ignore_memory_transaction_failures = true; diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm7xx_clk.c index a1ee67dc9a..0b61070c52 100644 --- a/hw/misc/npcm7xx_clk.c +++ b/hw/misc/npcm7xx_clk.c @@ -35,7 +35,7 @@ #define NPCM7XX_CLOCK_REF_HZ (25000000) /* Register Field Definitions */ -#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex A9 Cores */ +#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex-A9 Cores */ #define PLLCON_LOKI BIT(31) #define PLLCON_LOKS BIT(30) diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h index cc308a5d2c..63025fb27c 100644 --- a/include/hw/arm/allwinner-h3.h +++ b/include/hw/arm/allwinner-h3.h @@ -18,7 +18,7 @@ */ /* - * The Allwinner H3 is a System on Chip containing four ARM Cortex A7 + * The Allwinner H3 is a System on Chip containing four ARM Cortex-A7 * processor cores. Features and specifications include DDR2/DDR3 memory, * SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and * various I/O modules. From 269a7e97865cb863c9ca19e5f2e6a40ac9eddf82 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Mon, 3 May 2021 17:14:22 +0100 Subject: [PATCH 10/45] docs: Fix installation of man pages with Sphinx 4.x The 4.x branch of Sphinx introduces a breaking change, as generated man pages are now written to subdirectories corresponding to the manual section they belong to. This results in `make install` erroring out when attempting to install the man pages, because they are not where it expects to find them. This patch restores the behavior of Sphinx 3.x regarding man pages. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/256 Signed-off-by: Damien Goutte-Gattat Message-id: 20210503161422.15028-1-dgouttegattat@incenp.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/conf.py b/docs/conf.py index 00cf66ab54..42729e22bb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -279,6 +279,7 @@ man_pages = [ ['Stefan Hajnoczi ', 'Masayoshi Mizuma '], 1), ] +man_make_section_directory = False # -- Options for Texinfo output ------------------------------------------- From 0711a634355a68cd83966872e387402a8b4b048a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Jun 2021 19:07:20 -0700 Subject: [PATCH 11/45] target/arm: Mark LDS{MIN,MAX} as signed operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The operands to tcg_gen_atomic_fetch_s{min,max}_i64 must be signed, so that the inputs are properly extended. Zero extend the result afterward, as needed. 
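As a concrete illustration of the bug (a C sketch of the semantics, not QEMU code): consider a 32-bit LDSMIN where memory holds 0xffffffff (-1) and the register operand is 1.

    int64_t loaded  = (int32_t)0xffffffff; /* MO_SIGN load: -1, not 0xffffffff */
    int64_t operand = 1;
    int64_t min = loaded < operand ? loaded : operand; /* -1 is stored back */
    uint64_t rt = (uint32_t)loaded;        /* Wt result zero-extends into Xt */

Without the sign extension, the comparison is done on zero-extended values and wrongly picks 1; without the final zero extension, the upper half of Xt would hold sign bits.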
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/364 Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée Message-id: 20210602020720.47679-1-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index ceac0ee2bd..d6906d9012 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3355,8 +3355,9 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int o3_opc = extract32(insn, 12, 4); bool r = extract32(insn, 22, 1); bool a = extract32(insn, 23, 1); - TCGv_i64 tcg_rs, clean_addr; + TCGv_i64 tcg_rs, tcg_rt, clean_addr; AtomicThreeOpFn *fn = NULL; + MemOp mop = s->be_data | size | MO_ALIGN; if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); @@ -3377,9 +3378,11 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, break; case 004: /* LDSMAX */ fn = tcg_gen_atomic_fetch_smax_i64; + mop |= MO_SIGN; break; case 005: /* LDSMIN */ fn = tcg_gen_atomic_fetch_smin_i64; + mop |= MO_SIGN; break; case 006: /* LDUMAX */ fn = tcg_gen_atomic_fetch_umax_i64; @@ -3422,6 +3425,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, } tcg_rs = read_cpu_reg(s, rs, true); + tcg_rt = cpu_reg(s, rt); if (o3_opc == 1) { /* LDCLR */ tcg_gen_not_i64(tcg_rs, tcg_rs); @@ -3430,8 +3434,11 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, /* The tcg atomic primitives are all full barriers. Therefore we * can ignore the Acquire and Release bits of this instruction. */ - fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s), - s->be_data | size | MO_ALIGN); + fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); + + if ((mop & MO_SIGN) && size != MO_64) { + tcg_gen_ext32u_i64(tcg_rt, tcg_rt); + } } /* From 6e0c60a2be30c333b06d3558a62b0f177199cbfb Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 26 May 2021 13:18:44 +0100 Subject: [PATCH 12/45] target/arm: fix missing exception class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DAIF and PAC checks used raise_exception_ra to raise an exception and unwind CPU state but raise_exception_ra is currently designed for handling data aborts as the syndrome is partially precomputed and encoded in the TB and then merged in merge_syn_data_abort when handling the data abort. Using raise_exception_ra for DAIF and PAC checks results in an empty syndrome being retrieved from data[2] in restore_state_to_opc and setting ESR to 0. This manifested as: kvm [571]: Unknown exception class: esr: 0x000000 – Unknown/Uncategorized when launching a KVM guest when the host qemu used a CPU supporting EL2+pointer authentication and enabling pointer authentication in the guest. Rework raise_exception_ra such that the state is restored before raising the exception so that the exception is not clobbered by restore_state_to_opc. 
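In other words, the ordering changes from

    set syndrome -> unwind (restore_state_to_opc overwrites the syndrome) -> exit loop

to

    unwind -> set syndrome -> deliver exception

so the last value written to env->exception.syndrome is the one the caller passed in.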
Fixes: 0d43e1a2d29a ("target/arm: Add PAuth helpers") Cc: Richard Henderson Cc: Peter Maydell Signed-off-by: Jamie Iles [PMM: added comment] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/op_helper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index efcb600992..1a95972bcc 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -63,8 +63,15 @@ void raise_exception(CPUARMState *env, uint32_t excp, void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, uint32_t target_el, uintptr_t ra) { - CPUState *cs = do_raise_exception(env, excp, syndrome, target_el); - cpu_loop_exit_restore(cs, ra); + CPUState *cs = env_cpu(env); + + /* + * restore_state_to_opc() will set env->exception.syndrome, so + * we must restore CPU state here before setting the syndrome + * the caller passed us, and cannot use cpu_loop_exit_restore(). + */ + cpu_restore_state(cs, ra, true); + raise_exception(env, excp, syndrome, target_el); } uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc, From 154acaba0ea9675f175118787b9ca44cbbbc23b7 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 26 May 2021 13:18:45 +0100 Subject: [PATCH 13/45] target/arm: fold do_raise_exception into raise_exception Now that there are no other users of do_raise_exception, fold it into raise_exception. Cc: Richard Henderson Cc: Peter Maydell Signed-off-by: Jamie Iles Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/op_helper.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 1a95972bcc..4132f5e430 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -27,8 +27,8 @@ #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) -static CPUState *do_raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) +void raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) { CPUState *cs = env_cpu(env); @@ -49,14 +49,6 @@ static CPUState *do_raise_exception(CPUARMState *env, uint32_t excp, cs->exception_index = excp; env->exception.syndrome = syndrome; env->exception.target_el = target_el; - - return cs; -} - -void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) -{ - CPUState *cs = do_raise_exception(env, excp, syndrome, target_el); cpu_loop_exit(cs); } From 5bf100c3200bc74b5d7b4e430c3b23e60caf1605 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 26 May 2021 13:18:46 +0100 Subject: [PATCH 14/45] target/arm: use raise_exception_ra for MTE check failure Now that raise_exception_ra restores the state before raising the exception we can use it to perform the state restore + exception raising without clobbering the syndrome. Cc: Richard Henderson Cc: Peter Maydell Signed-off-by: Jamie Iles [PMM: Keep the one line of the comment that is still relevant] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/mte_helper.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index a6fccc6e69..166b9d260f 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -563,20 +563,14 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc, switch (tcf) { case 1: - /* - * Tag check fail causes a synchronous exception.
- * - * In restore_state_to_opc, we set the exception syndrome - * for the load or store operation. Unwind first so we - * may overwrite that with the syndrome for the tag check. - */ - cpu_restore_state(env_cpu(env), ra, true); + /* Tag check fail causes a synchronous exception. */ env->exception.vaddress = dirty_ptr; is_write = FIELD_EX32(desc, MTEDESC, WRITE); syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write, 0x11); - raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env)); + raise_exception_ra(env, EXCP_DATA_ABORT, syn, + exception_target_el(env), ra); /* noreturn, but fall through to the assert anyway */ case 0: From 9d75d45c0b88c87ac25ee4c65e724447834c1d3b Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 26 May 2021 13:18:47 +0100 Subject: [PATCH 15/45] target/arm: use raise_exception_ra for stack limit exception The sequence cpu_restore_state() + raise_exception() is equivalent to raise_exception_ra(), so use that instead. (In this case we never cared about the syndrome value, because M-profile doesn't use the syndrome; the old code was just written unnecessarily awkwardly.) Cc: Richard Henderson Cc: Peter Maydell Signed-off-by: Jamie Iles [PMM: Retain edited version of comment; rewrite commit message] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/m_helper.c | 5 +---- target/arm/op_helper.c | 9 +++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index eda74e5545..074c543455 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -2601,10 +2601,7 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val) limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false]; if (val < limit) { - CPUState *cs = env_cpu(env); - - cpu_restore_state(cs, GETPC(), true); - raise_exception(env, EXCP_STKOF, 0, 1); + raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC()); } if (is_psp) { diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 4132f5e430..e98fd86305 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -95,15 +95,12 @@ void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue) * raising an exception if the limit is breached. */ if (newvalue < v7m_sp_limit(env)) { - CPUState *cs = env_cpu(env); - /* * Stack limit exceptions are a rare case, so rather than syncing - * PC/condbits before the call, we use cpu_restore_state() to - * get them right before raising the exception. + * PC/condbits before the call, we use raise_exception_ra() so + * that cpu_restore_state() will sort them out. */ - cpu_restore_state(cs, GETPC(), true); - raise_exception(env, EXCP_STKOF, 0, 1); + raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC()); } } From c0b9e8a48906f25acce17c9b60447bb7fd9d5cd2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:06 -0700 Subject: [PATCH 16/45] target/arm: Add isar_feature_{aa32, aa64, aa64_sve}_bf16 Note that the SVE BFLOAT16 support does not require SVE2, it is an independent extension. 
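For context, bfloat16 is the top half of an IEEE float32: one sign bit, the same eight exponent bits, and a seven-bit fraction. A truncating conversion is therefore just a 16-bit shift, as in this sketch (illustration only; the real conversions added in the following patches go through softfloat so that rounding, NaN handling and FPCR control are honoured):

    #include <string.h>

    static inline uint16_t bf16_truncate(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof(bits));
        return bits >> 16; /* keep sign, exponent, top 7 fraction bits */
    }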
Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 5f234834c0..be9a4dceae 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3792,6 +3792,11 @@ static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; } +static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0; +} + static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id) { return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0; @@ -4153,6 +4158,11 @@ static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; } +static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; +} + static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically. */ @@ -4297,6 +4307,11 @@ static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; } +static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; +} + static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; From fc5200ee4583cb9110ee7bda71a65890d787882e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:07 -0700 Subject: [PATCH 17/45] target/arm: Unify unallocated path in disas_fp_1src Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index d6906d9012..95c2853f39 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -6501,8 +6501,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); if (mos) { - unallocated_encoding(s); - return; + goto do_unallocated; } switch (opcode) { @@ -6511,8 +6510,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) /* FCVT between half, single and double precision */ int dtype = extract32(opcode, 0, 2); if (type == 2 || dtype == type) { - unallocated_encoding(s); - return; + goto do_unallocated; } if (!fp_access_check(s)) { return; @@ -6524,8 +6522,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ if (type > 1 || !dc_isar_feature(aa64_frint, s)) { - unallocated_encoding(s); - return; + goto do_unallocated; } /* fall through */ case 0x0 ... 
0x3: @@ -6547,8 +6544,7 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) break; case 3: if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; + goto do_unallocated; } if (!fp_access_check(s)) { @@ -6557,11 +6553,12 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) handle_fp_1src_half(s, opcode, rd, rn); break; default: - unallocated_encoding(s); + goto do_unallocated; } break; default: + do_unallocated: unallocated_encoding(s); break; } From 3a98ac40fa9fca85bb37f4281e872b7519e733c5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:08 -0700 Subject: [PATCH 18/45] target/arm: Implement scalar float32 to bfloat16 conversion This is the 64-bit BFCVT and the 32-bit VCVT{B,T}.BF16.F32. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 1 + target/arm/translate-a64.c | 19 +++++++++++++++++++ target/arm/translate-vfp.c | 24 ++++++++++++++++++++++++ target/arm/vfp.decode | 2 ++ target/arm/vfp_helper.c | 5 +++++ 5 files changed, 51 insertions(+) diff --git a/target/arm/helper.h b/target/arm/helper.h index 23ccb0f72f..9977a827e9 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -143,6 +143,7 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) +DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr) DEF_HELPER_2(vfp_uitoh, f16, i32, ptr) DEF_HELPER_2(vfp_uitos, f32, i32, ptr) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 95c2853f39..b335ca8735 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -6280,6 +6280,9 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) case 0x3: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); goto done; + case 0x6: /* BFCVT */ + gen_fpst = gen_helper_bfcvt; + break; case 0x8: /* FRINTN */ case 0x9: /* FRINTP */ case 0xa: /* FRINTM */ @@ -6557,6 +6560,22 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } break; + case 0x6: + switch (type) { + case 1: /* BFCVT */ + if (!dc_isar_feature(aa64_bf16, s)) { + goto do_unallocated; + } + if (!fp_access_check(s)) { + return; + } + handle_fp_1src_single(s, opcode, rd, rn); + break; + default: + goto do_unallocated; + } + break; + default: do_unallocated: unallocated_encoding(s); diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index 2316e105ac..d01e465821 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -3085,6 +3085,30 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) return true; } +static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) +{ + TCGv_ptr fpst; + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = fpstatus_ptr(FPST_FPCR); + tmp = tcg_temp_new_i32(); + + vfp_load_reg32(tmp, a->vm); + gen_helper_bfcvt(tmp, tmp, fpst); + tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tmp); + return true; +} + static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) { TCGv_ptr fpst; diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode index 6f7f28f9a4..52535d9b0b 100644 --- a/target/arm/vfp.decode +++ b/target/arm/vfp.decode @@ -205,6 +205,8 @@ VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... 
\ # VCVTB and VCVTT to f16: Vd format is always vd_sp; # Vm format depends on size bit +VCVT_b16_f32 ---- 1110 1.11 0011 .... 1001 t:1 1.0 .... \ + vd=%vd_sp vm=%vm_sp VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \ vd=%vd_sp vm=%vm_sp VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \ diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index e0886ab5a5..200439ad66 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -411,6 +411,11 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) return float64_to_float32(x, &env->vfp.fp_status); } +uint32_t HELPER(bfcvt)(float32 x, void *status) +{ + return float32_to_bfloat16(x, status); +} + /* * VFP3 fixed point conversion. The AArch32 versions of fix-to-float * must always round-to-nearest; the AArch64 ones honour the FPSCR From d29b17ca3eac79ab77c76ec8285698b0c57d04b2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:09 -0700 Subject: [PATCH 19/45] target/arm: Implement vector float32 to bfloat16 conversion This is BFCVT{N,T} for both AArch64 AdvSIMD and SVE, and VCVT.BF16.F32 for AArch32 NEON. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-sve.h | 4 ++++ target/arm/helper.h | 1 + target/arm/neon-dp.decode | 1 + target/arm/sve.decode | 2 ++ target/arm/sve_helper.c | 2 ++ target/arm/translate-a64.c | 17 ++++++++++++++ target/arm/translate-neon.c | 45 +++++++++++++++++++++++++++++++++++++ target/arm/translate-sve.c | 16 +++++++++++++ target/arm/vfp_helper.c | 7 ++++++ 9 files changed, 95 insertions(+) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 29a14a21f5..dc629f851a 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1197,6 +1197,8 @@ DEF_HELPER_FLAGS_5(sve_fcvt_hd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_fcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve_bfcvt, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_fcvtzs_hh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -2752,6 +2754,8 @@ DEF_HELPER_FLAGS_5(sve2_fcvtnt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_fcvtnt_ds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve_bfcvtnt, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_fcvtlt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/helper.h b/target/arm/helper.h index 9977a827e9..8b4b7d92f3 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -144,6 +144,7 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr) +DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, ptr) DEF_HELPER_2(vfp_uitoh, f16, i32, ptr) DEF_HELPER_2(vfp_uitos, f32, i32, ptr) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index ec83f10ab3..fd3a01bfa0 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -521,6 +521,7 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0 + VCVT_B16_F32 1111 001 11 . 11 .. 10 .... 0 1100 1 . 0 .... @2misc_q0 VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... 
@2misc diff --git a/target/arm/sve.decode b/target/arm/sve.decode index cb077bfde9..18d1a0eecc 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1036,6 +1036,7 @@ FNMLS_zpzzz 01100101 .. 1 ..... 111 ... ..... ..... @rdn_pg_rm_ra # SVE floating-point convert precision FCVT_sh 01100101 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0 FCVT_hs 01100101 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0 +BFCVT 01100101 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0 FCVT_dh 01100101 11 0010 00 101 ... ..... ..... @rd_pg_rn_e0 FCVT_hd 01100101 11 0010 01 101 ... ..... ..... @rd_pg_rn_e0 FCVT_ds 01100101 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0 @@ -1610,6 +1611,7 @@ RAX1 01000101 00 1 ..... 11110 1 ..... ..... @rd_rn_rm_e0 FCVTXNT_ds 01100100 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0 FCVTX_ds 01100101 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0 FCVTNT_sh 01100100 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0 +BFCVTNT 01100100 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0 FCVTLT_hs 01100100 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0 FCVTNT_ds 01100100 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0 FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0 diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 40af3024df..46a957b6fb 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4708,6 +4708,7 @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) +DO_ZPZ_FP(sve_bfcvt, uint32_t, H1_4, float32_to_bfloat16) DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) @@ -7740,6 +7741,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ } while (i != 0); \ } +DO_FCVTNT(sve_bfcvtnt, uint32_t, uint16_t, H1_4, H1_2, float32_to_bfloat16) DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16) DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, , H1_4, float64_to_float32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b335ca8735..0f15fa42fc 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10353,6 +10353,13 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, tcg_temp_free_i32(ahp); } break; + case 0x36: /* BFCVTN, BFCVTN2 */ + { + TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); + gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); + tcg_temp_free_ptr(fpst); + } + break; case 0x56: /* FCVTXN, FCVTXN2 */ /* 64 bit to 32 bit float conversion * with von Neumann rounding (round to odd) @@ -12753,6 +12760,16 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); return; + case 0x36: /* BFCVTN, BFCVTN2 */ + if (!dc_isar_feature(aa64_bf16, s) || size != 2) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); + return; case 0x17: /* FCVTL, FCVTL2 */ if (!fp_access_check(s)) { return; diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 9e990b41ed..6d94229c69 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -3422,6 +3422,51 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a) return true; } +static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) +{ + TCGv_ptr fpst; + TCGv_i64 tmp; + TCGv_i32 dst0, 
dst1; + + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = fpstatus_ptr(FPST_STD); + tmp = tcg_temp_new_i64(); + dst0 = tcg_temp_new_i32(); + dst1 = tcg_temp_new_i32(); + + read_neon_element64(tmp, a->vm, 0, MO_64); + gen_helper_bfcvt_pair(dst0, tmp, fpst); + + read_neon_element64(tmp, a->vm, 1, MO_64); + gen_helper_bfcvt_pair(dst1, tmp, fpst); + + write_neon_element32(dst0, a->vd, 0, MO_32); + write_neon_element32(dst1, a->vd, 1, MO_32); + + tcg_temp_free_i64(tmp); + tcg_temp_free_i32(dst0); + tcg_temp_free_i32(dst1); + tcg_temp_free_ptr(fpst); + return true; +} + static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) { TCGv_ptr fpst; diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 9574efe957..fb692a1835 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4777,6 +4777,14 @@ static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a) return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs); } +static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt); +} + static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a) { return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh); @@ -8472,6 +8480,14 @@ static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a) return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh); } +static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt); +} + static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a) { if (!dc_isar_feature(aa64_sve2, s)) { diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 200439ad66..496f003477 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -416,6 +416,13 @@ uint32_t HELPER(bfcvt)(float32 x, void *status) return float32_to_bfloat16(x, status); } +uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status) +{ + bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status); + bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status); + return deposit32(lo, 16, 16, hi); +} + /* * VFP3 fixed point conversion. The AArch32 versions of fix-to-float * must always round-to-nearest; the AArch64 ones honour the FPSCR From 60c8f7265d7eb51dfb38ea6701d10cbe2d7c7a64 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:10 -0700 Subject: [PATCH 20/45] softfpu: Add float_round_to_odd_inf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For Arm BFDOT and BFMMLA, we need a version of round-to-odd that overflows to infinity, instead of the max normal number. 
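[Editor's illustration, not part of this patch: assuming IEEE binary32 bounds, the two round-to-odd variants differ only in the value an overflowing result is rounded to. A minimal C sketch of the intended behaviour:

    /*
     * Illustrative sketch only, not QEMU code. For a positive result
     * whose rounded magnitude exceeds the binary32 range:
     *   float_round_to_odd     saturates to the largest normal number
     *                          (bit pattern 0x7f7fffff, i.e. FLT_MAX);
     *   float_round_to_odd_inf overflows to +infinity (0x7f800000).
     */
    #include <float.h>   /* FLT_MAX */
    #include <math.h>    /* INFINITY */
    #include <stdbool.h>

    static float round_to_odd_overflow(bool to_odd_inf)
    {
        return to_odd_inf ? INFINITY : FLT_MAX;
    }

This matches the partsN(uncanon) change below: float_round_to_odd sets overflow_norm, while float_round_to_odd_inf leaves it false, so overflow delivers infinity.]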
Cc: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-6-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- fpu/softfloat-parts.c.inc | 6 ++++-- include/fpu/softfloat-types.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc index a897a5a743..7f69da1d8f 100644 --- a/fpu/softfloat-parts.c.inc +++ b/fpu/softfloat-parts.c.inc @@ -176,13 +176,12 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s, g_assert_not_reached(); } + overflow_norm = false; switch (s->float_rounding_mode) { case float_round_nearest_even: - overflow_norm = false; inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); break; case float_round_ties_away: - overflow_norm = false; inc = frac_lsbm1; break; case float_round_to_zero: @@ -199,6 +198,8 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s, break; case float_round_to_odd: overflow_norm = true; + /* fall through */ + case float_round_to_odd_inf: inc = p->frac_lo & frac_lsb ? 0 : round_mask; break; default: @@ -259,6 +260,7 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s, ? frac_lsbm1 : 0); break; case float_round_to_odd: + case float_round_to_odd_inf: inc = p->frac_lo & frac_lsb ? 0 : round_mask; break; default: diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 8a3f20fae9..3b757c3d6a 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -134,8 +134,10 @@ typedef enum __attribute__((__packed__)) { float_round_up = 2, float_round_to_zero = 3, float_round_ties_away = 4, - /* Not an IEEE rounding mode: round to the closest odd mantissa value */ + /* Not an IEEE rounding mode: round to closest odd, overflow to max */ float_round_to_odd = 5, + /* Not an IEEE rounding mode: round to closest odd, overflow to inf */ + float_round_to_odd_inf = 6, } FloatRoundMode; /* From cb8657f7f9fcc2ebe8dfb1cbc9e572670d2af568 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:11 -0700 Subject: [PATCH 21/45] target/arm: Implement bfloat16 dot product (vector) This is BFDOT for both AArch64 AdvSIMD and SVE, and VDOT.BF16 for AArch32 NEON. Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-7-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.h | 3 +++ target/arm/neon-shared.decode | 2 ++ target/arm/sve.decode | 3 +++ target/arm/translate-a64.c | 20 ++++++++++++++++++ target/arm/translate-neon.c | 9 ++++++++ target/arm/translate-sve.c | 12 +++++++++++ target/arm/vec_helper.c | 40 +++++++++++++++++++++++++++++++++++ 7 files changed, 89 insertions(+) diff --git a/target/arm/helper.h b/target/arm/helper.h index 8b4b7d92f3..de2f5331dc 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1002,6 +1002,9 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode index cc9f4cdd85..31a0839bbb 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/neon-shared.decode @@ -52,6 +52,8 @@ VUDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 1 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp VUSDOT 1111 110 01 . 10 .... .... 1101 . q:1 . 0 .... 
\ vm=%vm_dp vn=%vn_dp vd=%vd_dp +VDOT_b16 1111 110 00 . 00 .... .... 1101 . q:1 . 0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp # VFM[AS]L VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \ diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 18d1a0eecc..a7429b293f 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1625,6 +1625,9 @@ FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0 FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0 +### SVE2 floating-point bfloat16 dot-product +BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 + ### SVE2 floating-point multiply-add long (indexed) FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0f15fa42fc..3c36de3df2 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -12235,6 +12235,16 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) } feature = dc_isar_feature(aa64_fcma, s); break; + case 0x1f: /* BFDOT */ + switch (size) { + case 1: + feature = dc_isar_feature(aa64_bf16, s); + break; + default: + unallocated_encoding(s); + return; + } + break; default: unallocated_encoding(s); return; @@ -12318,6 +12328,16 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) } return; + case 0xf: /* BFDOT */ + switch (size) { + case 1: + gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); + break; + default: + g_assert_not_reached(); + } + return; + default: g_assert_not_reached(); } diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 6d94229c69..9460857b2a 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -296,6 +296,15 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) gen_helper_gvec_usdot_b); } +static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) +{ + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, + gen_helper_gvec_bfdot); +} + static bool trans_VFML(DisasContext *s, arg_VFML *a) { int opr_sz; diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index fb692a1835..ed290827ad 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -8653,3 +8653,15 @@ static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a) { return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0); } + +static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + if (sve_access_check(s)) { + gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot, + a->rd, a->rn, a->rm, a->ra, 0); + } + return true; +} diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index e84b438340..7eefcd06ea 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2412,3 +2412,43 @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc, DO_MMLA_B(gvec_smmla_b, do_smmla_b) DO_MMLA_B(gvec_ummla_b, do_ummla_b) DO_MMLA_B(gvec_usmmla_b, do_usmmla_b) + +/* + * BFloat16 Dot Product + */ + +static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) +{ + /* FPCR is ignored for BFDOT and BFMMLA. 
*/ + float_status bf_status = { + .tininess_before_rounding = float_tininess_before_rounding, + .float_rounding_mode = float_round_to_odd_inf, + .flush_to_zero = true, + .flush_inputs_to_zero = true, + .default_nan_mode = true, + }; + float32 t1, t2; + + /* + * Extract each BFloat16 from the element pair, and shift + * them such that they become float32. + */ + t1 = float32_mul(e1 << 16, e2 << 16, &bf_status); + t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status); + t1 = float32_add(t1, t2, &bf_status); + t1 = float32_add(sum, t1, &bf_status); + + return t1; +} + +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] = bfdotadd(a[i], n[i], m[i]); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} From 839144784b613998edf7a7277ed2ed2015b0b4d7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:12 -0700 Subject: [PATCH 22/45] target/arm: Implement bfloat16 dot product (indexed) This is BFDOT for both AArch64 AdvSIMD and SVE, and VDOT.BF16 for AArch32 NEON. Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-8-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.h | 2 ++ target/arm/neon-shared.decode | 2 ++ target/arm/sve.decode | 3 +++ target/arm/translate-a64.c | 41 +++++++++++++++++++++++++++-------- target/arm/translate-neon.c | 9 ++++++++ target/arm/translate-sve.c | 12 ++++++++++ target/arm/vec_helper.c | 20 +++++++++++++++++ 7 files changed, 80 insertions(+), 9 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index de2f5331dc..376c1cef0f 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1004,6 +1004,8 @@ DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) #ifdef TARGET_AARCH64 #include "helper-a64.h" diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode index 31a0839bbb..fa3cf14e3a 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/neon-shared.decode @@ -81,6 +81,8 @@ VUSDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \ vn=%vn_dp vd=%vd_dp VSUDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 1 vm:4 \ vn=%vn_dp vd=%vd_dp +VDOT_b16_scal 1111 1110 0 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \ + vn=%vn_dp vd=%vd_dp %vfml_scalar_q0_rm 0:3 5:1 %vfml_scalar_q1_index 5:1 3:1 diff --git a/target/arm/sve.decode b/target/arm/sve.decode index a7429b293f..51f87e8937 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1633,3 +1633,6 @@ FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 + +### SVE2 floating-point bfloat16 dot-product (indexed) +BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... 
@rrxr_2 esz=2 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 3c36de3df2..71de75e568 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13449,8 +13449,22 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) return; } break; - case 0x0f: /* SUDOT, USDOT */ - if (is_scalar || (size & 1) || !dc_isar_feature(aa64_i8mm, s)) { + case 0x0f: + switch (size) { + case 0: /* SUDOT */ + case 2: /* USDOT */ + if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { + unallocated_encoding(s); + return; + } + break; + case 1: /* BFDOT */ + if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { + unallocated_encoding(s); + return; + } + break; + default: unallocated_encoding(s); return; } @@ -13570,13 +13584,22 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b); return; - case 0x0f: /* SUDOT, USDOT */ - gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, - extract32(insn, 23, 1) - ? gen_helper_gvec_usdot_idx_b - : gen_helper_gvec_sudot_idx_b); - return; - + case 0x0f: + switch (extract32(insn, 22, 2)) { + case 0: /* SUDOT */ + gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, + gen_helper_gvec_sudot_idx_b); + return; + case 1: /* BFDOT */ + gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, + gen_helper_gvec_bfdot_idx); + return; + case 2: /* USDOT */ + gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, + gen_helper_gvec_usdot_idx_b); + return; + } + g_assert_not_reached(); case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 9460857b2a..8099767792 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -390,6 +390,15 @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) gen_helper_gvec_sudot_idx_b); } +static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) +{ + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, + gen_helper_gvec_bfdot_idx); +} + static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) { int opr_sz; diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index ed290827ad..6f02030635 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -8665,3 +8665,15 @@ static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a) } return true; } + +static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + if (sve_access_check(s)) { + gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx, + a->rd, a->rn, a->rm, a->ra, a->index); + } + return true; +} diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 7eefcd06ea..74a497f38c 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2452,3 +2452,23 @@ void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, + void *va, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t index = simd_data(desc); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = vm; + + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(index)]; + + for (j = i; j < i + eltspersegment; j++) { + d[j] = bfdotadd(a[j], n[j], m_idx); + } + } + 
clear_tail(d, opr_sz, simd_maxsz(desc)); +} From 81266a1f58bf557280c6f7ce3cad1ba8ed8a56f1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:13 -0700 Subject: [PATCH 23/45] target/arm: Implement bfloat16 matrix multiply accumulate This is BFMMLA for both AArch64 AdvSIMD and SVE, and VMMLA.BF16 for AArch32 NEON. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 3 +++ target/arm/neon-shared.decode | 2 ++ target/arm/sve.decode | 6 +++-- target/arm/translate-a64.c | 10 +++++++++ target/arm/translate-neon.c | 9 ++++++++ target/arm/translate-sve.c | 12 ++++++++++ target/arm/vec_helper.c | 42 ++++++++++++++++++++++++++++++++++- 7 files changed, 81 insertions(+), 3 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 376c1cef0f..af75d7f25f 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1007,6 +1007,9 @@ DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode index fa3cf14e3a..4e0a25d27c 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/neon-shared.decode @@ -67,6 +67,8 @@ VUMMLA 1111 1100 0.10 .... .... 1100 .1.1 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp +VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \ vn=%vn_dp vd=%vd_dp size=1 diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 51f87e8937..6c17898dee 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1568,8 +1568,10 @@ SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm ### SVE2 floating point matrix multiply accumulate - -FMMLA 01100100 .. 1 ..... 111001 ..... ..... @rda_rn_rm +{ + BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 + FMMLA 01100100 .. 1 ..... 111 001 ..... ..... 
@rda_rn_rm +} ### SVE2 Memory Gather Load Group diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 71de75e568..9ce2f5a7d4 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -12235,6 +12235,13 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) } feature = dc_isar_feature(aa64_fcma, s); break; + case 0x1d: /* BFMMLA */ + if (size != MO_16 || !is_q) { + unallocated_encoding(s); + return; + } + feature = dc_isar_feature(aa64_bf16, s); + break; case 0x1f: /* BFDOT */ switch (size) { case 1: @@ -12328,6 +12335,9 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) } return; + case 0xd: /* BFMMLA */ + gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); + return; case 0xf: /* BFDOT */ switch (size) { case 1: diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 8099767792..9d227a1e13 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -4126,3 +4126,12 @@ static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a) return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, gen_helper_gvec_usmmla_b); } + +static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) +{ + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, + gen_helper_gvec_bfmmla); +} diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 6f02030635..4f575dc334 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -8677,3 +8677,15 @@ static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a) } return true; } + +static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + if (sve_access_check(s)) { + gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla, + a->rd, a->rn, a->rm, a->ra, 0); + } + return true; +} diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 74a497f38c..27e9bdd329 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2385,7 +2385,7 @@ static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc, * Process the entire segment at once, writing back the * results only after we've consumed all of the inputs. * - * Key to indicies by column: + * Key to indices by column: * i j i j */ sum0 = a[H4(0 + 0)]; @@ -2472,3 +2472,43 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc) +{ + intptr_t s, opr_sz = simd_oprsz(desc); + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = vm; + + for (s = 0; s < opr_sz / 4; s += 4) { + float32 sum00, sum01, sum10, sum11; + + /* + * Process the entire segment at once, writing back the + * results only after we've consumed all of the inputs. 
+ * + * Key to indicies by column: + * i j i k j k + */ + sum00 = a[s + H4(0 + 0)]; + sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]); + sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]); + + sum01 = a[s + H4(0 + 1)]; + sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]); + sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]); + + sum10 = a[s + H4(2 + 0)]; + sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]); + sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]); + + sum11 = a[s + H4(2 + 1)]; + sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]); + sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]); + + d[s + H4(0 + 0)] = sum00; + d[s + H4(0 + 1)] = sum01; + d[s + H4(2 + 0)] = sum10; + d[s + H4(2 + 1)] = sum11; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} From 5693887f2e97335362d945c778f2bbddd4e9d1bb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:14 -0700 Subject: [PATCH 24/45] target/arm: Implement bfloat widening fma (vector) This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE, and VFMA{B,T}.BF16 for AArch32 NEON. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 3 +++ target/arm/neon-shared.decode | 3 +++ target/arm/sve.decode | 3 +++ target/arm/translate-a64.c | 13 +++++++++---- target/arm/translate-neon.c | 9 +++++++++ target/arm/translate-sve.c | 30 ++++++++++++++++++++++++++++++ target/arm/vec_helper.c | 16 ++++++++++++++++ 7 files changed, 73 insertions(+), 4 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index af75d7f25f..36b3c9dd2d 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1010,6 +1010,9 @@ DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode index 4e0a25d27c..b61addd98b 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/neon-shared.decode @@ -70,6 +70,9 @@ VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \ VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp +VFMA_b16 1111 110 0 0.11 .... .... 1000 . q:1 . 1 .... \ + vm=%vm_dp vn=%vn_dp vd=%vd_dp + VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \ vn=%vn_dp vd=%vd_dp size=1 VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \ diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 6c17898dee..5281164eea 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1627,6 +1627,9 @@ FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0 FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0 +BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 +BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 + ### SVE2 floating-point bfloat16 dot-product BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... 
@rda_rn_rm_e0 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 9ce2f5a7d4..8dcb15ac0f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -12242,9 +12242,10 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) } feature = dc_isar_feature(aa64_bf16, s); break; - case 0x1f: /* BFDOT */ + case 0x1f: switch (size) { - case 1: + case 1: /* BFDOT */ + case 3: /* BFMLAL{B,T} */ feature = dc_isar_feature(aa64_bf16, s); break; default: @@ -12338,11 +12339,15 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0xd: /* BFMMLA */ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); return; - case 0xf: /* BFDOT */ + case 0xf: switch (size) { - case 1: + case 1: /* BFDOT */ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); break; + case 3: /* BFMLAL{B,T} */ + gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, + gen_helper_gvec_bfmlal); + break; default: g_assert_not_reached(); } diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 9d227a1e13..4d0c2494dc 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -4135,3 +4135,12 @@ static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a) return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0, gen_helper_gvec_bfmmla); } + +static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) +{ + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, + gen_helper_gvec_bfmlal); +} diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 4f575dc334..ba8f5d7b7d 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -8689,3 +8689,33 @@ static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a) } return true; } + +static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + if (sve_access_check(s)) { + TCGv_ptr status = fpstatus_ptr(FPST_FPCR); + unsigned vsz = vec_full_reg_size(s); + + tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + status, vsz, vsz, sel, + gen_helper_gvec_bfmlal); + tcg_temp_free_ptr(status); + } + return true; +} + +static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a) +{ + return do_BFMLAL_zzzw(s, a, false); +} + +static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a) +{ + return do_BFMLAL_zzzw(s, a, true); +} diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 27e9bdd329..d82736b5e6 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2512,3 +2512,19 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, + void *stat, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + intptr_t sel = simd_data(desc); + float32 *d = vd, *a = va; + bfloat16 *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + float32 nn = n[H2(i * 2 + sel)] << 16; + float32 mm = m[H2(i * 2 + sel)] << 16; + d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat); + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} From 458d0ab6830f9bcd76af9df4d1d4db8ab646fcef Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:15 -0700 Subject: [PATCH 25/45] target/arm: Implement bfloat widening 
fma (indexed) This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE, and VFMA{B,T}.BF16 for AArch32 NEON. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-11-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper.h | 2 ++ target/arm/neon-shared.decode | 2 ++ target/arm/sve.decode | 2 ++ target/arm/translate-a64.c | 15 ++++++++++++++- target/arm/translate-neon.c | 10 ++++++++++ target/arm/translate-sve.c | 30 ++++++++++++++++++++++++++++++ target/arm/vec_helper.c | 22 ++++++++++++++++++++++ 7 files changed, 82 insertions(+), 1 deletion(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 36b3c9dd2d..dc6eb96d43 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -1012,6 +1012,8 @@ DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) #ifdef TARGET_AARCH64 #include "helper-a64.h" diff --git a/target/arm/neon-shared.decode b/target/arm/neon-shared.decode index b61addd98b..df80e6ebf6 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/neon-shared.decode @@ -95,3 +95,5 @@ VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \ rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0 VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1 +VFMA_b16_scal 1111 1110 0.11 .... .... 1000 . q:1 . 1 . vm:3 \ + index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 5281164eea..a62c169f1a 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -1638,6 +1638,8 @@ FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 +BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 +BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 ### SVE2 floating-point bfloat16 dot-product (indexed) BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... 
@rrxr_2 esz=2 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 8dcb15ac0f..8713dfec17 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13472,18 +13472,27 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } + size = MO_32; break; case 1: /* BFDOT */ if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { unallocated_encoding(s); return; } + size = MO_32; + break; + case 3: /* BFMLAL{B,T} */ + if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { + unallocated_encoding(s); + return; + } + /* can't set is_fp without other incorrect size checks */ + size = MO_16; break; default: unallocated_encoding(s); return; } - size = MO_32; break; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ @@ -13613,6 +13622,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, gen_helper_gvec_usdot_idx_b); return; + case 3: /* BFMLAL{B,T} */ + gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, + gen_helper_gvec_bfmlal_idx); + return; } g_assert_not_reached(); case 0x11: /* FCMLA #0 */ diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 4d0c2494dc..633fef3bf7 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -4144,3 +4144,13 @@ static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a) return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD, gen_helper_gvec_bfmlal); } + +static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a) +{ + if (!dc_isar_feature(aa32_bf16, s)) { + return false; + } + return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm, + (a->index << 1) | a->q, FPST_STD, + gen_helper_gvec_bfmlal_idx); +} diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index ba8f5d7b7d..46210eb696 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -8719,3 +8719,33 @@ static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a) { return do_BFMLAL_zzzw(s, a, true); } + +static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) +{ + if (!dc_isar_feature(aa64_sve_bf16, s)) { + return false; + } + if (sve_access_check(s)) { + TCGv_ptr status = fpstatus_ptr(FPST_FPCR); + unsigned vsz = vec_full_reg_size(s); + + tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + status, vsz, vsz, (a->index << 1) | sel, + gen_helper_gvec_bfmlal_idx); + tcg_temp_free_ptr(status); + } + return true; +} + +static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a) +{ + return do_BFMLAL_zzxw(s, a, false); +} + +static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a) +{ + return do_BFMLAL_zzxw(s, a, true); +} diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index d82736b5e6..5862f187cd 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2528,3 +2528,25 @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, + void *va, void *stat, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + float32 *d = vd, *a = va; + bfloat16 *n = vn, *m = vm; + + for (i = 0; i < elements; i += 
eltspersegment) { + float32 m_idx = m[H2(2 * i + index)] << 16; + + for (j = i; j < i + eltspersegment; j++) { + float32 n_j = n[H2(2 * j + sel)] << 16; + d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} From 6c47a9053cc2255ec444e0f964abd496fdce6433 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:16 -0700 Subject: [PATCH 26/45] linux-user/aarch64: Enable hwcap bits for bfloat16 Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-12-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- linux-user/elfload.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 1ab97e38e0..17ab06f612 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -659,7 +659,9 @@ static uint32_t get_elf_hwcap2(void) GET_FEATURE_ID(aa64_sve_i8mm, ARM_HWCAP2_A64_SVEI8MM); GET_FEATURE_ID(aa64_sve_f32mm, ARM_HWCAP2_A64_SVEF32MM); GET_FEATURE_ID(aa64_sve_f64mm, ARM_HWCAP2_A64_SVEF64MM); + GET_FEATURE_ID(aa64_sve_bf16, ARM_HWCAP2_A64_SVEBF16); GET_FEATURE_ID(aa64_i8mm, ARM_HWCAP2_A64_I8MM); + GET_FEATURE_ID(aa64_bf16, ARM_HWCAP2_A64_BF16); GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG); GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI); GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE); From 3c93dfa42c394fdd55684f2fbf24cf2f39b97d47 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 May 2021 15:58:17 -0700 Subject: [PATCH 27/45] target/arm: Enable BFloat16 extensions Disable BF16 again for !have_neon and !have_vfp during realize. Signed-off-by: Richard Henderson Message-id: 20210525225817.400336-13-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.c | 3 +++ target/arm/cpu64.c | 3 +++ target/arm/cpu_tcg.c | 1 + 3 files changed, 7 insertions(+) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 9ad6f5911b..9cddfd6a44 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1473,6 +1473,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = cpu->isar.id_isar6; u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); + u = FIELD_DP32(u, ID_ISAR6, BF16, 0); cpu->isar.id_isar6 = u; u = cpu->isar.mvfr0; @@ -1513,6 +1514,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) t = cpu->isar.id_aa64isar1; t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0); + t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0); t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0); cpu->isar.id_aa64isar1 = t; @@ -1528,6 +1530,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = cpu->isar.id_isar6; u = FIELD_DP32(u, ID_ISAR6, DP, 0); u = FIELD_DP32(u, ID_ISAR6, FHM, 0); + u = FIELD_DP32(u, ID_ISAR6, BF16, 0); u = FIELD_DP32(u, ID_ISAR6, I8MM, 0); cpu->isar.id_isar6 = u; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index d561dc7acc..1c23187d1a 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -661,6 +661,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* ARMv8.4-RCPC */ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); @@ -708,6 +709,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* PMULL */ t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); + t = FIELD_DP64(t, 
ID_AA64ZFR0, BFLOAT16, 1); t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); @@ -731,6 +733,7 @@ static void aarch64_max_initfn(Object *obj) u = FIELD_DP32(u, ID_ISAR6, FHM, 1); u = FIELD_DP32(u, ID_ISAR6, SB, 1); u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); + u = FIELD_DP32(u, ID_ISAR6, BF16, 1); u = FIELD_DP32(u, ID_ISAR6, I8MM, 1); cpu->isar.id_isar6 = u; diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c index 2e0e508f0e..d2d97115ea 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -968,6 +968,7 @@ static void arm_max_initfn(Object *obj) t = FIELD_DP32(t, ID_ISAR6, FHM, 1); t = FIELD_DP32(t, ID_ISAR6, SB, 1); t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); + t = FIELD_DP32(t, ID_ISAR6, BF16, 1); t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); cpu->isar.id_isar6 = t; From d57bc3c1098f1614db1c5763f3672fe01d768972 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:30 +0100 Subject: [PATCH 28/45] hvf: Move assert_hvf_ok() into common directory Until now, Hypervisor.framework has only been available on x86_64 systems. With Apple Silicon shipping now, it extends its reach to aarch64. To prepare for support for multiple architectures, let's start moving common code out into its own accel directory. This patch moves assert_hvf_ok() and introduces generic build infrastructure. Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-2-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- MAINTAINERS | 8 +++++++ accel/hvf/hvf-all.c | 47 ++++++++++++++++++++++++++++++++++++++++ accel/hvf/meson.build | 6 +++++ accel/meson.build | 1 + include/sysemu/hvf_int.h | 18 +++++++++++++++ target/i386/hvf/hvf.c | 33 +--------------------------- 6 files changed, 81 insertions(+), 32 deletions(-) create mode 100644 accel/hvf/hvf-all.c create mode 100644 accel/hvf/meson.build create mode 100644 include/sysemu/hvf_int.h diff --git a/MAINTAINERS b/MAINTAINERS index 96a4eeb5a5..de5426f672 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -436,7 +436,15 @@ M: Roman Bolshakov W: https://wiki.qemu.org/Features/HVF S: Maintained F: target/i386/hvf/ + +HVF +M: Cameron Esfahani +M: Roman Bolshakov +W: https://wiki.qemu.org/Features/HVF +S: Maintained +F: accel/hvf/ F: include/sysemu/hvf.h +F: include/sysemu/hvf_int.h WHPX CPUs M: Sunil Muthuswamy diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c new file mode 100644 index 0000000000..f185b0830a --- /dev/null +++ b/accel/hvf/hvf-all.c @@ -0,0 +1,47 @@ +/* + * QEMU Hypervisor.framework support + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" + +void assert_hvf_ok(hv_return_t ret) +{ + if (ret == HV_SUCCESS) { + return; + } + + switch (ret) { + case HV_ERROR: + error_report("Error: HV_ERROR"); + break; + case HV_BUSY: + error_report("Error: HV_BUSY"); + break; + case HV_BAD_ARGUMENT: + error_report("Error: HV_BAD_ARGUMENT"); + break; + case HV_NO_RESOURCES: + error_report("Error: HV_NO_RESOURCES"); + break; + case HV_NO_DEVICE: + error_report("Error: HV_NO_DEVICE"); + break; + case HV_UNSUPPORTED: + error_report("Error: HV_UNSUPPORTED"); + break; + default: + error_report("Unknown Error"); + } + + abort(); +} diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build new file mode 100644 index 0000000000..227b11cd71 --- /dev/null +++ b/accel/hvf/meson.build @@ -0,0 +1,6 @@ +hvf_ss = ss.source_set() +hvf_ss.add(files( + 'hvf-all.c', +)) + +specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss) diff --git a/accel/meson.build b/accel/meson.build index b44ba30c86..dfd808d2c8 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -2,6 +2,7 @@ specific_ss.add(files('accel-common.c')) softmmu_ss.add(files('accel-softmmu.c')) user_ss.add(files('accel-user.c')) +subdir('hvf') subdir('qtest') subdir('kvm') subdir('tcg') diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h new file mode 100644 index 0000000000..3deb4cfacc --- /dev/null +++ b/include/sysemu/hvf_int.h @@ -0,0 +1,18 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in HVF-specific code */ + +#ifndef HVF_INT_H +#define HVF_INT_H + +#include + +void assert_hvf_ok(hv_return_t ret); + +#endif diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index f044181d06..32f42f1592 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -51,6 +51,7 @@ #include "qemu/error-report.h" #include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" #include "sysemu/runstate.h" #include "hvf-i386.h" #include "vmcs.h" @@ -76,38 +77,6 @@ HVFState *hvf_state; -static void assert_hvf_ok(hv_return_t ret) -{ - if (ret == HV_SUCCESS) { - return; - } - - switch (ret) { - case HV_ERROR: - error_report("Error: HV_ERROR"); - break; - case HV_BUSY: - error_report("Error: HV_BUSY"); - break; - case HV_BAD_ARGUMENT: - error_report("Error: HV_BAD_ARGUMENT"); - break; - case HV_NO_RESOURCES: - error_report("Error: HV_NO_RESOURCES"); - break; - case HV_NO_DEVICE: - error_report("Error: HV_NO_DEVICE"); - break; - case HV_UNSUPPORTED: - error_report("Error: HV_UNSUPPORTED"); - break; - default: - error_report("Unknown Error"); - } - - abort(); -} - /* Memory slots */ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) { From 39a3445012e44b01f6a7e7b29cf7477ef2a656eb Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:31 +0100 Subject: [PATCH 29/45] hvf: Move vcpu thread functions into common directory Until now, Hypervisor.framework has only been available on x86_64 systems. With Apple Silicon shipping now, it extends its reach to aarch64. To prepare for support for multiple architectures, let's start moving common code out into its own accel directory. This patch moves the vCPU thread loop over. 
Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-3-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- {target/i386 => accel}/hvf/hvf-accel-ops.c | 0 {target/i386 => accel}/hvf/hvf-accel-ops.h | 0 accel/hvf/meson.build | 1 + target/i386/hvf/meson.build | 1 - target/i386/hvf/x86hvf.c | 2 +- 5 files changed, 2 insertions(+), 2 deletions(-) rename {target/i386 => accel}/hvf/hvf-accel-ops.c (100%) rename {target/i386 => accel}/hvf/hvf-accel-ops.h (100%) diff --git a/target/i386/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c similarity index 100% rename from target/i386/hvf/hvf-accel-ops.c rename to accel/hvf/hvf-accel-ops.c diff --git a/target/i386/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h similarity index 100% rename from target/i386/hvf/hvf-accel-ops.h rename to accel/hvf/hvf-accel-ops.h diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build index 227b11cd71..fc52cb7843 100644 --- a/accel/hvf/meson.build +++ b/accel/hvf/meson.build @@ -1,6 +1,7 @@ hvf_ss = ss.source_set() hvf_ss.add(files( 'hvf-all.c', + 'hvf-accel-ops.c', )) specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss) diff --git a/target/i386/hvf/meson.build b/target/i386/hvf/meson.build index d253d5fd10..f6d4c394d3 100644 --- a/target/i386/hvf/meson.build +++ b/target/i386/hvf/meson.build @@ -1,6 +1,5 @@ i386_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( 'hvf.c', - 'hvf-accel-ops.c', 'x86.c', 'x86_cpuid.c', 'x86_decode.c', diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 0d7533742e..2b99f3eaa2 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -32,7 +32,7 @@ #include #include -#include "hvf-accel-ops.h" +#include "accel/hvf/hvf-accel-ops.h" void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg, SegmentCache *qseg, bool is_tr) From 358e7505b2795d3df505661da7be9dc81eaa91d9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:31 +0100 Subject: [PATCH 30/45] hvf: Move cpu functions into common directory Until now, Hypervisor.framework has only been available on x86_64 systems. With Apple Silicon shipping now, it extends its reach to aarch64. To prepare for support for multiple architectures, let's start moving common code out into its own accel directory. This patch moves CPU and memory operations over. While at it, make sure the code is consumable on non-i386 systems. 
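[Editor's note, schematic only; the real hunks are in the diff below. Making the common file "consumable on non-i386 systems" largely comes down to swapping x86-only headers for arch-neutral ones, along the lines of:

    -#include "target/i386/cpu.h"   /* x86-only; unusable from common code */
    +#include "sysemu/hvf_int.h"    /* arch-neutral HVF internals */

together with moving the arch-independent memory-slot and dirty-tracking logic out of target/i386/hvf/hvf.c.]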
Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-4-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 308 ++++++++++++++++++++++++++++++++++++- include/sysemu/hvf_int.h | 4 + target/i386/hvf/hvf-i386.h | 2 - target/i386/hvf/hvf.c | 302 ------------------------------------ target/i386/hvf/x86hvf.h | 2 - 5 files changed, 311 insertions(+), 307 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index cbaad238e0..c2136dfbb8 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -50,13 +50,319 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "sysemu/cpus.h" #include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" #include "sysemu/runstate.h" -#include "target/i386/cpu.h" #include "qemu/guest-random.h" #include "hvf-accel-ops.h" +HVFState *hvf_state; + +/* Memory slots */ + +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) +{ + hvf_slot *slot; + int x; + for (x = 0; x < hvf_state->num_slots; ++x) { + slot = &hvf_state->slots[x]; + if (slot->size && start < (slot->start + slot->size) && + (start + size) > slot->start) { + return slot; + } + } + return NULL; +} + +struct mac_slot { + int present; + uint64_t size; + uint64_t gpa_start; + uint64_t gva; +}; + +struct mac_slot mac_slots[32]; + +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) +{ + struct mac_slot *macslot; + hv_return_t ret; + + macslot = &mac_slots[slot->slot_id]; + + if (macslot->present) { + if (macslot->size != slot->size) { + macslot->present = 0; + ret = hv_vm_unmap(macslot->gpa_start, macslot->size); + assert_hvf_ok(ret); + } + } + + if (!slot->size) { + return 0; + } + + macslot->present = 1; + macslot->gpa_start = slot->start; + macslot->size = slot->size; + ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags); + assert_hvf_ok(ret); + return 0; +} + +void hvf_set_phys_mem(MemoryRegionSection *section, bool add) +{ + hvf_slot *mem; + MemoryRegion *area = section->mr; + bool writeable = !area->readonly && !area->rom_device; + hv_memory_flags_t flags; + + if (!memory_region_is_ram(area)) { + if (writeable) { + return; + } else if (!memory_region_is_romd(area)) { + /* + * If the memory device is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; + } + } + + mem = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + if (mem && add) { + if (mem->size == int128_get64(section->size) && + mem->start == section->offset_within_address_space && + mem->mem == (memory_region_get_ram_ptr(area) + + section->offset_within_region)) { + return; /* Same region was attempted to register, go away. */ + } + } + + /* Region needs to be reset. set the size to 0 and remap it. */ + if (mem) { + mem->size = 0; + if (do_hvf_set_memory(mem, 0)) { + error_report("Failed to reset overlapping slot"); + abort(); + } + } + + if (!add) { + return; + } + + if (area->readonly || + (!memory_region_is_ram(area) && memory_region_is_romd(area))) { + flags = HV_MEMORY_READ | HV_MEMORY_EXEC; + } else { + flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + } + + /* Now make a new slot. 
*/ + int x; + + for (x = 0; x < hvf_state->num_slots; ++x) { + mem = &hvf_state->slots[x]; + if (!mem->size) { + break; + } + } + + if (x == hvf_state->num_slots) { + error_report("No free slots"); + abort(); + } + + mem->size = int128_get64(section->size); + mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; + mem->start = section->offset_within_address_space; + mem->region = area; + + if (do_hvf_set_memory(mem, flags)) { + error_report("Error registering new memory slot"); + abort(); + } +} + +static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + if (!cpu->vcpu_dirty) { + hvf_get_registers(cpu); + cpu->vcpu_dirty = true; + } +} + +void hvf_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, + run_on_cpu_data arg) +{ + hvf_put_registers(cpu); + cpu->vcpu_dirty = false; +} + +void hvf_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); +} + +static void do_hvf_cpu_synchronize_post_init(CPUState *cpu, + run_on_cpu_data arg) +{ + hvf_put_registers(cpu); + cpu->vcpu_dirty = false; +} + +void hvf_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); +} + +static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu, + run_on_cpu_data arg) +{ + cpu->vcpu_dirty = true; +} + +void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); +} + +static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) +{ + hvf_slot *slot; + + slot = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + /* protect region against writes; begin tracking it */ + if (on) { + slot->flags |= HVF_SLOT_LOG; + hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ); + /* stop tracking region*/ + } else { + slot->flags &= ~HVF_SLOT_LOG; + hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_WRITE); + } +} + +static void hvf_log_start(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (old != 0) { + return; + } + + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_log_stop(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (new != 0) { + return; + } + + hvf_set_dirty_tracking(section, 0); +} + +static void hvf_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + /* + * sync of dirty pages is handled elsewhere; just make sure we keep + * tracking the region. 
+ */ + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, true); +} + +static void hvf_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, false); +} + +static MemoryListener hvf_memory_listener = { + .priority = 10, + .region_add = hvf_region_add, + .region_del = hvf_region_del, + .log_start = hvf_log_start, + .log_stop = hvf_log_stop, + .log_sync = hvf_log_sync, +}; + +static void dummy_signal(int sig) +{ +} + +bool hvf_allowed; + +static int hvf_accel_init(MachineState *ms) +{ + int x; + hv_return_t ret; + HVFState *s; + + ret = hv_vm_create(HV_VM_DEFAULT); + assert_hvf_ok(ret); + + s = g_new0(HVFState, 1); + + s->num_slots = 32; + for (x = 0; x < s->num_slots; ++x) { + s->slots[x].size = 0; + s->slots[x].slot_id = x; + } + + hvf_state = s; + memory_listener_register(&hvf_memory_listener, &address_space_memory); + return 0; +} + +static void hvf_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HVF"; + ac->init_machine = hvf_accel_init; + ac->allowed = &hvf_allowed; +} + +static const TypeInfo hvf_accel_type = { + .name = TYPE_HVF_ACCEL, + .parent = TYPE_ACCEL, + .class_init = hvf_accel_class_init, +}; + +static void hvf_type_init(void) +{ + type_register_static(&hvf_accel_type); +} + +type_init(hvf_type_init); + /* * The HVF-specific vCPU thread function. This one should only run when the host * CPU supports the VMX "unrestricted guest" feature. diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index 3deb4cfacc..4c657b054c 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -13,6 +13,10 @@ #include +void hvf_set_phys_mem(MemoryRegionSection *, bool); void assert_hvf_ok(hv_return_t ret); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); +int hvf_put_registers(CPUState *); +int hvf_get_registers(CPUState *); #endif diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 59cfca8875..94e5c788c4 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -51,9 +51,7 @@ struct HVFState { }; extern HVFState *hvf_state; -void hvf_set_phys_mem(MemoryRegionSection *, bool); void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int); -hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); #ifdef NEED_CPU_H /* Functions exported to host specific mode */ diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 32f42f1592..100ede2a4d 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -75,137 +75,6 @@ #include "hvf-accel-ops.h" -HVFState *hvf_state; - -/* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) -{ - hvf_slot *slot; - int x; - for (x = 0; x < hvf_state->num_slots; ++x) { - slot = &hvf_state->slots[x]; - if (slot->size && start < (slot->start + slot->size) && - (start + size) > slot->start) { - return slot; - } - } - return NULL; -} - -struct mac_slot { - int present; - uint64_t size; - uint64_t gpa_start; - uint64_t gva; -}; - -struct mac_slot mac_slots[32]; - -static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) -{ - struct mac_slot *macslot; - hv_return_t ret; - - macslot = &mac_slots[slot->slot_id]; - - if (macslot->present) { - if (macslot->size != slot->size) { - macslot->present = 0; - ret = hv_vm_unmap(macslot->gpa_start, macslot->size); - assert_hvf_ok(ret); - } - } - - if (!slot->size) { - return 0; - } - - macslot->present = 1; - 
macslot->gpa_start = slot->start; - macslot->size = slot->size; - ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags); - assert_hvf_ok(ret); - return 0; -} - -void hvf_set_phys_mem(MemoryRegionSection *section, bool add) -{ - hvf_slot *mem; - MemoryRegion *area = section->mr; - bool writeable = !area->readonly && !area->rom_device; - hv_memory_flags_t flags; - - if (!memory_region_is_ram(area)) { - if (writeable) { - return; - } else if (!memory_region_is_romd(area)) { - /* - * If the memory device is not in romd_mode, then we actually want - * to remove the hvf memory slot so all accesses will trap. - */ - add = false; - } - } - - mem = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - if (mem && add) { - if (mem->size == int128_get64(section->size) && - mem->start == section->offset_within_address_space && - mem->mem == (memory_region_get_ram_ptr(area) + - section->offset_within_region)) { - return; /* Same region was attempted to register, go away. */ - } - } - - /* Region needs to be reset. set the size to 0 and remap it. */ - if (mem) { - mem->size = 0; - if (do_hvf_set_memory(mem, 0)) { - error_report("Failed to reset overlapping slot"); - abort(); - } - } - - if (!add) { - return; - } - - if (area->readonly || - (!memory_region_is_ram(area) && memory_region_is_romd(area))) { - flags = HV_MEMORY_READ | HV_MEMORY_EXEC; - } else { - flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - } - - /* Now make a new slot. */ - int x; - - for (x = 0; x < hvf_state->num_slots; ++x) { - mem = &hvf_state->slots[x]; - if (!mem->size) { - break; - } - } - - if (x == hvf_state->num_slots) { - error_report("No free slots"); - abort(); - } - - mem->size = int128_get64(section->size); - mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; - mem->start = section->offset_within_address_space; - mem->region = area; - - if (do_hvf_set_memory(mem, flags)) { - error_report("Error registering new memory slot"); - abort(); - } -} - void vmx_update_tpr(CPUState *cpu) { /* TODO: need integrate APIC handling */ @@ -245,56 +114,6 @@ void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer, } } -static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - if (!cpu->vcpu_dirty) { - hvf_get_registers(cpu); - cpu->vcpu_dirty = true; - } -} - -void hvf_cpu_synchronize_state(CPUState *cpu) -{ - if (!cpu->vcpu_dirty) { - run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, - run_on_cpu_data arg) -{ - hvf_put_registers(cpu); - cpu->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_reset(CPUState *cpu) -{ - run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -static void do_hvf_cpu_synchronize_post_init(CPUState *cpu, - run_on_cpu_data arg) -{ - hvf_put_registers(cpu); - cpu->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_init(CPUState *cpu) -{ - run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - -static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu, - run_on_cpu_data arg) -{ - cpu->vcpu_dirty = true; -} - -void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) -{ - run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); -} - static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) { int read, write; @@ -339,78 +158,6 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } 
-static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) -{ - hvf_slot *slot; - - slot = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - /* protect region against writes; begin tracking it */ - if (on) { - slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ); - /* stop tracking region*/ - } else { - slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE); - } -} - -static void hvf_log_start(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) -{ - if (old != 0) { - return; - } - - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_log_stop(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) -{ - if (new != 0) { - return; - } - - hvf_set_dirty_tracking(section, 0); -} - -static void hvf_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - /* - * sync of dirty pages is handled elsewhere; just make sure we keep - * tracking the region. - */ - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, true); -} - -static void hvf_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, false); -} - -static MemoryListener hvf_memory_listener = { - .priority = 10, - .region_add = hvf_region_add, - .region_del = hvf_region_del, - .log_start = hvf_log_start, - .log_stop = hvf_log_stop, - .log_sync = hvf_log_sync, -}; - void hvf_vcpu_destroy(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -421,10 +168,6 @@ void hvf_vcpu_destroy(CPUState *cpu) assert_hvf_ok(ret); } -static void dummy_signal(int sig) -{ -} - static void init_tsc_freq(CPUX86State *env) { size_t length; @@ -931,48 +674,3 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } - -bool hvf_allowed; - -static int hvf_accel_init(MachineState *ms) -{ - int x; - hv_return_t ret; - HVFState *s; - - ret = hv_vm_create(HV_VM_DEFAULT); - assert_hvf_ok(ret); - - s = g_new0(HVFState, 1); - - s->num_slots = 32; - for (x = 0; x < s->num_slots; ++x) { - s->slots[x].size = 0; - s->slots[x].slot_id = x; - } - - hvf_state = s; - memory_listener_register(&hvf_memory_listener, &address_space_memory); - return 0; -} - -static void hvf_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "HVF"; - ac->init_machine = hvf_accel_init; - ac->allowed = &hvf_allowed; -} - -static const TypeInfo hvf_accel_type = { - .name = TYPE_HVF_ACCEL, - .parent = TYPE_ACCEL, - .class_init = hvf_accel_class_init, -}; - -static void hvf_type_init(void) -{ - type_register_static(&hvf_accel_type); -} - -type_init(hvf_type_init); diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h index 635ab0f34e..99ed8d608d 100644 --- a/target/i386/hvf/x86hvf.h +++ b/target/i386/hvf/x86hvf.h @@ -21,8 +21,6 @@ #include "x86_descr.h" int hvf_process_events(CPUState *); -int hvf_put_registers(CPUState *); -int hvf_get_registers(CPUState *); bool hvf_inject_interrupts(CPUState *); void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg, SegmentCache *qseg, bool is_tr); From 861457ce73bc9e0e56d866b5c06e2c745fe53448 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:31 +0100 Subject: [PATCH 31/45] hvf: Move hvf internal definitions into common header Until now, Hypervisor.framework has only been available 
on x86_64 systems. With Apple Silicon shipping now, it extends its reach to aarch64. To prepare for support for multiple architectures, let's start moving common code out into its own accel directory. This patch moves a few internal struct and constant defines over. Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-5-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- include/sysemu/hvf_int.h | 30 ++++++++++++++++++++++++++++++ target/i386/hvf/hvf-i386.h | 31 +------------------------------ 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index 4c657b054c..ef84a24dd9 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -13,6 +13,36 @@ #include +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; +}; +extern HVFState *hvf_state; + void hvf_set_phys_mem(MemoryRegionSection *, bool); void assert_hvf_ok(hv_return_t ret); hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 94e5c788c4..76e9235524 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -18,39 +18,10 @@ #include "qemu/accel.h" #include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" #include "cpu.h" #include "x86.h" -/* hvf_slot flags */ -#define HVF_SLOT_LOG (1 << 0) - -typedef struct hvf_slot { - uint64_t start; - uint64_t size; - uint8_t *mem; - int slot_id; - uint32_t flags; - MemoryRegion *region; -} hvf_slot; - -typedef struct hvf_vcpu_caps { - uint64_t vmx_cap_pinbased; - uint64_t vmx_cap_procbased; - uint64_t vmx_cap_procbased2; - uint64_t vmx_cap_entry; - uint64_t vmx_cap_exit; - uint64_t vmx_cap_preemption_timer; -} hvf_vcpu_caps; - -struct HVFState { - AccelState parent; - hvf_slot slots[32]; - int num_slots; - - hvf_vcpu_caps *hvf_caps; -}; -extern HVFState *hvf_state; - void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int); #ifdef NEED_CPU_H From 3f965ef4e013d37391f5bb94c243d4a4b1825b1f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:32 +0100 Subject: [PATCH 32/45] hvf: Make hvf_set_phys_mem() static The hvf_set_phys_mem() function is only called within the same file. Make it static. 
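
The effect of the change is purely one of linkage. As a minimal self-contained sketch (with a hypothetical stand-in for MemoryRegionSection, not the real QEMU types):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in so the sketch compiles on its own. */
    typedef struct MemoryRegionSection { int dummy; } MemoryRegionSection;

    /*
     * 'static' gives the helper internal linkage: no other translation
     * unit can reference the symbol, so its prototype can be dropped
     * from the shared header, and -Wunused-function can flag the
     * definition if the last local caller ever disappears.
     */
    static void set_phys_mem(MemoryRegionSection *section, bool add)
    {
        printf("%s slot for section %p\n",
               add ? "mapping" : "removing", (void *)section);
    }

    int main(void)
    {
        MemoryRegionSection sec = { 0 };
        set_phys_mem(&sec, true);   /* the region_add path */
        set_phys_mem(&sec, false);  /* the region_del path */
        return 0;
    }
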
Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-6-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 2 +- include/sysemu/hvf_int.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index c2136dfbb8..5bec7b4d6d 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -114,7 +114,7 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) return 0; } -void hvf_set_phys_mem(MemoryRegionSection *section, bool add) +static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { hvf_slot *mem; MemoryRegion *area = section->mr; diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index ef84a24dd9..d15fa3302a 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -43,7 +43,6 @@ struct HVFState { }; extern HVFState *hvf_state; -void hvf_set_phys_mem(MemoryRegionSection *, bool); void assert_hvf_ok(hv_return_t ret); hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); int hvf_put_registers(CPUState *); From 6e19f86a80746270ab2ab6fe4828c680d982fa72 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:32 +0100 Subject: [PATCH 33/45] hvf: Remove use of hv_uvaddr_t and hv_gpaddr_t The ARM version of Hypervisor.framework no longer defines these two types, so let's just revert to standard ones. Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-7-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 5bec7b4d6d..7370fcfba0 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -109,7 +109,7 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) macslot->present = 1; macslot->gpa_start = slot->start; macslot->size = slot->size; - ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags); + ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); assert_hvf_ok(ret); return 0; } @@ -253,12 +253,12 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) /* protect region against writes; begin tracking it */ if (on) { slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, HV_MEMORY_READ); /* stop tracking region*/ } else { slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, HV_MEMORY_READ | HV_MEMORY_WRITE); } } From cfe58455f3b79810375cb4ef560071c1ecff6dea Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:32 +0100 Subject: [PATCH 34/45] hvf: Split out common code on vcpu init and destroy Until now, Hypervisor.framework has only been available on x86_64 systems. With Apple Silicon shipping now, it extends its reach to aarch64. To prepare for support for multiple architectures, let's start moving common code out into its own accel directory. This patch splits the vcpu init and destroy functions into a generic and an architecture specific portion. This also allows us to move the generic functions into the generic hvf code, removing exported functions. 
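
The shape of the split, as a rough standalone sketch (hypothetical minimal CPUState, and a plain integer standing in for the hv_vcpu_create() handle; the real division is in the diff below):

    #include <stdio.h>

    /* Hypothetical minimal CPUState; the real one is in hw/core/cpu.h. */
    typedef struct CPUState { int hvf_fd; } CPUState;

    /*
     * Per-architecture hook. In QEMU each target implements this; it is
     * defined further down here only to keep the sketch self-contained.
     */
    int hvf_arch_init_vcpu(CPUState *cpu);

    /* Generic part: setup common to all targets, then the arch hook. */
    static int hvf_init_vcpu(CPUState *cpu)
    {
        cpu->hvf_fd = 1;            /* stands in for hv_vcpu_create() */
        return hvf_arch_init_vcpu(cpu);
    }

    /* Example target-side implementation of the hook. */
    int hvf_arch_init_vcpu(CPUState *cpu)
    {
        printf("arch-specific vcpu setup, fd=%d\n", cpu->hvf_fd);
        return 0;
    }

    int main(void)
    {
        CPUState cpu = { 0 };
        return hvf_init_vcpu(&cpu);
    }

This keeps the signal handling and the hv_vcpu_create()/hv_vcpu_destroy() calls in one place, while each target retains only its own register and capability setup.
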
Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-8-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 30 ++++++++++++++++++++++++++++++ accel/hvf/hvf-accel-ops.h | 2 -- include/sysemu/hvf_int.h | 2 ++ target/i386/hvf/hvf.c | 23 ++--------------------- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 7370fcfba0..b262efd8b6 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -363,6 +363,36 @@ static void hvf_type_init(void) type_init(hvf_type_init); +static void hvf_vcpu_destroy(CPUState *cpu) +{ + hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd); + assert_hvf_ok(ret); + + hvf_arch_vcpu_destroy(cpu); +} + +static int hvf_init_vcpu(CPUState *cpu) +{ + int r; + + /* init cpu signals */ + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); + sigdelset(&set, SIG_IPI); + + r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); + cpu->vcpu_dirty = 1; + assert_hvf_ok(r); + + return hvf_arch_init_vcpu(cpu); +} + /* * The HVF-specific vCPU thread function. This one should only run when the host * CPU supports the VMX "unrestricted guest" feature. diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h index 8f992da168..09fcf22067 100644 --- a/accel/hvf/hvf-accel-ops.h +++ b/accel/hvf/hvf-accel-ops.h @@ -12,12 +12,10 @@ #include "sysemu/cpus.h" -int hvf_init_vcpu(CPUState *); int hvf_vcpu_exec(CPUState *); void hvf_cpu_synchronize_state(CPUState *); void hvf_cpu_synchronize_post_reset(CPUState *); void hvf_cpu_synchronize_post_init(CPUState *); void hvf_cpu_synchronize_pre_loadvm(CPUState *); -void hvf_vcpu_destroy(CPUState *); #endif /* HVF_CPUS_H */ diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index d15fa3302a..80c1a8f946 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -44,6 +44,8 @@ struct HVFState { extern HVFState *hvf_state; void assert_hvf_ok(hv_return_t ret); +int hvf_arch_init_vcpu(CPUState *cpu); +void hvf_arch_vcpu_destroy(CPUState *cpu); hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); int hvf_put_registers(CPUState *); int hvf_get_registers(CPUState *); diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 100ede2a4d..c7132ee370 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -158,14 +158,12 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } -void hvf_vcpu_destroy(CPUState *cpu) +void hvf_arch_vcpu_destroy(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd); g_free(env->hvf_mmio_buf); - assert_hvf_ok(ret); } static void init_tsc_freq(CPUX86State *env) @@ -210,23 +208,10 @@ static inline bool apic_bus_freq_is_known(CPUX86State *env) return env->apic_bus_freq != 0; } -int hvf_init_vcpu(CPUState *cpu) +int hvf_arch_init_vcpu(CPUState *cpu) { - X86CPU *x86cpu = X86_CPU(cpu); CPUX86State *env = &x86cpu->env; - int r; - - /* init cpu signals */ - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); init_emu(); init_decoder(); @@ -243,10 +228,6 @@ int 
hvf_init_vcpu(CPUState *cpu) } } - r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); - cpu->vcpu_dirty = 1; - assert_hvf_ok(r); - if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED, &hvf_state->hvf_caps->vmx_cap_pinbased)) { abort(); From 65c725b5204f22987950b4368ab7db67bcc87f54 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:33 +0100 Subject: [PATCH 35/45] hvf: Use cpu_synchronize_state() There is no reason to call the hvf specific hvf_cpu_synchronize_state() when we can just use the generic cpu_synchronize_state() instead. This allows us to have less dependency on internal function definitions and allows us to make hvf_cpu_synchronize_state() static. Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-9-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 2 +- accel/hvf/hvf-accel-ops.h | 1 - target/i386/hvf/x86hvf.c | 9 ++++----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index b262efd8b6..3b599ac57c 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -200,7 +200,7 @@ static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) } } -void hvf_cpu_synchronize_state(CPUState *cpu) +static void hvf_cpu_synchronize_state(CPUState *cpu) { if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h index 09fcf22067..f6192b56f0 100644 --- a/accel/hvf/hvf-accel-ops.h +++ b/accel/hvf/hvf-accel-ops.h @@ -13,7 +13,6 @@ #include "sysemu/cpus.h" int hvf_vcpu_exec(CPUState *); -void hvf_cpu_synchronize_state(CPUState *); void hvf_cpu_synchronize_post_reset(CPUState *); void hvf_cpu_synchronize_post_init(CPUState *); void hvf_cpu_synchronize_pre_loadvm(CPUState *); diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 2b99f3eaa2..cc381307ab 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -26,14 +26,13 @@ #include "cpu.h" #include "x86_descr.h" #include "x86_decode.h" +#include "sysemu/hw_accel.h" #include "hw/i386/apic_internal.h" #include #include -#include "accel/hvf/hvf-accel-ops.h" - void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg, SegmentCache *qseg, bool is_tr) { @@ -437,7 +436,7 @@ int hvf_process_events(CPUState *cpu_state) env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS); if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) { - hvf_cpu_synchronize_state(cpu_state); + cpu_synchronize_state(cpu_state); do_cpu_init(cpu); } @@ -451,12 +450,12 @@ int hvf_process_events(CPUState *cpu_state) cpu_state->halted = 0; } if (cpu_state->interrupt_request & CPU_INTERRUPT_SIPI) { - hvf_cpu_synchronize_state(cpu_state); + cpu_synchronize_state(cpu_state); do_cpu_sipi(cpu); } if (cpu_state->interrupt_request & CPU_INTERRUPT_TPR) { cpu_state->interrupt_request &= ~CPU_INTERRUPT_TPR; - hvf_cpu_synchronize_state(cpu_state); + cpu_synchronize_state(cpu_state); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); } From 36464fafcb34c1599b6cd22e409a3de600974944 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:33 +0100 Subject: [PATCH 36/45] hvf: Make synchronize functions static The hvf accel synchronize functions are only used as input for local callback functions, so we can make them static. 
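
The pattern, reduced to a self-contained sketch (a hypothetical callback table rather than the real accel ops structure): once a function's address is taken only inside its own file, callers reach it through the function pointer and the symbol itself needs no external linkage:

    #include <stdio.h>

    /* Hypothetical callback table standing in for the real accel ops. */
    typedef struct AccelOps {
        void (*synchronize_post_reset)(int cpu_index);
    } AccelOps;

    /* static: nothing outside this file names the symbol... */
    static void sync_post_reset(int cpu_index)
    {
        printf("push QEMU register state for vcpu %d\n", cpu_index);
    }

    /* ...it is reachable only through this locally-built table. */
    static const AccelOps ops = {
        .synchronize_post_reset = sync_post_reset,
    };

    int main(void)
    {
        ops.synchronize_post_reset(0);
        return 0;
    }
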
Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-10-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 6 +++--- accel/hvf/hvf-accel-ops.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 3b599ac57c..69741ce708 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -214,7 +214,7 @@ static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, cpu->vcpu_dirty = false; } -void hvf_cpu_synchronize_post_reset(CPUState *cpu) +static void hvf_cpu_synchronize_post_reset(CPUState *cpu) { run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); } @@ -226,7 +226,7 @@ static void do_hvf_cpu_synchronize_post_init(CPUState *cpu, cpu->vcpu_dirty = false; } -void hvf_cpu_synchronize_post_init(CPUState *cpu) +static void hvf_cpu_synchronize_post_init(CPUState *cpu) { run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } @@ -237,7 +237,7 @@ static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu, cpu->vcpu_dirty = true; } -void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) +static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) { run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); } diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h index f6192b56f0..018a4e22f6 100644 --- a/accel/hvf/hvf-accel-ops.h +++ b/accel/hvf/hvf-accel-ops.h @@ -13,8 +13,5 @@ #include "sysemu/cpus.h" int hvf_vcpu_exec(CPUState *); -void hvf_cpu_synchronize_post_reset(CPUState *); -void hvf_cpu_synchronize_post_init(CPUState *); -void hvf_cpu_synchronize_pre_loadvm(CPUState *); #endif /* HVF_CPUS_H */ From d662ede2b1eb033883b7c96866e84e8b54524ccb Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:33 +0100 Subject: [PATCH 37/45] hvf: Remove hvf-accel-ops.h We can move the definition of hvf_vcpu_exec() into our internal hvf header, obsoleting the need for hvf-accel-ops.h. Signed-off-by: Alexander Graf Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-11-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 2 -- accel/hvf/hvf-accel-ops.h | 17 ----------------- include/sysemu/hvf_int.h | 1 + target/i386/hvf/hvf.c | 2 -- 4 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 accel/hvf/hvf-accel-ops.h diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 69741ce708..14fc49791e 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -58,8 +58,6 @@ #include "sysemu/runstate.h" #include "qemu/guest-random.h" -#include "hvf-accel-ops.h" - HVFState *hvf_state; /* Memory slots */ diff --git a/accel/hvf/hvf-accel-ops.h b/accel/hvf/hvf-accel-ops.h deleted file mode 100644 index 018a4e22f6..0000000000 --- a/accel/hvf/hvf-accel-ops.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Accelerator CPUS Interface - * - * Copyright 2020 SUSE LLC - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- */ - -#ifndef HVF_CPUS_H -#define HVF_CPUS_H - -#include "sysemu/cpus.h" - -int hvf_vcpu_exec(CPUState *); - -#endif /* HVF_CPUS_H */ diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index 80c1a8f946..fd1dcaf26e 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -46,6 +46,7 @@ extern HVFState *hvf_state; void assert_hvf_ok(hv_return_t ret); int hvf_arch_init_vcpu(CPUState *cpu); void hvf_arch_vcpu_destroy(CPUState *cpu); +int hvf_vcpu_exec(CPUState *); hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); int hvf_put_registers(CPUState *); int hvf_get_registers(CPUState *); diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index c7132ee370..02f7be6cfd 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -73,8 +73,6 @@ #include "qemu/accel.h" #include "target/i386/cpu.h" -#include "hvf-accel-ops.h" - void vmx_update_tpr(CPUState *cpu) { /* TODO: need integrate APIC handling */ From b533450e74500dd67f0aa49775809ea33bc465b7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 3 Jun 2021 14:09:34 +0100 Subject: [PATCH 38/45] hvf: Introduce hvf vcpu struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will need more than a single field for hvf going forward. To keep the global vcpu struct uncluttered, let's allocate a special hvf vcpu struct, similar to how hax does it. Signed-off-by: Alexander Graf Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Reviewed-by: Alex Bennée Reviewed-by: Sergio Lopez Message-id: 20210519202253.76782-12-agraf@csgraf.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- accel/hvf/hvf-accel-ops.c | 8 +- include/hw/core/cpu.h | 3 +- include/sysemu/hvf_int.h | 4 + target/i386/hvf/hvf.c | 104 +++++++++--------- target/i386/hvf/vmx.h | 24 +++-- target/i386/hvf/x86.c | 28 ++--- target/i386/hvf/x86_descr.c | 26 ++--- target/i386/hvf/x86_emu.c | 62 +++++------ target/i386/hvf/x86_mmu.c | 4 +- target/i386/hvf/x86_task.c | 12 +-- target/i386/hvf/x86hvf.c | 210 ++++++++++++++++++------------------ 11 files changed, 248 insertions(+), 237 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 14fc49791e..ded918c443 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -363,16 +363,20 @@ type_init(hvf_type_init); static void hvf_vcpu_destroy(CPUState *cpu) { - hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd); + hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd); assert_hvf_ok(ret); hvf_arch_vcpu_destroy(cpu); + g_free(cpu->hvf); + cpu->hvf = NULL; } static int hvf_init_vcpu(CPUState *cpu) { int r; + cpu->hvf = g_malloc0(sizeof(*cpu->hvf)); + /* init cpu signals */ sigset_t set; struct sigaction sigact; @@ -384,7 +388,7 @@ static int hvf_init_vcpu(CPUState *cpu) pthread_sigmask(SIG_BLOCK, NULL, &set); sigdelset(&set, SIG_IPI); - r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); + r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT); cpu->vcpu_dirty = 1; assert_hvf_ok(r); diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 6b3bd3a1d4..4e0ea68efc 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -214,6 +214,7 @@ struct KVMState; struct kvm_run; struct hax_vcpu_state; +struct hvf_vcpu_state; #define TB_JMP_CACHE_BITS 12 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) @@ -406,7 +407,7 @@ struct CPUState { struct hax_vcpu_state *hax_vcpu; - int hvf_fd; + struct hvf_vcpu_state *hvf; /* track IOMMUs whose translations we've cached in the TCG TLB */ GArray 
*iommu_notifiers; diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index fd1dcaf26e..8b66a4e7d0 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -43,6 +43,10 @@ struct HVFState { }; extern HVFState *hvf_state; +struct hvf_vcpu_state { + int fd; +}; + void assert_hvf_ok(hv_return_t ret); int hvf_arch_init_vcpu(CPUState *cpu); void hvf_arch_vcpu_destroy(CPUState *cpu); diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 02f7be6cfd..346dbcc26f 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -80,11 +80,11 @@ void vmx_update_tpr(CPUState *cpu) int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4; int irr = apic_get_highest_priority_irr(x86_cpu->apic_state); - wreg(cpu->hvf_fd, HV_X86_TPR, tpr); + wreg(cpu->hvf->fd, HV_X86_TPR, tpr); if (irr == -1) { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0); } else { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 : + wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 : irr >> 4); } } @@ -92,7 +92,7 @@ void vmx_update_tpr(CPUState *cpu) static void update_apic_tpr(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); - int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4; + int tpr = rreg(cpu->hvf->fd, HV_X86_TPR) >> 4; cpu_set_apic_tpr(x86_cpu->apic_state, tpr); } @@ -244,43 +244,43 @@ int hvf_arch_init_vcpu(CPUState *cpu) } /* set VMCS control fields */ - wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS, + wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased, VMCS_PIN_BASED_CTLS_EXTINT | VMCS_PIN_BASED_CTLS_NMI | VMCS_PIN_BASED_CTLS_VNMI)); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, + wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased, VMCS_PRI_PROC_BASED_CTLS_HLT | VMCS_PRI_PROC_BASED_CTLS_MWAIT | VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET | VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) | VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL); - wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS, + wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES)); - wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, + wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0)); - wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ + wvmcs(cpu->hvf->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0); x86cpu = X86_CPU(cpu); x86cpu->env.xsave_buf = qemu_memalign(4096, 4096); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_STAR, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_LSTAR, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_CSTAR, 1); + 
hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FMASK, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FSBASE, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_GSBASE, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_KERNELGSBASE, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_TSC_AUX, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_TSC, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_CS, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_EIP, 1); + hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_ESP, 1); return 0; } @@ -321,16 +321,16 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in } if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; - env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); + env->error_code = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_ERROR); } } - if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if ((rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) & VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) { env->hflags2 |= HF2_NMI_MASK; } else { env->hflags2 &= ~HF2_NMI_MASK; } - if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if (rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { env->hflags |= HF_INHIBIT_IRQ_MASK; @@ -409,20 +409,20 @@ int hvf_vcpu_exec(CPUState *cpu) return EXCP_HLT; } - hv_return_t r = hv_vcpu_run(cpu->hvf_fd); + hv_return_t r = hv_vcpu_run(cpu->hvf->fd); assert_hvf_ok(r); /* handle VMEXIT */ - uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON); - uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION); - uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd, + uint64_t exit_reason = rvmcs(cpu->hvf->fd, VMCS_EXIT_REASON); + uint64_t exit_qual = rvmcs(cpu->hvf->fd, VMCS_EXIT_QUALIFICATION); + uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf->fd, VMCS_EXIT_INSTRUCTION_LENGTH); - uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t idtvec_info = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO); hvf_store_events(cpu, ins_len, idtvec_info); - rip = rreg(cpu->hvf_fd, HV_X86_RIP); - env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS); + rip = rreg(cpu->hvf->fd, HV_X86_RIP); + env->eflags = rreg(cpu->hvf->fd, HV_X86_RFLAGS); qemu_mutex_lock_iothread(); @@ -452,7 +452,7 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_EPT_FAULT: { hvf_slot *slot; - uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS); + uint64_t gpa = rvmcs(cpu->hvf->fd, VMCS_GUEST_PHYSICAL_ADDRESS); if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) && ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) { @@ -497,7 +497,7 @@ int hvf_vcpu_exec(CPUState *cpu) store_regs(cpu); break; } else if (!string && !in) { - RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX); + RAX(env) = rreg(cpu->hvf->fd, HV_X86_RAX); hvf_handle_io(env, port, &RAX(env), 1, size, 1); macvm_set_rip(cpu, rip + ins_len); break; @@ -513,21 +513,21 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_CPUID: { - uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX); - uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); + uint32_t rax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX); + uint32_t rbx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RBX); + uint32_t rcx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX); + uint32_t rdx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX); if (rax == 1) { /* CPUID1.ecx.OSXSAVE needs to know 
CR4 */ - env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); + env->cr[4] = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4); } hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); - wreg(cpu->hvf_fd, HV_X86_RAX, rax); - wreg(cpu->hvf_fd, HV_X86_RBX, rbx); - wreg(cpu->hvf_fd, HV_X86_RCX, rcx); - wreg(cpu->hvf_fd, HV_X86_RDX, rdx); + wreg(cpu->hvf->fd, HV_X86_RAX, rax); + wreg(cpu->hvf->fd, HV_X86_RBX, rbx); + wreg(cpu->hvf->fd, HV_X86_RCX, rcx); + wreg(cpu->hvf->fd, HV_X86_RDX, rdx); macvm_set_rip(cpu, rip + ins_len); break; @@ -535,16 +535,16 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_XSETBV: { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); + uint32_t eax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX); + uint32_t ecx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX); + uint32_t edx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX); if (ecx) { macvm_set_rip(cpu, rip + ins_len); break; } env->xcr0 = ((uint64_t)edx << 32) | eax; - wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1); + wreg(cpu->hvf->fd, HV_X86_XCR0, env->xcr0 | 1); macvm_set_rip(cpu, rip + ins_len); break; } @@ -583,11 +583,11 @@ int hvf_vcpu_exec(CPUState *cpu) switch (cr) { case 0x0: { - macvm_set_cr0(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr0(cpu->hvf->fd, RRX(env, reg)); break; } case 4: { - macvm_set_cr4(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr4(cpu->hvf->fd, RRX(env, reg)); break; } case 8: { @@ -623,7 +623,7 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_TASK_SWITCH: { - uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t vinfo = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO); x68_segment_selector sel = {.sel = exit_qual & 0xffff}; vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3, vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo @@ -636,8 +636,8 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_RDPMC: - wreg(cpu->hvf_fd, HV_X86_RAX, 0); - wreg(cpu->hvf_fd, HV_X86_RDX, 0); + wreg(cpu->hvf->fd, HV_X86_RAX, 0); + wreg(cpu->hvf->fd, HV_X86_RDX, 0); macvm_set_rip(cpu, rip + ins_len); break; case VMX_REASON_VMCALL: diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 24c4cdf0be..6df87116f6 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -30,6 +30,8 @@ #include "vmcs.h" #include "cpu.h" #include "x86.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" #include "exec/address-spaces.h" @@ -179,15 +181,15 @@ static inline void macvm_set_rip(CPUState *cpu, uint64_t rip) uint64_t val; /* BUG, should take considering overlap.. 
*/ - wreg(cpu->hvf_fd, HV_X86_RIP, rip); + wreg(cpu->hvf->fd, HV_X86_RIP, rip); env->eip = rip; /* after moving forward in rip, we need to clean INTERRUPTABILITY */ - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + val = rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY); if (val & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { env->hflags &= ~HF_INHIBIT_IRQ_MASK; - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, + wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY, val & ~(VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)); } @@ -199,9 +201,9 @@ static inline void vmx_clear_nmi_blocking(CPUState *cpu) CPUX86State *env = &x86_cpu->env; env->hflags2 &= ~HF2_NMI_MASK; - uint32_t gi = (uint32_t) rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + uint32_t gi = (uint32_t) rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY); gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi); + wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY, gi); } static inline void vmx_set_nmi_blocking(CPUState *cpu) @@ -210,16 +212,16 @@ static inline void vmx_set_nmi_blocking(CPUState *cpu) CPUX86State *env = &x86_cpu->env; env->hflags2 |= HF2_NMI_MASK; - uint32_t gi = (uint32_t)rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + uint32_t gi = (uint32_t)rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY); gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi); + wvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY, gi); } static inline void vmx_set_nmi_window_exiting(CPUState *cpu) { uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val | + val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val | VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING); } @@ -228,8 +230,8 @@ static inline void vmx_clear_nmi_window_exiting(CPUState *cpu) { uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val & + val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val & ~VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING); } diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c index cd045183a8..2898bb70a8 100644 --- a/target/i386/hvf/x86.c +++ b/target/i386/hvf/x86.c @@ -62,11 +62,11 @@ bool x86_read_segment_descriptor(struct CPUState *cpu, } if (GDT_SEL == sel.ti) { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT); + base = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_BASE); + limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_LIMIT); } else { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT); + base = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_BASE); + limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_LIMIT); } if (sel.index * 8 >= limit) { @@ -85,11 +85,11 @@ bool x86_write_segment_descriptor(struct CPUState *cpu, uint32_t limit; if (GDT_SEL == sel.ti) { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT); + base = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_BASE); + limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_GDTR_LIMIT); } else { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT); + base = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_BASE); + limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_LDTR_LIMIT); } if (sel.index * 8 >= limit) { @@ -103,8 
+103,8 @@ bool x86_write_segment_descriptor(struct CPUState *cpu, bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, int gate) { - target_ulong base = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE); - uint32_t limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT); + target_ulong base = rvmcs(cpu->hvf->fd, VMCS_GUEST_IDTR_BASE); + uint32_t limit = rvmcs(cpu->hvf->fd, VMCS_GUEST_IDTR_LIMIT); memset(idt_desc, 0, sizeof(*idt_desc)); if (gate * 8 >= limit) { @@ -118,7 +118,7 @@ bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, bool x86_is_protected(struct CPUState *cpu) { - uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); + uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0); return cr0 & CR0_PE; } @@ -136,7 +136,7 @@ bool x86_is_v8086(struct CPUState *cpu) bool x86_is_long_mode(struct CPUState *cpu) { - return rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA; + return rvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA; } bool x86_is_long64_mode(struct CPUState *cpu) @@ -149,13 +149,13 @@ bool x86_is_long64_mode(struct CPUState *cpu) bool x86_is_paging_mode(struct CPUState *cpu) { - uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); + uint64_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0); return cr0 & CR0_PG; } bool x86_is_pae_enabled(struct CPUState *cpu) { - uint64_t cr4 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); + uint64_t cr4 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4); return cr4 & CR4_PAE; } diff --git a/target/i386/hvf/x86_descr.c b/target/i386/hvf/x86_descr.c index 9f539e73f6..af15c06ac5 100644 --- a/target/i386/hvf/x86_descr.c +++ b/target/i386/hvf/x86_descr.c @@ -48,47 +48,47 @@ static const struct vmx_segment_field { uint32_t vmx_read_segment_limit(CPUState *cpu, X86Seg seg) { - return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit); + return (uint32_t)rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].limit); } uint32_t vmx_read_segment_ar(CPUState *cpu, X86Seg seg) { - return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes); + return (uint32_t)rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].ar_bytes); } uint64_t vmx_read_segment_base(CPUState *cpu, X86Seg seg) { - return rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base); + return rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].base); } x68_segment_selector vmx_read_segment_selector(CPUState *cpu, X86Seg seg) { x68_segment_selector sel; - sel.sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector); + sel.sel = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector); return sel; } void vmx_write_segment_selector(struct CPUState *cpu, x68_segment_selector selector, X86Seg seg) { - wvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector, selector.sel); + wvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector, selector.sel); } void vmx_read_segment_descriptor(struct CPUState *cpu, struct vmx_segment *desc, X86Seg seg) { - desc->sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector); - desc->base = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base); - desc->limit = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit); - desc->ar = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes); + desc->sel = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].selector); + desc->base = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].base); + desc->limit = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].limit); + desc->ar = rvmcs(cpu->hvf->fd, vmx_segment_fields[seg].ar_bytes); } void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, X86Seg seg) { const struct vmx_segment_field *sf = 
&vmx_segment_fields[seg]; - wvmcs(cpu->hvf_fd, sf->base, desc->base); - wvmcs(cpu->hvf_fd, sf->limit, desc->limit); - wvmcs(cpu->hvf_fd, sf->selector, desc->sel); - wvmcs(cpu->hvf_fd, sf->ar_bytes, desc->ar); + wvmcs(cpu->hvf->fd, sf->base, desc->base); + wvmcs(cpu->hvf->fd, sf->limit, desc->limit); + wvmcs(cpu->hvf->fd, sf->selector, desc->sel); + wvmcs(cpu->hvf->fd, sf->ar_bytes, desc->ar); } void x86_segment_descriptor_to_vmx(struct CPUState *cpu, x68_segment_selector selector, struct x86_segment_descriptor *desc, struct vmx_segment *vmx_desc) diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index e52c39ddb1..7c8203b21f 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -674,7 +674,7 @@ void simulate_rdmsr(struct CPUState *cpu) switch (msr) { case MSR_IA32_TSC: - val = rdtscp() + rvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET); + val = rdtscp() + rvmcs(cpu->hvf->fd, VMCS_TSC_OFFSET); break; case MSR_IA32_APICBASE: val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); @@ -683,16 +683,16 @@ void simulate_rdmsr(struct CPUState *cpu) val = x86_cpu->ucode_rev; break; case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); + val = rvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER); break; case MSR_FSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE); + val = rvmcs(cpu->hvf->fd, VMCS_GUEST_FS_BASE); break; case MSR_GSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE); + val = rvmcs(cpu->hvf->fd, VMCS_GUEST_GS_BASE); break; case MSR_KERNELGSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE); + val = rvmcs(cpu->hvf->fd, VMCS_HOST_FS_BASE); break; case MSR_STAR: abort(); @@ -780,13 +780,13 @@ void simulate_wrmsr(struct CPUState *cpu) cpu_set_apic_base(X86_CPU(cpu)->apic_state, data); break; case MSR_FSBASE: - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, data); + wvmcs(cpu->hvf->fd, VMCS_GUEST_FS_BASE, data); break; case MSR_GSBASE: - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, data); + wvmcs(cpu->hvf->fd, VMCS_GUEST_GS_BASE, data); break; case MSR_KERNELGSBASE: - wvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE, data); + wvmcs(cpu->hvf->fd, VMCS_HOST_FS_BASE, data); break; case MSR_STAR: abort(); @@ -799,9 +799,9 @@ void simulate_wrmsr(struct CPUState *cpu) break; case MSR_EFER: /*printf("new efer %llx\n", EFER(cpu));*/ - wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, data); + wvmcs(cpu->hvf->fd, VMCS_GUEST_IA32_EFER, data); if (data & MSR_EFER_NXE) { - hv_vcpu_invalidate_tlb(cpu->hvf_fd); + hv_vcpu_invalidate_tlb(cpu->hvf->fd); } break; case MSR_MTRRphysBase(0): @@ -1425,21 +1425,21 @@ void load_regs(struct CPUState *cpu) CPUX86State *env = &x86_cpu->env; int i = 0; - RRX(env, R_EAX) = rreg(cpu->hvf_fd, HV_X86_RAX); - RRX(env, R_EBX) = rreg(cpu->hvf_fd, HV_X86_RBX); - RRX(env, R_ECX) = rreg(cpu->hvf_fd, HV_X86_RCX); - RRX(env, R_EDX) = rreg(cpu->hvf_fd, HV_X86_RDX); - RRX(env, R_ESI) = rreg(cpu->hvf_fd, HV_X86_RSI); - RRX(env, R_EDI) = rreg(cpu->hvf_fd, HV_X86_RDI); - RRX(env, R_ESP) = rreg(cpu->hvf_fd, HV_X86_RSP); - RRX(env, R_EBP) = rreg(cpu->hvf_fd, HV_X86_RBP); + RRX(env, R_EAX) = rreg(cpu->hvf->fd, HV_X86_RAX); + RRX(env, R_EBX) = rreg(cpu->hvf->fd, HV_X86_RBX); + RRX(env, R_ECX) = rreg(cpu->hvf->fd, HV_X86_RCX); + RRX(env, R_EDX) = rreg(cpu->hvf->fd, HV_X86_RDX); + RRX(env, R_ESI) = rreg(cpu->hvf->fd, HV_X86_RSI); + RRX(env, R_EDI) = rreg(cpu->hvf->fd, HV_X86_RDI); + RRX(env, R_ESP) = rreg(cpu->hvf->fd, HV_X86_RSP); + RRX(env, R_EBP) = rreg(cpu->hvf->fd, HV_X86_RBP); for (i = 8; i < 16; i++) { - RRX(env, i) = rreg(cpu->hvf_fd, HV_X86_RAX + i); + RRX(env, i) = rreg(cpu->hvf->fd, 
HV_X86_RAX + i); } - env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS); + env->eflags = rreg(cpu->hvf->fd, HV_X86_RFLAGS); rflags_to_lflags(env); - env->eip = rreg(cpu->hvf_fd, HV_X86_RIP); + env->eip = rreg(cpu->hvf->fd, HV_X86_RIP); } void store_regs(struct CPUState *cpu) @@ -1448,20 +1448,20 @@ void store_regs(struct CPUState *cpu) CPUX86State *env = &x86_cpu->env; int i = 0; - wreg(cpu->hvf_fd, HV_X86_RAX, RAX(env)); - wreg(cpu->hvf_fd, HV_X86_RBX, RBX(env)); - wreg(cpu->hvf_fd, HV_X86_RCX, RCX(env)); - wreg(cpu->hvf_fd, HV_X86_RDX, RDX(env)); - wreg(cpu->hvf_fd, HV_X86_RSI, RSI(env)); - wreg(cpu->hvf_fd, HV_X86_RDI, RDI(env)); - wreg(cpu->hvf_fd, HV_X86_RBP, RBP(env)); - wreg(cpu->hvf_fd, HV_X86_RSP, RSP(env)); + wreg(cpu->hvf->fd, HV_X86_RAX, RAX(env)); + wreg(cpu->hvf->fd, HV_X86_RBX, RBX(env)); + wreg(cpu->hvf->fd, HV_X86_RCX, RCX(env)); + wreg(cpu->hvf->fd, HV_X86_RDX, RDX(env)); + wreg(cpu->hvf->fd, HV_X86_RSI, RSI(env)); + wreg(cpu->hvf->fd, HV_X86_RDI, RDI(env)); + wreg(cpu->hvf->fd, HV_X86_RBP, RBP(env)); + wreg(cpu->hvf->fd, HV_X86_RSP, RSP(env)); for (i = 8; i < 16; i++) { - wreg(cpu->hvf_fd, HV_X86_RAX + i, RRX(env, i)); + wreg(cpu->hvf->fd, HV_X86_RAX + i, RRX(env, i)); } lflags_to_rflags(env); - wreg(cpu->hvf_fd, HV_X86_RFLAGS, env->eflags); + wreg(cpu->hvf->fd, HV_X86_RFLAGS, env->eflags); macvm_set_rip(cpu, env->eip); } diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c index 78fff04684..e9ed0f5aa1 100644 --- a/target/i386/hvf/x86_mmu.c +++ b/target/i386/hvf/x86_mmu.c @@ -127,7 +127,7 @@ static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, pt->err_code |= MMU_PAGE_PT; } - uint32_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); + uint32_t cr0 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0); /* check protection */ if (cr0 & CR0_WP) { if (pt->write_access && !pte_write_access(pte)) { @@ -172,7 +172,7 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code, { int top_level, level; bool is_large = false; - target_ulong cr3 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR3); + target_ulong cr3 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR3); uint64_t page_mask = pae ? 
PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK; memset(pt, 0, sizeof(*pt)); diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c index d66dfd7669..422156128b 100644 --- a/target/i386/hvf/x86_task.c +++ b/target/i386/hvf/x86_task.c @@ -62,7 +62,7 @@ static void load_state_from_tss32(CPUState *cpu, struct x86_tss_segment32 *tss) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, tss->cr3); + wvmcs(cpu->hvf->fd, VMCS_GUEST_CR3, tss->cr3); env->eip = tss->eip; env->eflags = tss->eflags | 2; @@ -111,11 +111,11 @@ static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel, x68_segme void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type) { - uint64_t rip = rreg(cpu->hvf_fd, HV_X86_RIP); + uint64_t rip = rreg(cpu->hvf->fd, HV_X86_RIP); if (!gate_valid || (gate_type != VMCS_INTR_T_HWEXCEPTION && gate_type != VMCS_INTR_T_HWINTR && gate_type != VMCS_INTR_T_NMI)) { - int ins_len = rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH); + int ins_len = rvmcs(cpu->hvf->fd, VMCS_EXIT_INSTRUCTION_LENGTH); macvm_set_rip(cpu, rip + ins_len); return; } @@ -174,12 +174,12 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea //ret = task_switch_16(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); VM_PANIC("task_switch_16"); - macvm_set_cr0(cpu->hvf_fd, rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0) | CR0_TS); + macvm_set_cr0(cpu->hvf->fd, rvmcs(cpu->hvf->fd, VMCS_GUEST_CR0) | CR0_TS); x86_segment_descriptor_to_vmx(cpu, tss_sel, &next_tss_desc, &vmx_seg); vmx_write_segment_descriptor(cpu, &vmx_seg, R_TR); store_regs(cpu); - hv_vcpu_invalidate_tlb(cpu->hvf_fd); - hv_vcpu_flush(cpu->hvf_fd); + hv_vcpu_invalidate_tlb(cpu->hvf->fd); + hv_vcpu_flush(cpu->hvf->fd); } diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index cc381307ab..28cfee4f60 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -80,7 +80,7 @@ void hvf_put_xsave(CPUState *cpu_state) x86_cpu_xsave_all_areas(X86_CPU(cpu_state), xsave); - if (hv_vcpu_write_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) { + if (hv_vcpu_write_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) { abort(); } } @@ -90,19 +90,19 @@ void hvf_put_segments(CPUState *cpu_state) CPUX86State *env = &X86_CPU(cpu_state)->env; struct vmx_segment seg; - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE, env->idt.base); + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit); + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_BASE, env->idt.base); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE, env->gdt.base); + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit); + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base); - /* wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR2, env->cr[2]); */ - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3, env->cr[3]); + /* wvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR2, env->cr[2]); */ + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR3, env->cr[3]); vmx_update_tpr(cpu_state); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER, env->efer); + wvmcs(cpu_state->hvf->fd, VMCS_GUEST_IA32_EFER, env->efer); - macvm_set_cr4(cpu_state->hvf_fd, env->cr[4]); - macvm_set_cr0(cpu_state->hvf_fd, env->cr[0]); + macvm_set_cr4(cpu_state->hvf->fd, env->cr[4]); + macvm_set_cr0(cpu_state->hvf->fd, env->cr[0]); hvf_set_segment(cpu_state, &seg, 
&env->segs[R_CS], false); vmx_write_segment_descriptor(cpu_state, &seg, R_CS); @@ -128,31 +128,31 @@ void hvf_put_segments(CPUState *cpu_state) hvf_set_segment(cpu_state, &seg, &env->ldt, false); vmx_write_segment_descriptor(cpu_state, &seg, R_LDTR); - hv_vcpu_flush(cpu_state->hvf_fd); + hv_vcpu_flush(cpu_state->hvf->fd); } void hvf_put_msrs(CPUState *cpu_state) { CPUX86State *env = &X86_CPU(cpu_state)->env; - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS, + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_CS, env->sysenter_cs); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP, + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_ESP, env->sysenter_esp); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP, + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_STAR, env->star); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_STAR, env->star); #ifdef TARGET_X86_64 - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_CSTAR, env->cstar); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, env->kernelgsbase); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FMASK, env->fmask); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_LSTAR, env->lstar); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_CSTAR, env->cstar); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_KERNELGSBASE, env->kernelgsbase); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_FMASK, env->fmask); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_LSTAR, env->lstar); #endif - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, env->segs[R_FS].base); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_GSBASE, env->segs[R_GS].base); + hv_vcpu_write_msr(cpu_state->hvf->fd, MSR_FSBASE, env->segs[R_FS].base); } @@ -162,7 +162,7 @@ void hvf_get_xsave(CPUState *cpu_state) xsave = X86_CPU(cpu_state)->env.xsave_buf; - if (hv_vcpu_read_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) { + if (hv_vcpu_read_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) { abort(); } @@ -201,17 +201,17 @@ void hvf_get_segments(CPUState *cpu_state) vmx_read_segment_descriptor(cpu_state, &seg, R_LDTR); hvf_get_segment(&env->ldt, &seg); - env->idt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT); - env->idt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE); - env->gdt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT); - env->gdt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE); + env->idt.limit = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_LIMIT); + env->idt.base = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IDTR_BASE); + env->gdt.limit = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_LIMIT); + env->gdt.base = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_GDTR_BASE); - env->cr[0] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR0); + env->cr[0] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR0); env->cr[2] = 0; - env->cr[3] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3); - env->cr[4] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR4); + env->cr[3] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR3); + env->cr[4] = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_CR4); - env->efer = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER); + env->efer = rvmcs(cpu_state->hvf->fd, VMCS_GUEST_IA32_EFER); } void hvf_get_msrs(CPUState *cpu_state) @@ -219,27 +219,27 @@ void hvf_get_msrs(CPUState *cpu_state) CPUX86State *env = &X86_CPU(cpu_state)->env; uint64_t tmp; - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS, &tmp); + hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_CS, 
 &tmp);
     env->sysenter_cs = tmp;
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP, &tmp);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_ESP, &tmp);
     env->sysenter_esp = tmp;
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP, &tmp);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_SYSENTER_EIP, &tmp);
    env->sysenter_eip = tmp;
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_STAR, &env->star);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_STAR, &env->star);
 #ifdef TARGET_X86_64
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_CSTAR, &env->cstar);
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, &env->kernelgsbase);
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_FMASK, &env->fmask);
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_LSTAR, &env->lstar);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_CSTAR, &env->cstar);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_KERNELGSBASE, &env->kernelgsbase);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_FMASK, &env->fmask);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_LSTAR, &env->lstar);
 #endif
-    hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_APICBASE, &tmp);
+    hv_vcpu_read_msr(cpu_state->hvf->fd, MSR_IA32_APICBASE, &tmp);
 
-    env->tsc = rdtscp() + rvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET);
+    env->tsc = rdtscp() + rvmcs(cpu_state->hvf->fd, VMCS_TSC_OFFSET);
 }
 
 int hvf_put_registers(CPUState *cpu_state)
@@ -247,26 +247,26 @@ int hvf_put_registers(CPUState *cpu_state)
     X86CPU *x86cpu = X86_CPU(cpu_state);
     CPUX86State *env = &x86cpu->env;
 
-    wreg(cpu_state->hvf_fd, HV_X86_RAX, env->regs[R_EAX]);
-    wreg(cpu_state->hvf_fd, HV_X86_RBX, env->regs[R_EBX]);
-    wreg(cpu_state->hvf_fd, HV_X86_RCX, env->regs[R_ECX]);
-    wreg(cpu_state->hvf_fd, HV_X86_RDX, env->regs[R_EDX]);
-    wreg(cpu_state->hvf_fd, HV_X86_RBP, env->regs[R_EBP]);
-    wreg(cpu_state->hvf_fd, HV_X86_RSP, env->regs[R_ESP]);
-    wreg(cpu_state->hvf_fd, HV_X86_RSI, env->regs[R_ESI]);
-    wreg(cpu_state->hvf_fd, HV_X86_RDI, env->regs[R_EDI]);
-    wreg(cpu_state->hvf_fd, HV_X86_R8, env->regs[8]);
-    wreg(cpu_state->hvf_fd, HV_X86_R9, env->regs[9]);
-    wreg(cpu_state->hvf_fd, HV_X86_R10, env->regs[10]);
-    wreg(cpu_state->hvf_fd, HV_X86_R11, env->regs[11]);
-    wreg(cpu_state->hvf_fd, HV_X86_R12, env->regs[12]);
-    wreg(cpu_state->hvf_fd, HV_X86_R13, env->regs[13]);
-    wreg(cpu_state->hvf_fd, HV_X86_R14, env->regs[14]);
-    wreg(cpu_state->hvf_fd, HV_X86_R15, env->regs[15]);
-    wreg(cpu_state->hvf_fd, HV_X86_RFLAGS, env->eflags);
-    wreg(cpu_state->hvf_fd, HV_X86_RIP, env->eip);
+    wreg(cpu_state->hvf->fd, HV_X86_RAX, env->regs[R_EAX]);
+    wreg(cpu_state->hvf->fd, HV_X86_RBX, env->regs[R_EBX]);
+    wreg(cpu_state->hvf->fd, HV_X86_RCX, env->regs[R_ECX]);
+    wreg(cpu_state->hvf->fd, HV_X86_RDX, env->regs[R_EDX]);
+    wreg(cpu_state->hvf->fd, HV_X86_RBP, env->regs[R_EBP]);
+    wreg(cpu_state->hvf->fd, HV_X86_RSP, env->regs[R_ESP]);
+    wreg(cpu_state->hvf->fd, HV_X86_RSI, env->regs[R_ESI]);
+    wreg(cpu_state->hvf->fd, HV_X86_RDI, env->regs[R_EDI]);
+    wreg(cpu_state->hvf->fd, HV_X86_R8, env->regs[8]);
+    wreg(cpu_state->hvf->fd, HV_X86_R9, env->regs[9]);
+    wreg(cpu_state->hvf->fd, HV_X86_R10, env->regs[10]);
+    wreg(cpu_state->hvf->fd, HV_X86_R11, env->regs[11]);
+    wreg(cpu_state->hvf->fd, HV_X86_R12, env->regs[12]);
+    wreg(cpu_state->hvf->fd, HV_X86_R13, env->regs[13]);
+    wreg(cpu_state->hvf->fd, HV_X86_R14, env->regs[14]);
+    wreg(cpu_state->hvf->fd, HV_X86_R15, env->regs[15]);
+    wreg(cpu_state->hvf->fd, HV_X86_RFLAGS, env->eflags);
+    wreg(cpu_state->hvf->fd, HV_X86_RIP, env->eip);
 
-    wreg(cpu_state->hvf_fd, HV_X86_XCR0, env->xcr0);
+    wreg(cpu_state->hvf->fd, HV_X86_XCR0, env->xcr0);
 
     hvf_put_xsave(cpu_state);
 
@@ -274,14 +274,14 @@ int hvf_put_registers(CPUState *cpu_state)
 
     hvf_put_msrs(cpu_state);
 
-    wreg(cpu_state->hvf_fd, HV_X86_DR0, env->dr[0]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR1, env->dr[1]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR2, env->dr[2]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR3, env->dr[3]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR4, env->dr[4]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR5, env->dr[5]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR6, env->dr[6]);
-    wreg(cpu_state->hvf_fd, HV_X86_DR7, env->dr[7]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR0, env->dr[0]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR1, env->dr[1]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR2, env->dr[2]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR3, env->dr[3]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR4, env->dr[4]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR5, env->dr[5]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR6, env->dr[6]);
+    wreg(cpu_state->hvf->fd, HV_X86_DR7, env->dr[7]);
 
     return 0;
 }
@@ -291,40 +291,40 @@ int hvf_get_registers(CPUState *cpu_state)
     X86CPU *x86cpu = X86_CPU(cpu_state);
     CPUX86State *env = &x86cpu->env;
 
-    env->regs[R_EAX] = rreg(cpu_state->hvf_fd, HV_X86_RAX);
-    env->regs[R_EBX] = rreg(cpu_state->hvf_fd, HV_X86_RBX);
-    env->regs[R_ECX] = rreg(cpu_state->hvf_fd, HV_X86_RCX);
-    env->regs[R_EDX] = rreg(cpu_state->hvf_fd, HV_X86_RDX);
-    env->regs[R_EBP] = rreg(cpu_state->hvf_fd, HV_X86_RBP);
-    env->regs[R_ESP] = rreg(cpu_state->hvf_fd, HV_X86_RSP);
-    env->regs[R_ESI] = rreg(cpu_state->hvf_fd, HV_X86_RSI);
-    env->regs[R_EDI] = rreg(cpu_state->hvf_fd, HV_X86_RDI);
-    env->regs[8] = rreg(cpu_state->hvf_fd, HV_X86_R8);
-    env->regs[9] = rreg(cpu_state->hvf_fd, HV_X86_R9);
-    env->regs[10] = rreg(cpu_state->hvf_fd, HV_X86_R10);
-    env->regs[11] = rreg(cpu_state->hvf_fd, HV_X86_R11);
-    env->regs[12] = rreg(cpu_state->hvf_fd, HV_X86_R12);
-    env->regs[13] = rreg(cpu_state->hvf_fd, HV_X86_R13);
-    env->regs[14] = rreg(cpu_state->hvf_fd, HV_X86_R14);
-    env->regs[15] = rreg(cpu_state->hvf_fd, HV_X86_R15);
+    env->regs[R_EAX] = rreg(cpu_state->hvf->fd, HV_X86_RAX);
+    env->regs[R_EBX] = rreg(cpu_state->hvf->fd, HV_X86_RBX);
+    env->regs[R_ECX] = rreg(cpu_state->hvf->fd, HV_X86_RCX);
+    env->regs[R_EDX] = rreg(cpu_state->hvf->fd, HV_X86_RDX);
+    env->regs[R_EBP] = rreg(cpu_state->hvf->fd, HV_X86_RBP);
+    env->regs[R_ESP] = rreg(cpu_state->hvf->fd, HV_X86_RSP);
+    env->regs[R_ESI] = rreg(cpu_state->hvf->fd, HV_X86_RSI);
+    env->regs[R_EDI] = rreg(cpu_state->hvf->fd, HV_X86_RDI);
+    env->regs[8] = rreg(cpu_state->hvf->fd, HV_X86_R8);
+    env->regs[9] = rreg(cpu_state->hvf->fd, HV_X86_R9);
+    env->regs[10] = rreg(cpu_state->hvf->fd, HV_X86_R10);
+    env->regs[11] = rreg(cpu_state->hvf->fd, HV_X86_R11);
+    env->regs[12] = rreg(cpu_state->hvf->fd, HV_X86_R12);
+    env->regs[13] = rreg(cpu_state->hvf->fd, HV_X86_R13);
+    env->regs[14] = rreg(cpu_state->hvf->fd, HV_X86_R14);
+    env->regs[15] = rreg(cpu_state->hvf->fd, HV_X86_R15);
 
-    env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS);
-    env->eip = rreg(cpu_state->hvf_fd, HV_X86_RIP);
+    env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
+    env->eip = rreg(cpu_state->hvf->fd, HV_X86_RIP);
 
     hvf_get_xsave(cpu_state);
-    env->xcr0 = rreg(cpu_state->hvf_fd, HV_X86_XCR0);
+    env->xcr0 = rreg(cpu_state->hvf->fd, HV_X86_XCR0);
 
     hvf_get_segments(cpu_state);
     hvf_get_msrs(cpu_state);
 
-    env->dr[0] = rreg(cpu_state->hvf_fd, HV_X86_DR0);
-    env->dr[1] = rreg(cpu_state->hvf_fd, HV_X86_DR1);
-    env->dr[2] = rreg(cpu_state->hvf_fd, HV_X86_DR2);
-    env->dr[3] = rreg(cpu_state->hvf_fd, HV_X86_DR3);
-    env->dr[4] = rreg(cpu_state->hvf_fd, HV_X86_DR4);
-    env->dr[5] = rreg(cpu_state->hvf_fd, HV_X86_DR5);
-    env->dr[6] = rreg(cpu_state->hvf_fd, HV_X86_DR6);
-    env->dr[7] = rreg(cpu_state->hvf_fd, HV_X86_DR7);
+    env->dr[0] = rreg(cpu_state->hvf->fd, HV_X86_DR0);
+    env->dr[1] = rreg(cpu_state->hvf->fd, HV_X86_DR1);
+    env->dr[2] = rreg(cpu_state->hvf->fd, HV_X86_DR2);
+    env->dr[3] = rreg(cpu_state->hvf->fd, HV_X86_DR3);
+    env->dr[4] = rreg(cpu_state->hvf->fd, HV_X86_DR4);
+    env->dr[5] = rreg(cpu_state->hvf->fd, HV_X86_DR5);
+    env->dr[6] = rreg(cpu_state->hvf->fd, HV_X86_DR6);
+    env->dr[7] = rreg(cpu_state->hvf->fd, HV_X86_DR7);
 
     x86_update_hflags(env);
     return 0;
@@ -333,16 +333,16 @@ int hvf_get_registers(CPUState *cpu_state)
 static void vmx_set_int_window_exiting(CPUState *cpu)
 {
     uint64_t val;
-    val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
-    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val |
+    val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
+    wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val |
           VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
 }
 
 void vmx_clear_int_window_exiting(CPUState *cpu)
 {
     uint64_t val;
-    val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS);
-    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val &
+    val = rvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS);
+    wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS, val &
           ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
 }
 
@@ -378,7 +378,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     uint64_t info = 0;
     if (have_event) {
         info = vector | intr_type | VMCS_INTR_VALID;
-        uint64_t reason = rvmcs(cpu_state->hvf_fd, VMCS_EXIT_REASON);
+        uint64_t reason = rvmcs(cpu_state->hvf->fd, VMCS_EXIT_REASON);
         if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) {
             vmx_clear_nmi_blocking(cpu_state);
         }
@@ -387,17 +387,17 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
         info &= ~(1 << 12); /* clear undefined bit */
         if (intr_type == VMCS_INTR_T_SWINTR ||
             intr_type == VMCS_INTR_T_SWEXCEPTION) {
-            wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
+            wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
         }
 
         if (env->has_error_code) {
-            wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR,
+            wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_EXCEPTION_ERROR,
                   env->error_code);
             /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */
             info |= VMCS_INTR_DEL_ERRCODE;
         }
         /*printf("reinject %lx err %d\n", info, err);*/
-        wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
+        wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, info);
     };
 }
 
@@ -405,7 +405,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
     if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) {
         cpu_state->interrupt_request &= ~CPU_INTERRUPT_NMI;
         info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI;
-        wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info);
+        wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, info);
     } else {
         vmx_set_nmi_window_exiting(cpu_state);
     }
@@ -417,7 +417,7 @@ bool hvf_inject_interrupts(CPUState *cpu_state)
         int line = cpu_get_pic_interrupt(&x86cpu->env);
         cpu_state->interrupt_request &= ~CPU_INTERRUPT_HARD;
         if (line >= 0) {
-            wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, line |
+            wvmcs(cpu_state->hvf->fd, VMCS_ENTRY_INTR_INFO, line |
                   VMCS_INTR_VALID | VMCS_INTR_T_HWINTR);
         }
     }
@@ -433,7 +433,7 @@ int hvf_process_events(CPUState *cpu_state)
     X86CPU *cpu = X86_CPU(cpu_state);
     CPUX86State *env = &cpu->env;
 
-    env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS);
+    env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
 
     if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
         cpu_synchronize_state(cpu_state);
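[Editor's note: the change above is mechanical, but the shape behind it is worth a sketch. The series moves the vCPU's Hypervisor.framework handle out of a bare hvf_fd field on CPUState and into a per-vCPU accelerator struct, which is why every access becomes cpu_state->hvf->fd. Below is an illustrative layout only; the struct and field names are hypothetical stand-ins, and the authoritative definitions live in the hvf headers earlier in this series.]

    #include <stdint.h>

    /* Hypothetical sketch of the per-vCPU HVF state this series adds;
     * names are illustrative, not QEMU's actual definitions. */
    typedef struct HVFVcpuSketch {
        uint64_t fd;                 /* Hypervisor.framework vCPU handle */
    } HVFVcpuSketch;

    typedef struct CPUStateSketch {
        HVFVcpuSketch *hvf;          /* was: uint64_t hvf_fd */
    } CPUStateSketch;

    static inline uint64_t vcpu_handle(CPUStateSketch *cpu)
    {
        return cpu->hvf->fd;         /* the access pattern the rename produces */
    }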
From bac969ef30e8a8b73acbeb6d68abff6f68b2056c Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Thu, 3 Jun 2021 14:42:41 +0100
Subject: [PATCH 39/45] hvf: Simplify post reset/init/loadvm hooks

The hooks we have that call us after reset, init and loadvm really all
just want to say "The reference of all register state is in the QEMU
vcpu struct, please push it".

We already have a working pushing mechanism though called cpu->vcpu_dirty,
so we can just reuse that for all of the above, syncing state properly the
next time we actually execute a vCPU.

This fixes PSCI resets on ARM, as they modify CPU state even after the
post init call has completed, but before we execute the vCPU again.

To also make the scheme work for x86, we have to make sure we don't
move stale eflags into our env when the vcpu state is dirty.

Signed-off-by: Alexander Graf
Reviewed-by: Roman Bolshakov
Tested-by: Roman Bolshakov
Reviewed-by: Sergio Lopez
Message-id: 20210519202253.76782-13-agraf@csgraf.de
Signed-off-by: Peter Maydell
---
 accel/hvf/hvf-accel-ops.c | 27 +++++++--------------------
 target/i386/hvf/x86hvf.c  |  5 ++++-
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index ded918c443..d1691be989 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -205,39 +205,26 @@ static void hvf_cpu_synchronize_state(CPUState *cpu)
     }
 }
 
-static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
-                                              run_on_cpu_data arg)
+static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu,
+                                             run_on_cpu_data arg)
 {
-    hvf_put_registers(cpu);
-    cpu->vcpu_dirty = false;
+    /* QEMU state is the reference, push it to HVF now and on next entry */
+    cpu->vcpu_dirty = true;
 }
 
 static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
-}
-
-static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
-                                             run_on_cpu_data arg)
-{
-    hvf_put_registers(cpu);
-    cpu->vcpu_dirty = false;
+    run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_cpu_synchronize_post_init(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
-}
-
-static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
-                                              run_on_cpu_data arg)
-{
-    cpu->vcpu_dirty = true;
+    run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
+    run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 28cfee4f60..2ced2c2478 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -433,7 +433,10 @@ int hvf_process_events(CPUState *cpu_state)
     X86CPU *cpu = X86_CPU(cpu_state);
     CPUX86State *env = &cpu->env;
 
-    env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
+    if (!cpu_state->vcpu_dirty) {
+        /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
+        env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
+    }
 
     if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
         cpu_synchronize_state(cpu_state);
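[Editor's note: to make the reuse concrete, the vcpu_dirty mechanism the commit message leans on works roughly as follows: the hooks only raise the flag, and the vCPU execution loop pushes QEMU's register state into the hypervisor before the next guest entry. The block below is a minimal sketch of that pattern, not the actual hvf_vcpu_exec() code; the names are stand-ins.]

    #include <stdbool.h>

    typedef struct VcpuSketch {
        bool vcpu_dirty;
    } VcpuSketch;

    /* Stand-in for hvf_put_registers(): push QEMU state to the hypervisor */
    static void put_registers_sketch(VcpuSketch *cpu)
    {
    }

    /* All three hooks collapse to this: mark state dirty, defer the push */
    static void synchronize_set_dirty(VcpuSketch *cpu)
    {
        cpu->vcpu_dirty = true;
    }

    static void vcpu_run_once(VcpuSketch *cpu)
    {
        if (cpu->vcpu_dirty) {
            put_registers_sketch(cpu);  /* QEMU state is the reference */
            cpu->vcpu_dirty = false;
        }
        /* ... enter the guest, e.g. via hv_vcpu_run() ... */
    }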
From 3c65e439b2724cc70af31a87447a0c7292babac5 Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:53 +0100
Subject: [PATCH 40/45] tests/qtest/bios-tables-test: Check for dup2() failure

Coverity notes that we don't check for dup2() failing. Add some
assertions so that if it does ever happen we get some indication.
(This is similar to how we handle other "don't expect this syscall to
fail" checks in this test code.)

Fixes: Coverity CID 1432346
Signed-off-by: Peter Maydell
Reviewed-by: Stefan Berger
Message-id: 20210525134458.6675-2-peter.maydell@linaro.org
---
 tests/qtest/bios-tables-test.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 156d4174aa..51d3a4e239 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -489,10 +489,14 @@ static void test_acpi_asl(test_data *data)
                                         exp_sdt->asl_file, sdt->asl_file);
         int out = dup(STDOUT_FILENO);
         int ret G_GNUC_UNUSED;
+        int dupret;
 
-        dup2(STDERR_FILENO, STDOUT_FILENO);
+        g_assert(out >= 0);
+        dupret = dup2(STDERR_FILENO, STDOUT_FILENO);
+        g_assert(dupret >= 0);
         ret = system(diff) ;
-        dup2(out, STDOUT_FILENO);
+        dupret = dup2(out, STDOUT_FILENO);
+        g_assert(dupret >= 0);
         close(out);
         g_free(diff);
     }
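[Editor's note: the idiom being hardened above is the classic save/redirect/restore of stdout around a child command. In isolation, with every syscall result checked, it looks roughly like the sketch below; this is an illustration of the pattern, not the test code itself.]

    #include <glib.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Run cmd with its stdout redirected to stderr, then restore stdout. */
    static void run_with_stdout_on_stderr(const char *cmd)
    {
        int out = dup(STDOUT_FILENO);       /* save a handle to stdout */
        g_assert(out >= 0);
        g_assert(dup2(STDERR_FILENO, STDOUT_FILENO) >= 0);
        (void)system(cmd);                  /* child inherits the redirection */
        g_assert(dup2(out, STDOUT_FILENO) >= 0);
        close(out);
    }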
From 1c861885894d840235954060050d240259f5340b Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:54 +0100
Subject: [PATCH 41/45] tests/qtest/e1000e-test: Check qemu_recv() succeeded

The e1000e_send_verify() test calls qemu_recv() but doesn't
check that the call succeeded, which annoys Coverity. Add
an explicit test check for the length of the data.

(This is a test check, not a "we assume this syscall always
succeeds", so we use g_assert_cmpint() rather than g_assert().)

Fixes: Coverity CID 1432324
Signed-off-by: Peter Maydell
Reviewed-by: Stefan Berger
Message-id: 20210525134458.6675-3-peter.maydell@linaro.org
---
 tests/qtest/e1000e-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/e1000e-test.c b/tests/qtest/e1000e-test.c
index fc226fdfeb..0273fe4c15 100644
--- a/tests/qtest/e1000e-test.c
+++ b/tests/qtest/e1000e-test.c
@@ -93,7 +93,8 @@ static void e1000e_send_verify(QE1000E *d, int *test_sockets, QGuestAllocator *a
     /* Check data sent to the backend */
     ret = qemu_recv(test_sockets[0], &recv_len, sizeof(recv_len), 0);
     g_assert_cmpint(ret, == , sizeof(recv_len));
-    qemu_recv(test_sockets[0], buffer, 64, 0);
+    ret = qemu_recv(test_sockets[0], buffer, 64, 0);
+    g_assert_cmpint(ret, >=, 5);
     g_assert_cmpstr(buffer, == , "TEST");
 
     /* Free test data buffer */

From 2c398ee5e3a4b8db995ec4b211f28a1119bf10ca Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:55 +0100
Subject: [PATCH 42/45] tests/qtest/hd-geo-test: Fix checks on mkstemp() return
 value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Coverity notices that the checks against mkstemp() failing in
create_qcow2_with_mbr() are wrong: mkstemp returns -1 on failure but
the check is just "g_assert(fd)". Fix to use "g_assert(fd >= 0)",
matching the correct check in create_test_img().

Fixes: Coverity CID 1432274
Signed-off-by: Peter Maydell
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Stefan Berger
Message-id: 20210525134458.6675-4-peter.maydell@linaro.org
---
 tests/qtest/hd-geo-test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c
index f7b7cfbc2d..113126ae06 100644
--- a/tests/qtest/hd-geo-test.c
+++ b/tests/qtest/hd-geo-test.c
@@ -464,7 +464,7 @@ static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
     }
 
     fd = mkstemp(raw_path);
-    g_assert(fd);
+    g_assert(fd >= 0);
     close(fd);
 
     fd = open(raw_path, O_WRONLY);
@@ -474,7 +474,7 @@ static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
     close(fd);
 
     fd = mkstemp(qcow2_path);
-    g_assert(fd);
+    g_assert(fd >= 0);
     close(fd);
 
     qemu_img_path = getenv("QTEST_QEMU_IMG");

From 909e4a0826cde069555d90a9797590c5988d9b7e Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:56 +0100
Subject: [PATCH 43/45] tests/qtest/pflash-cfi02-test: Avoid potential integer
 overflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Coverity points out that we calculate a 64-bit value using 32-bit
arithmetic; add the cast to force the multiply to be done as 64-bits.
(The overflow will never happen with the current test data.)

Fixes: Coverity CID 1432320
Signed-off-by: Peter Maydell
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Stefan Berger
Message-id: 20210525134458.6675-5-peter.maydell@linaro.org
---
 tests/qtest/pflash-cfi02-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/pflash-cfi02-test.c b/tests/qtest/pflash-cfi02-test.c
index 60db81a3a2..6168edc821 100644
--- a/tests/qtest/pflash-cfi02-test.c
+++ b/tests/qtest/pflash-cfi02-test.c
@@ -406,7 +406,7 @@ static void test_geometry(const void *opaque)
 
     for (int region = 0; region < nb_erase_regions; ++region) {
         for (uint32_t i = 0; i < c->nb_blocs[region]; ++i) {
-            uint64_t byte_addr = i * c->sector_len[region];
+            uint64_t byte_addr = (uint64_t)i * c->sector_len[region];
             g_assert_cmphex(flash_read(c, byte_addr), ==, bank_mask(c));
         }
     }
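[Editor's note: the bug class fixed above is easy to trip over: in C, i * c->sector_len[region] is evaluated in the operands' 32-bit type, and only the already-wrapped result is widened to uint64_t. Casting one operand first makes the whole multiply 64-bit. A standalone illustration with made-up values:]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t i = 0x20000, sector_len = 0x100000; /* product needs 38 bits */

        uint64_t bad  = i * sector_len;           /* 32-bit multiply wraps to 0
                                                     before the widening */
        uint64_t good = (uint64_t)i * sector_len; /* operand widened first, so
                                                     the multiply is 64-bit */
        printf("bad=0x%" PRIx64 " good=0x%" PRIx64 "\n", bad, good);
        return 0;
    }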
From d2304612b525e6a0d9df93717c0d1e5321b6b845 Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:57 +0100
Subject: [PATCH 44/45] tests/qtest/tpm-tests: Remove unnecessary NULL checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Coverity points out that in tpm_test_swtpm_migration_test() we
assume that src_tpm_addr and dst_tpm_addr are non-NULL (we pass them
to tpm_util_migration_start_qemu() which will unconditionally
dereference them) but then later explicitly check them for NULL.
Remove the pointless checks.

Fixes: Coverity CID 1432367, 1432359
Signed-off-by: Peter Maydell
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Stefan Berger
Message-id: 20210525134458.6675-6-peter.maydell@linaro.org
---
 tests/qtest/tpm-tests.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/qtest/tpm-tests.c b/tests/qtest/tpm-tests.c
index 0da3a8a4df..25073d1f9e 100644
--- a/tests/qtest/tpm-tests.c
+++ b/tests/qtest/tpm-tests.c
@@ -123,14 +123,10 @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path,
     qtest_quit(src_qemu);
 
     tpm_util_swtpm_kill(dst_tpm_pid);
-    if (dst_tpm_addr) {
-        g_unlink(dst_tpm_addr->u.q_unix.path);
-        qapi_free_SocketAddress(dst_tpm_addr);
-    }
+    g_unlink(dst_tpm_addr->u.q_unix.path);
+    qapi_free_SocketAddress(dst_tpm_addr);
 
     tpm_util_swtpm_kill(src_tpm_pid);
-    if (src_tpm_addr) {
-        g_unlink(src_tpm_addr->u.q_unix.path);
-        qapi_free_SocketAddress(src_tpm_addr);
-    }
+    g_unlink(src_tpm_addr->u.q_unix.path);
+    qapi_free_SocketAddress(src_tpm_addr);
 }

From 1c861885894d840235954060050d240259f5340b Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Tue, 25 May 2021 14:44:58 +0100
Subject: [PATCH 45/45] tests/unit/test-vmstate: Assert that dup() and
 mkstemp() succeed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Coverity complains that we don't check for failures from dup()
and mkstemp(); add asserts that these syscalls succeeded.

Fixes: Coverity CID 1432516, 1432574
Signed-off-by: Peter Maydell
Reviewed-by: Stefan Berger
Reviewed-by: Philippe Mathieu-Daudé
Message-id: 20210525134458.6675-7-peter.maydell@linaro.org
---
 tests/unit/test-vmstate.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c
index a001879585..4688c03ea7 100644
--- a/tests/unit/test-vmstate.c
+++ b/tests/unit/test-vmstate.c
@@ -40,10 +40,12 @@ static int temp_fd;
 /* Duplicate temp_fd and seek to the beginning of the file */
 static QEMUFile *open_test_file(bool write)
 {
-    int fd = dup(temp_fd);
+    int fd;
     QIOChannel *ioc;
     QEMUFile *f;
 
+    fd = dup(temp_fd);
+    g_assert(fd >= 0);
     lseek(fd, 0, SEEK_SET);
     if (write) {
         g_assert_cmpint(ftruncate(fd, 0), ==, 0);
@@ -1486,6 +1488,7 @@ int main(int argc, char **argv)
     g_autofree char *temp_file = g_strdup_printf("%s/vmst.test.XXXXXX",
                                                  g_get_tmp_dir());
     temp_fd = mkstemp(temp_file);
+    g_assert(temp_fd >= 0);
 
     module_call_init(MODULE_INIT_QOM);