From ab64da79774060450046ce8c800eef000024dc8c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 17:52:41 -0700 Subject: [PATCH 1/5] tcg/tci: Adjust passing of MemOpIdx Since adding MO_ATOM_MASK, the maximum MemOpIdx requires 15 bits, which overflows the 12 bit field allocated for TCI memory ops. Expand the field to 16 bits for 2-operand memory ops, and place the value in TCG_REG_TMP for 3-operand memory ops (same as we already do for 4-operand memory ops). Cures a debug assert for aarch64, with FEAT_LSE2 enabled. Signed-off-by: Richard Henderson --- tcg/tci.c | 30 +++++++++++++----------------- tcg/tci/tcg-target.c.inc | 21 ++++----------------- 2 files changed, 17 insertions(+), 34 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index 813572ff39..4640902c88 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -106,7 +106,7 @@ static void tci_args_rrm(uint32_t insn, TCGReg *r0, { *r0 = extract32(insn, 8, 4); *r1 = extract32(insn, 12, 4); - *m2 = extract32(insn, 20, 12); + *m2 = extract32(insn, 16, 16); } static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) @@ -141,15 +141,6 @@ static void tci_args_rrrc(uint32_t insn, *c3 = extract32(insn, 20, 4); } -static void tci_args_rrrm(uint32_t insn, - TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3) -{ - *r0 = extract32(insn, 8, 4); - *r1 = extract32(insn, 12, 4); - *r2 = extract32(insn, 16, 4); - *m3 = extract32(insn, 20, 12); -} - static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, uint8_t *i3, uint8_t *i4) { @@ -929,8 +920,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; } else { - tci_args_rrrm(insn, &r0, &r1, &r2, &oi); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); taddr = tci_uint64(regs[r2], regs[r1]); + oi = regs[r3]; } do_ld_i32: regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); @@ -941,8 +933,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrm(insn, &r0, &r1, 
&oi); taddr = (uint32_t)regs[r1]; } else { - tci_args_rrrm(insn, &r0, &r1, &r2, &oi); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); taddr = (uint32_t)regs[r2]; + oi = regs[r3]; } goto do_ld_i64; case INDEX_op_qemu_ld_a64_i64: @@ -972,8 +965,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; } else { - tci_args_rrrm(insn, &r0, &r1, &r2, &oi); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); taddr = tci_uint64(regs[r2], regs[r1]); + oi = regs[r3]; } do_st_i32: tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); @@ -985,9 +979,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tmp64 = regs[r0]; taddr = (uint32_t)regs[r1]; } else { - tci_args_rrrm(insn, &r0, &r1, &r2, &oi); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); tmp64 = tci_uint64(regs[r1], regs[r0]); taddr = (uint32_t)regs[r2]; + oi = regs[r3]; } goto do_st_i64; case INDEX_op_qemu_st_a64_i64: @@ -1293,9 +1288,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), oi); break; case 3: - tci_args_rrrm(insn, &r0, &r1, &r2, &oi); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x", - op_name, str_r(r0), str_r(r1), str_r(r2), oi); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", + op_name, str_r(r0), str_r(r1), + str_r(r2), str_r(r3)); break; case 4: tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index c9516a5e8b..5b456e1277 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -331,11 +331,11 @@ static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op, { tcg_insn_unit insn = 0; - tcg_debug_assert(m2 == extract32(m2, 0, 12)); + tcg_debug_assert(m2 == extract32(m2, 0, 16)); insn = deposit32(insn, 0, 8, op); insn = deposit32(insn, 8, 4, r0); insn = deposit32(insn, 12, 4, r1); - insn = deposit32(insn, 20, 12, m2); + insn = deposit32(insn, 16, 16, m2); tcg_out32(s, insn); } 
@@ -392,20 +392,6 @@ static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, tcg_out32(s, insn); } -static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op, - TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3) -{ - tcg_insn_unit insn = 0; - - tcg_debug_assert(m3 == extract32(m3, 0, 12)); - insn = deposit32(insn, 0, 8, op); - insn = deposit32(insn, 8, 4, r0); - insn = deposit32(insn, 12, 4, r1); - insn = deposit32(insn, 16, 4, r2); - insn = deposit32(insn, 20, 12, m3); - tcg_out32(s, insn); -} - static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4) { @@ -860,7 +846,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, if (TCG_TARGET_REG_BITS == 64) { tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); } else { - tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); + tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); } break; case INDEX_op_qemu_ld_a64_i64: From 0cabaef3ed13697e2da0ceb18b3da9e21d0b4d83 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 18:06:15 -0700 Subject: [PATCH 2/5] tcg/tci: Adjust call-clobbered regs for int128_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We require either 2 or 4 registers to hold int128_t. Failure to do so results in a register allocation assert.
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tci/tcg-target.c.inc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 5b456e1277..0037f904f1 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -179,8 +179,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) } static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R2, - TCG_REG_R3, TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, @@ -193,6 +191,9 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R13, TCG_REG_R14, TCG_REG_R15, + /* Either 2 or 4 of these are call clobbered, so use them last. */ + TCG_REG_R3, + TCG_REG_R2, TCG_REG_R1, TCG_REG_R0, }; @@ -934,11 +935,11 @@ static void tcg_target_init(TCGContext *s) /* * The interpreter "registers" are in the local stack frame and * cannot be clobbered by the called helper functions. However, - * the interpreter assumes a 64-bit return value and assigns to + * the interpreter assumes a 128-bit return value and assigns to * the return value registers. */ tcg_target_call_clobber_regs = - MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS); + MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS); s->reserved_regs = 0; tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); From 007cd176e590c77e91d1531ec5acbe86b15b0f00 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 19:42:58 +0000 Subject: [PATCH 3/5] target/arm: Only include tcg/oversized-guest.h if CONFIG_TCG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the build for --disable-tcg. This header is only needed for cross-hosting. Without CONFIG_TCG, we know this is an AArch64 host, CONFIG_ATOMIC64 will be set, and the TCG_OVERSIZED_GUEST block will never be compiled. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/arm/ptw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target/arm/ptw.c b/target/arm/ptw.c index b2dc223525..37bcb17a9e 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -14,8 +14,9 @@ #include "cpu.h" #include "internals.h" #include "idau.h" -#include "tcg/oversized-guest.h" - +#ifdef CONFIG_TCG +# include "tcg/oversized-guest.h" +#endif typedef struct S1Translate { ARMMMUIdx in_mmu_idx; From c81e2d5477c92b4a96c779bffbba3dddb23b91be Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 19:53:35 +0000 Subject: [PATCH 4/5] gitlab: Add cross-arm64-kvm-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are not currently running a --disable-tcg test for arm64, like we are for mips, ppc and s390x. We have a job for the native aarch64 runner, but it is not run by default and it is not helpful for normal developer testing without access to qemu's private runner. Use --without-default-features to eliminate most tests. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Thomas Huth Signed-off-by: Richard Henderson --- .gitlab-ci.d/crossbuilds.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml index 61b8ac86ee..1e0e6c7f2c 100644 --- a/.gitlab-ci.d/crossbuilds.yml +++ b/.gitlab-ci.d/crossbuilds.yml @@ -29,6 +29,14 @@ cross-arm64-user: variables: IMAGE: debian-arm64-cross +cross-arm64-kvm-only: + extends: .cross_accel_build_job + needs: + job: arm64-debian-cross-container + variables: + IMAGE: debian-arm64-cross + EXTRA_CONFIGURE_OPTS: --disable-tcg --without-default-features + cross-i386-user: extends: - .cross_user_build_job From dcc28ab603f30df5cc8be1f759b423e94ae7d10f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 7 Jun 2023 17:36:06 +0300 Subject: [PATCH 5/5] iotests: fix 194: filter out racy postcopy-active event The event is racy: it will not appear in the output if the bitmap is migrated during the downtime period of migration and the postcopy phase is not started. Fixes: ae00aa239847 "iotests: 194: test also migration of dirty bitmap" Reported-by: Richard Henderson Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230607143606.1557395-1-vsementsov@yandex-team.ru> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- tests/qemu-iotests/194 | 5 +++++ tests/qemu-iotests/194.out | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 index 68894371f5..c0ce82dd25 100755 --- a/tests/qemu-iotests/194 +++ b/tests/qemu-iotests/194 @@ -74,6 +74,11 @@ with iotests.FilePath('source.img') as source_img_path, \ while True: event1 = source_vm.event_wait('MIGRATION') + if event1['data']['status'] == 'postcopy-active': + # This event is racy: it depends on whether we really do postcopy + # or the bitmap was migrated during downtime (leaving no data to + # migrate in the postcopy phase). So, don't log it.
+ continue iotests.log(event1, filters=[iotests.filter_qmp_event]) if event1['data']['status'] in ('completed', 'failed'): iotests.log('Gracefully ending the `drive-mirror` job on source...') diff --git a/tests/qemu-iotests/194.out b/tests/qemu-iotests/194.out index 4e6df1565a..376ed1d2e6 100644 --- a/tests/qemu-iotests/194.out +++ b/tests/qemu-iotests/194.out @@ -14,7 +14,6 @@ Starting migration... {"return": {}} {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -{"data": {"status": "postcopy-active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Gracefully ending the `drive-mirror` job on source... {"return": {}}