Optimize across branches.

Add logging for cpu_io_recompile.
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAl+YT3IdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9ucQf+NkvOZDM2h+ymyJ0v
 QHvUWSVCr7c51dMCgI7YJVMN/XcojEUhSOaU6nfOwbikl0mV2ZTvrexbbTstY0LI
 71xSPXaPab9DLFfq8JX2NL4bqRJQkyJwiXRBpfxAK567NXcY4Xs1XJXN/1uhmN8c
 JO49QMtgHSS0HB/LR6ldboYCMQOEGxqWPy8RBA98ZiA5Ce1+Lk3CbGnZ1tfpcSVt
 63/sbZi2yFTjco5Atbq1KB9r7942M5FRKf6BXNYTPzEdJZLSrJ4x+sCa5BQaZ6ac
 aAgeuFaq9QEiNcUbsyWu3BnQ26548HAJ1Iu9M2D95+Mt6/0w0QUFMRe/Lu4DSOMu
 FVggzw==
 =9URn
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20201027' into staging

Optimize across branches.
Add logging for cpu_io_recompile.

# gpg: Signature made Tue 27 Oct 2020 16:48:50 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20201027:
  accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile()
  tcg/optimize: Flush data at labels not TCG_OPF_BB_END
  tcg: Do not kill globals at conditional branches

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-10-30 19:47:54 +00:00
commit c3dee4de92
5 changed files with 82 additions and 23 deletions

View File

@@ -2267,6 +2267,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
tb_destroy(tb); tb_destroy(tb);
} }
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
"cpu_io_recompile: rewound execution of TB to "
TARGET_FMT_lx "\n", tb->pc);
/* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
* the first in the TB) then we end up generating a whole new TB and * the first in the TB) then we end up generating a whole new TB and
* repeating the fault, which is horribly inefficient. * repeating the fault, which is horribly inefficient.

View File

@@ -81,7 +81,7 @@ DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
@@ -89,7 +89,8 @@ DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(brcond2_i32, 0, 4, 2,
TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
@@ -159,7 +160,7 @@ DEF(extrh_i64_i32, 1, 1, 0,
IMPL(TCG_TARGET_HAS_extrh_i64_i32) IMPL(TCG_TARGET_HAS_extrh_i64_i32)
| (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))

View File

@@ -990,7 +990,7 @@ typedef struct TCGArgConstraint {
#define TCG_MAX_OP_ARGS 16 #define TCG_MAX_OP_ARGS 16
/* Bits for TCGOpDef->flags, 8 bits available. */ /* Bits for TCGOpDef->flags, 8 bits available, all used. */
enum { enum {
/* Instruction exits the translation block. */ /* Instruction exits the translation block. */
TCG_OPF_BB_EXIT = 0x01, TCG_OPF_BB_EXIT = 0x01,
@@ -1008,6 +1008,8 @@ enum {
TCG_OPF_NOT_PRESENT = 0x20, TCG_OPF_NOT_PRESENT = 0x20,
/* Instruction operands are vectors. */ /* Instruction operands are vectors. */
TCG_OPF_VECTOR = 0x40, TCG_OPF_VECTOR = 0x40,
/* Instruction is a conditional branch. */
TCG_OPF_COND_BRANCH = 0x80
}; };
typedef struct TCGOpDef { typedef struct TCGOpDef {

View File

@@ -1484,29 +1484,30 @@ void tcg_optimize(TCGContext *s)
} }
} }
} }
goto do_reset_output; /* fall through */
default: default:
do_default: do_default:
/* Default case: we know nothing about operation (or were unable /*
to compute the operation result) so no propagation is done. * Default case: we know nothing about operation (or were unable
We trash everything if the operation is the end of a basic * to compute the operation result) so no propagation is done.
block, otherwise we only trash the output args. "mask" is */
the non-zero bits mask for the first output arg. */ for (i = 0; i < nb_oargs; i++) {
if (def->flags & TCG_OPF_BB_END) { reset_temp(op->args[i]);
bitmap_zero(temps_used.l, nb_temps); /*
} else { * Save the corresponding known-zero bits mask for the
do_reset_output: * first output argument (only one supported so far).
for (i = 0; i < nb_oargs; i++) { */
reset_temp(op->args[i]); if (i == 0) {
/* Save the corresponding known-zero bits mask for the arg_info(op->args[i])->mask = mask;
first output argument (only one supported so far). */
if (i == 0) {
arg_info(op->args[i])->mask = mask;
}
} }
} }
break; break;
case INDEX_op_set_label:
/* Trash everything at the start of a new extended bb. */
bitmap_zero(temps_used.l, nb_temps);
break;
} }
/* Eliminate duplicate and redundant fence instructions. */ /* Eliminate duplicate and redundant fence instructions. */

View File

@@ -2519,6 +2519,28 @@ static void la_global_sync(TCGContext *s, int ng)
} }
} }
/*
* liveness analysis: conditional branch: all temps are dead,
* globals and local temps should be synced.
*/
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals must be synced to memory across a conditional branch. */
    la_global_sync(s, ng);

    for (i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];

        if (ts->temp_local) {
            /*
             * Local temps survive the branch, but must be coherent
             * with their memory slot: mark TS_MEM.  Only a temp that
             * was fully dead needs its register preference reset.
             */
            int prev = ts->state;
            ts->state = prev | TS_MEM;
            if (prev != TS_DEAD) {
                continue;
            }
        } else {
            /* Ordinary temps do not survive the extended basic block. */
            ts->state = TS_DEAD;
        }
        la_reset_pref(ts);
    }
}
/* liveness analysis: sync globals back to memory and kill. */ /* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng) static void la_global_kill(TCGContext *s, int ng)
{ {
@@ -2795,6 +2817,8 @@ static void liveness_pass_1(TCGContext *s)
/* If end of basic block, update. */ /* If end of basic block, update. */
if (def->flags & TCG_OPF_BB_EXIT) { if (def->flags & TCG_OPF_BB_EXIT) {
la_func_end(s, nb_globals, nb_temps); la_func_end(s, nb_globals, nb_temps);
} else if (def->flags & TCG_OPF_COND_BRANCH) {
la_bb_sync(s, nb_globals, nb_temps);
} else if (def->flags & TCG_OPF_BB_END) { } else if (def->flags & TCG_OPF_BB_END) {
la_bb_end(s, nb_globals, nb_temps); la_bb_end(s, nb_globals, nb_temps);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) { } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -2907,7 +2931,10 @@ static bool liveness_pass_2(TCGContext *s)
nb_oargs = def->nb_oargs; nb_oargs = def->nb_oargs;
/* Set flags similar to how calls require. */ /* Set flags similar to how calls require. */
if (def->flags & TCG_OPF_BB_END) { if (def->flags & TCG_OPF_COND_BRANCH) {
/* Like reading globals: sync_globals */
call_flags = TCG_CALL_NO_WRITE_GLOBALS;
} else if (def->flags & TCG_OPF_BB_END) {
/* Like writing globals: save_globals */ /* Like writing globals: save_globals */
call_flags = 0; call_flags = 0;
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) { } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -3379,6 +3406,28 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs); save_globals(s, allocated_regs);
} }
/*
* At a conditional branch, we assume all temporaries are dead and
* all globals and local temps are synced to their location.
*/
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;

    /* Ensure every global's memory slot is up to date at the branch. */
    sync_globals(s, allocated_regs);

    /*
     * Liveness pass 1 (la_bb_sync) has already arranged for non-global
     * temps to be dead, and for local temps to be memory-coherent, at
     * every conditional branch.  Verify that invariant under debug.
     */
    for (i = nb_globals; i < nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        if (ts->temp_local) {
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
        } else {
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }
}
/* /*
* Specialized code generation for INDEX_op_movi_*. * Specialized code generation for INDEX_op_movi_*.
*/ */
@@ -3730,7 +3779,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
} }
} }
if (def->flags & TCG_OPF_BB_END) { if (def->flags & TCG_OPF_COND_BRANCH) {
tcg_reg_alloc_cbranch(s, i_allocated_regs);
} else if (def->flags & TCG_OPF_BB_END) {
tcg_reg_alloc_bb_end(s, i_allocated_regs); tcg_reg_alloc_bb_end(s, i_allocated_regs);
} else { } else {
if (def->flags & TCG_OPF_CALL_CLOBBER) { if (def->flags & TCG_OPF_CALL_CLOBBER) {