mirror of https://github.com/xemu-project/xemu.git
indirect register lowering
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJXpMAtAAoJEK0ScMxN0Cebh60H/17kh95KYPERqTVI6eu9sMep Nn0tKXl1j4jGr5/w+1UIoY3zVCEI+17GeXQY6+XMtAvHrbFsvlbM7QgUST82l3ww dm9cMRMYgqAdsuUZHNAHkTxXtwgKgQkw06nJuYLDCpY1Skjw/vNt3pKqy4GDD7OJ FTHhq360hvE/mf7aFQV4477Cg8QdzvNTqoJgCC1waDN1N5BBNraq+wIjtyJZ299R 6jAxjPBeGEIyv4/g4CdxrNPDdsBahnewO4wynQTbH52Whui1sRic2eSNzdKDK0hy aDVN2TDG1YnfhKCKAF73Gvpyb2eHcXDSdYQgFaVjaZtJpBXH845CRKHpo2kFrVw= =+piJ -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160805' into staging indirect register lowering # gpg: Signature made Fri 05 Aug 2016 17:34:53 BST # gpg: using RSA key 0xAD1270CC4DD0279B # gpg: Good signature from "Richard Henderson <rth7680@gmail.com>" # gpg: aka "Richard Henderson <rth@redhat.com>" # gpg: aka "Richard Henderson <rth@twiddle.net>" # Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B * remotes/rth/tags/pull-tcg-20160805: tcg: Lower indirect registers in a separate pass tcg: Require liveness analysis tcg: Include liveness info in the dumps tcg: Compress dead_temps and mem_temps into a single array tcg: Fold life data into TCGOp tcg: Reorg TCGOp chaining tcg: Compress liveness data to 16 bits Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
cf5198d580
|
@ -59,7 +59,7 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
|
|||
}
|
||||
|
||||
/* Terminate the linked list. */
|
||||
tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
|
||||
tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
|
||||
}
|
||||
|
||||
static inline void gen_io_start(void)
|
||||
|
|
|
@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void)
|
|||
#define CPU_LOG_TB_NOCHAIN (1 << 13)
|
||||
#define CPU_LOG_PAGE (1 << 14)
|
||||
#define LOG_TRACE (1 << 15)
|
||||
#define CPU_LOG_TB_OP_IND (1 << 16)
|
||||
|
||||
/* Returns true if a bit is set in the current loglevel mask
|
||||
*/
|
||||
|
@ -54,7 +55,7 @@ static inline bool qemu_loglevel_mask(int mask)
|
|||
|
||||
/* main logging function
|
||||
*/
|
||||
void GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
|
||||
int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
|
||||
|
||||
/* vfprintf-like logging function
|
||||
*/
|
||||
|
|
|
@ -82,37 +82,6 @@ static void init_temp_info(TCGArg temp)
|
|||
}
|
||||
}
|
||||
|
||||
static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
|
||||
TCGOpcode opc, int nargs)
|
||||
{
|
||||
int oi = s->gen_next_op_idx;
|
||||
int pi = s->gen_next_parm_idx;
|
||||
int prev = old_op->prev;
|
||||
int next = old_op - s->gen_op_buf;
|
||||
TCGOp *new_op;
|
||||
|
||||
tcg_debug_assert(oi < OPC_BUF_SIZE);
|
||||
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
|
||||
s->gen_next_op_idx = oi + 1;
|
||||
s->gen_next_parm_idx = pi + nargs;
|
||||
|
||||
new_op = &s->gen_op_buf[oi];
|
||||
*new_op = (TCGOp){
|
||||
.opc = opc,
|
||||
.args = pi,
|
||||
.prev = prev,
|
||||
.next = next
|
||||
};
|
||||
if (prev >= 0) {
|
||||
s->gen_op_buf[prev].next = oi;
|
||||
} else {
|
||||
s->gen_first_op_idx = oi;
|
||||
}
|
||||
old_op->prev = oi;
|
||||
|
||||
return new_op;
|
||||
}
|
||||
|
||||
static int op_bits(TCGOpcode op)
|
||||
{
|
||||
const TCGOpDef *def = &tcg_op_defs[op];
|
||||
|
@ -583,7 +552,7 @@ void tcg_optimize(TCGContext *s)
|
|||
nb_globals = s->nb_globals;
|
||||
reset_all_temps(nb_temps);
|
||||
|
||||
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
|
||||
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
|
||||
tcg_target_ulong mask, partmask, affected;
|
||||
int nb_oargs, nb_iargs, i;
|
||||
TCGArg tmp;
|
||||
|
@ -1120,7 +1089,7 @@ void tcg_optimize(TCGContext *s)
|
|||
uint64_t a = ((uint64_t)ah << 32) | al;
|
||||
uint64_t b = ((uint64_t)bh << 32) | bl;
|
||||
TCGArg rl, rh;
|
||||
TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
|
||||
TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
|
||||
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
|
||||
|
||||
if (opc == INDEX_op_add2_i32) {
|
||||
|
@ -1146,7 +1115,7 @@ void tcg_optimize(TCGContext *s)
|
|||
uint32_t b = temps[args[3]].val;
|
||||
uint64_t r = (uint64_t)a * b;
|
||||
TCGArg rl, rh;
|
||||
TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
|
||||
TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
|
||||
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
|
||||
|
||||
rl = args[0];
|
||||
|
|
|
@ -52,7 +52,7 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
|
|||
int pi = oi - 1;
|
||||
|
||||
tcg_debug_assert(oi < OPC_BUF_SIZE);
|
||||
ctx->gen_last_op_idx = oi;
|
||||
ctx->gen_op_buf[0].prev = oi;
|
||||
ctx->gen_next_op_idx = ni;
|
||||
|
||||
ctx->gen_op_buf[oi] = (TCGOp){
|
||||
|
|
590
tcg/tcg.c
590
tcg/tcg.c
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
/* define it to use liveness analysis (better code) */
|
||||
#define USE_LIVENESS_ANALYSIS
|
||||
#define USE_TCG_OPTIMIZATIONS
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
|
@ -333,7 +332,7 @@ void tcg_context_init(TCGContext *s)
|
|||
|
||||
memset(s, 0, sizeof(*s));
|
||||
s->nb_globals = 0;
|
||||
|
||||
|
||||
/* Count total number of arguments and allocate the corresponding
|
||||
space */
|
||||
total_args = 0;
|
||||
|
@ -438,9 +437,9 @@ void tcg_func_start(TCGContext *s)
|
|||
s->goto_tb_issue_mask = 0;
|
||||
#endif
|
||||
|
||||
s->gen_first_op_idx = 0;
|
||||
s->gen_last_op_idx = -1;
|
||||
s->gen_next_op_idx = 0;
|
||||
s->gen_op_buf[0].next = 1;
|
||||
s->gen_op_buf[0].prev = 0;
|
||||
s->gen_next_op_idx = 1;
|
||||
s->gen_next_parm_idx = 0;
|
||||
|
||||
s->be = tcg_malloc(sizeof(TCGBackendData));
|
||||
|
@ -532,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
|
|||
#endif
|
||||
|
||||
if (!base_ts->fixed_reg) {
|
||||
indirect_reg = 1;
|
||||
/* We do not support double-indirect registers. */
|
||||
tcg_debug_assert(!base_ts->indirect_reg);
|
||||
base_ts->indirect_base = 1;
|
||||
s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
|
||||
? 2 : 1);
|
||||
indirect_reg = 1;
|
||||
}
|
||||
|
||||
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
|
||||
|
@ -825,16 +828,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
|
|||
real_args++;
|
||||
}
|
||||
#endif
|
||||
/* If stack grows up, then we will be placing successive
|
||||
arguments at lower addresses, which means we need to
|
||||
reverse the order compared to how we would normally
|
||||
treat either big or little-endian. For those arguments
|
||||
that will wind up in registers, this still works for
|
||||
HPPA (the only current STACK_GROWSUP target) since the
|
||||
argument registers are *also* allocated in decreasing
|
||||
order. If another such target is added, this logic may
|
||||
have to get more complicated to differentiate between
|
||||
stack arguments and register arguments. */
|
||||
/* If stack grows up, then we will be placing successive
|
||||
arguments at lower addresses, which means we need to
|
||||
reverse the order compared to how we would normally
|
||||
treat either big or little-endian. For those arguments
|
||||
that will wind up in registers, this still works for
|
||||
HPPA (the only current STACK_GROWSUP target) since the
|
||||
argument registers are *also* allocated in decreasing
|
||||
order. If another such target is added, this logic may
|
||||
have to get more complicated to differentiate between
|
||||
stack arguments and register arguments. */
|
||||
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
||||
s->gen_opparam_buf[pi++] = args[i] + 1;
|
||||
s->gen_opparam_buf[pi++] = args[i];
|
||||
|
@ -869,7 +872,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
|
|||
/* Make sure the calli field didn't overflow. */
|
||||
tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
|
||||
|
||||
s->gen_last_op_idx = i;
|
||||
s->gen_op_buf[0].prev = i;
|
||||
s->gen_next_op_idx = i + 1;
|
||||
s->gen_next_parm_idx = pi;
|
||||
|
||||
|
@ -1021,11 +1024,12 @@ void tcg_dump_ops(TCGContext *s)
|
|||
TCGOp *op;
|
||||
int oi;
|
||||
|
||||
for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
|
||||
for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
|
||||
int i, k, nb_oargs, nb_iargs, nb_cargs;
|
||||
const TCGOpDef *def;
|
||||
const TCGArg *args;
|
||||
TCGOpcode c;
|
||||
int col = 0;
|
||||
|
||||
op = &s->gen_op_buf[oi];
|
||||
c = op->opc;
|
||||
|
@ -1033,7 +1037,7 @@ void tcg_dump_ops(TCGContext *s)
|
|||
args = &s->gen_opparam_buf[op->args];
|
||||
|
||||
if (c == INDEX_op_insn_start) {
|
||||
qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
|
||||
col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
|
||||
|
||||
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
|
||||
target_ulong a;
|
||||
|
@ -1042,7 +1046,7 @@ void tcg_dump_ops(TCGContext *s)
|
|||
#else
|
||||
a = args[i];
|
||||
#endif
|
||||
qemu_log(" " TARGET_FMT_lx, a);
|
||||
col += qemu_log(" " TARGET_FMT_lx, a);
|
||||
}
|
||||
} else if (c == INDEX_op_call) {
|
||||
/* variable number of arguments */
|
||||
|
@ -1051,12 +1055,12 @@ void tcg_dump_ops(TCGContext *s)
|
|||
nb_cargs = def->nb_cargs;
|
||||
|
||||
/* function name, flags, out args */
|
||||
qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
|
||||
tcg_find_helper(s, args[nb_oargs + nb_iargs]),
|
||||
args[nb_oargs + nb_iargs + 1], nb_oargs);
|
||||
col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
|
||||
tcg_find_helper(s, args[nb_oargs + nb_iargs]),
|
||||
args[nb_oargs + nb_iargs + 1], nb_oargs);
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[i]));
|
||||
col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[i]));
|
||||
}
|
||||
for (i = 0; i < nb_iargs; i++) {
|
||||
TCGArg arg = args[nb_oargs + i];
|
||||
|
@ -1064,10 +1068,10 @@ void tcg_dump_ops(TCGContext *s)
|
|||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
|
||||
}
|
||||
qemu_log(",%s", t);
|
||||
col += qemu_log(",%s", t);
|
||||
}
|
||||
} else {
|
||||
qemu_log(" %s ", def->name);
|
||||
col += qemu_log(" %s ", def->name);
|
||||
|
||||
nb_oargs = def->nb_oargs;
|
||||
nb_iargs = def->nb_iargs;
|
||||
|
@ -1076,17 +1080,17 @@ void tcg_dump_ops(TCGContext *s)
|
|||
k = 0;
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
if (k != 0) {
|
||||
qemu_log(",");
|
||||
col += qemu_log(",");
|
||||
}
|
||||
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[k++]));
|
||||
col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[k++]));
|
||||
}
|
||||
for (i = 0; i < nb_iargs; i++) {
|
||||
if (k != 0) {
|
||||
qemu_log(",");
|
||||
col += qemu_log(",");
|
||||
}
|
||||
qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[k++]));
|
||||
col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
|
||||
args[k++]));
|
||||
}
|
||||
switch (c) {
|
||||
case INDEX_op_brcond_i32:
|
||||
|
@ -1098,9 +1102,9 @@ void tcg_dump_ops(TCGContext *s)
|
|||
case INDEX_op_setcond_i64:
|
||||
case INDEX_op_movcond_i64:
|
||||
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
|
||||
qemu_log(",%s", cond_name[args[k++]]);
|
||||
col += qemu_log(",%s", cond_name[args[k++]]);
|
||||
} else {
|
||||
qemu_log(",$0x%" TCG_PRIlx, args[k++]);
|
||||
col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
|
||||
}
|
||||
i = 1;
|
||||
break;
|
||||
|
@ -1114,12 +1118,12 @@ void tcg_dump_ops(TCGContext *s)
|
|||
unsigned ix = get_mmuidx(oi);
|
||||
|
||||
if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
|
||||
qemu_log(",$0x%x,%u", op, ix);
|
||||
col += qemu_log(",$0x%x,%u", op, ix);
|
||||
} else {
|
||||
const char *s_al, *s_op;
|
||||
s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
|
||||
s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
|
||||
qemu_log(",%s%s,%u", s_al, s_op, ix);
|
||||
col += qemu_log(",%s%s,%u", s_al, s_op, ix);
|
||||
}
|
||||
i = 1;
|
||||
}
|
||||
|
@ -1134,14 +1138,39 @@ void tcg_dump_ops(TCGContext *s)
|
|||
case INDEX_op_brcond_i32:
|
||||
case INDEX_op_brcond_i64:
|
||||
case INDEX_op_brcond2_i32:
|
||||
qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
|
||||
col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
|
||||
i++, k++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
for (; i < nb_cargs; i++, k++) {
|
||||
qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
|
||||
col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
|
||||
}
|
||||
}
|
||||
if (op->life) {
|
||||
unsigned life = op->life;
|
||||
|
||||
for (; col < 48; ++col) {
|
||||
putc(' ', qemu_logfile);
|
||||
}
|
||||
|
||||
if (life & (SYNC_ARG * 3)) {
|
||||
qemu_log(" sync:");
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (life & (SYNC_ARG << i)) {
|
||||
qemu_log(" %d", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
life /= DEAD_ARG;
|
||||
if (life) {
|
||||
qemu_log(" dead:");
|
||||
for (i = 0; life; ++i, life >>= 1) {
|
||||
if (life & 1) {
|
||||
qemu_log(" %d", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
qemu_log("\n");
|
||||
|
@ -1298,71 +1327,116 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
|||
int next = op->next;
|
||||
int prev = op->prev;
|
||||
|
||||
if (next >= 0) {
|
||||
s->gen_op_buf[next].prev = prev;
|
||||
} else {
|
||||
s->gen_last_op_idx = prev;
|
||||
}
|
||||
if (prev >= 0) {
|
||||
s->gen_op_buf[prev].next = next;
|
||||
} else {
|
||||
s->gen_first_op_idx = next;
|
||||
}
|
||||
/* We should never attempt to remove the list terminator. */
|
||||
tcg_debug_assert(op != &s->gen_op_buf[0]);
|
||||
|
||||
memset(op, -1, sizeof(*op));
|
||||
s->gen_op_buf[next].prev = prev;
|
||||
s->gen_op_buf[prev].next = next;
|
||||
|
||||
memset(op, 0, sizeof(*op));
|
||||
|
||||
#ifdef CONFIG_PROFILER
|
||||
s->del_op_count++;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_LIVENESS_ANALYSIS
|
||||
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
|
||||
TCGOpcode opc, int nargs)
|
||||
{
|
||||
int oi = s->gen_next_op_idx;
|
||||
int pi = s->gen_next_parm_idx;
|
||||
int prev = old_op->prev;
|
||||
int next = old_op - s->gen_op_buf;
|
||||
TCGOp *new_op;
|
||||
|
||||
tcg_debug_assert(oi < OPC_BUF_SIZE);
|
||||
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
|
||||
s->gen_next_op_idx = oi + 1;
|
||||
s->gen_next_parm_idx = pi + nargs;
|
||||
|
||||
new_op = &s->gen_op_buf[oi];
|
||||
*new_op = (TCGOp){
|
||||
.opc = opc,
|
||||
.args = pi,
|
||||
.prev = prev,
|
||||
.next = next
|
||||
};
|
||||
s->gen_op_buf[prev].next = oi;
|
||||
old_op->prev = oi;
|
||||
|
||||
return new_op;
|
||||
}
|
||||
|
||||
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
|
||||
TCGOpcode opc, int nargs)
|
||||
{
|
||||
int oi = s->gen_next_op_idx;
|
||||
int pi = s->gen_next_parm_idx;
|
||||
int prev = old_op - s->gen_op_buf;
|
||||
int next = old_op->next;
|
||||
TCGOp *new_op;
|
||||
|
||||
tcg_debug_assert(oi < OPC_BUF_SIZE);
|
||||
tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
|
||||
s->gen_next_op_idx = oi + 1;
|
||||
s->gen_next_parm_idx = pi + nargs;
|
||||
|
||||
new_op = &s->gen_op_buf[oi];
|
||||
*new_op = (TCGOp){
|
||||
.opc = opc,
|
||||
.args = pi,
|
||||
.prev = prev,
|
||||
.next = next
|
||||
};
|
||||
s->gen_op_buf[next].prev = oi;
|
||||
old_op->next = oi;
|
||||
|
||||
return new_op;
|
||||
}
|
||||
|
||||
#define TS_DEAD 1
|
||||
#define TS_MEM 2
|
||||
|
||||
#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
|
||||
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
|
||||
|
||||
/* liveness analysis: end of function: all temps are dead, and globals
|
||||
should be in memory. */
|
||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
|
||||
uint8_t *mem_temps)
|
||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
memset(dead_temps, 1, s->nb_temps);
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
|
||||
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
|
||||
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
|
||||
}
|
||||
|
||||
/* liveness analysis: end of basic block: all temps are dead, globals
|
||||
and local temps should be in memory. */
|
||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
|
||||
uint8_t *mem_temps)
|
||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
int i;
|
||||
int i, n;
|
||||
|
||||
memset(dead_temps, 1, s->nb_temps);
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
||||
mem_temps[i] = s->temps[i].temp_local;
|
||||
tcg_la_func_end(s, temp_state);
|
||||
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
|
||||
if (s->temps[i].temp_local) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Liveness analysis : update the opc_dead_args array to tell if a
|
||||
/* Liveness analysis: update each op's life data to tell if a
|
||||
given input argument is dead. Instructions updating dead
|
||||
temporaries are removed. */
|
||||
static void tcg_liveness_analysis(TCGContext *s)
|
||||
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
uint8_t *dead_temps, *mem_temps;
|
||||
int oi, oi_prev, nb_ops;
|
||||
int nb_globals = s->nb_globals;
|
||||
int oi, oi_prev;
|
||||
|
||||
nb_ops = s->gen_next_op_idx;
|
||||
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
|
||||
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
|
||||
|
||||
dead_temps = tcg_malloc(s->nb_temps);
|
||||
mem_temps = tcg_malloc(s->nb_temps);
|
||||
tcg_la_func_end(s, dead_temps, mem_temps);
|
||||
tcg_la_func_end(s, temp_state);
|
||||
|
||||
for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
|
||||
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
||||
int i, nb_iargs, nb_oargs;
|
||||
TCGOpcode opc_new, opc_new2;
|
||||
bool have_opc_new2;
|
||||
uint16_t dead_args;
|
||||
uint8_t sync_args;
|
||||
TCGLifeData arg_life = 0;
|
||||
TCGArg arg;
|
||||
|
||||
TCGOp * const op = &s->gen_op_buf[oi];
|
||||
|
@ -1385,7 +1459,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
||||
if (temp_state[arg] != TS_DEAD) {
|
||||
goto do_not_remove_call;
|
||||
}
|
||||
}
|
||||
|
@ -1394,46 +1468,44 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
do_not_remove_call:
|
||||
|
||||
/* output args are dead */
|
||||
dead_args = 0;
|
||||
sync_args = 0;
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
dead_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (mem_temps[arg]) {
|
||||
sync_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 1;
|
||||
mem_temps[arg] = 0;
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
|
||||
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||
/* globals should be synced to memory */
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
}
|
||||
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
||||
TCG_CALL_NO_READ_GLOBALS))) {
|
||||
/* globals should go back to memory */
|
||||
memset(dead_temps, 1, s->nb_globals);
|
||||
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
|
||||
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||
/* globals should be synced to memory */
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* record arguments that die in this helper */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
if (dead_temps[arg]) {
|
||||
dead_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
dead_temps[arg] = 0;
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
temp_state[arg] &= ~TS_DEAD;
|
||||
}
|
||||
}
|
||||
s->op_dead_args[oi] = dead_args;
|
||||
s->op_sync_args[oi] = sync_args;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1441,8 +1513,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
break;
|
||||
case INDEX_op_discard:
|
||||
/* mark the temporary as dead */
|
||||
dead_temps[args[0]] = 1;
|
||||
mem_temps[args[0]] = 0;
|
||||
temp_state[args[0]] = TS_DEAD;
|
||||
break;
|
||||
|
||||
case INDEX_op_add2_i32:
|
||||
|
@ -1463,8 +1534,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
the low part. The result can be optimized to a simple
|
||||
add or sub. This happens often for x86_64 guest when the
|
||||
cpu mode is set to 32 bit. */
|
||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
||||
if (temp_state[args[1]] == TS_DEAD) {
|
||||
if (temp_state[args[0]] == TS_DEAD) {
|
||||
goto do_remove;
|
||||
}
|
||||
/* Replace the opcode and adjust the args in place,
|
||||
|
@ -1501,8 +1572,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
do_mul2:
|
||||
nb_iargs = 2;
|
||||
nb_oargs = 2;
|
||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
||||
if (temp_state[args[1]] == TS_DEAD) {
|
||||
if (temp_state[args[0]] == TS_DEAD) {
|
||||
/* Both parts of the operation are dead. */
|
||||
goto do_remove;
|
||||
}
|
||||
|
@ -1510,8 +1581,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
op->opc = opc = opc_new;
|
||||
args[1] = args[2];
|
||||
args[2] = args[3];
|
||||
} else if (have_opc_new2 && dead_temps[args[0]]
|
||||
&& !mem_temps[args[0]]) {
|
||||
} else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
|
||||
/* The low part of the operation is dead; generate the high. */
|
||||
op->opc = opc = opc_new2;
|
||||
args[0] = args[1];
|
||||
|
@ -1534,8 +1604,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
implies side effects */
|
||||
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
||||
if (temp_state[args[i]] != TS_DEAD) {
|
||||
goto do_not_remove;
|
||||
}
|
||||
}
|
||||
|
@ -1544,59 +1613,203 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
} else {
|
||||
do_not_remove:
|
||||
/* output args are dead */
|
||||
dead_args = 0;
|
||||
sync_args = 0;
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
dead_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (mem_temps[arg]) {
|
||||
sync_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 1;
|
||||
mem_temps[arg] = 0;
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
|
||||
/* if end of basic block, update */
|
||||
if (def->flags & TCG_OPF_BB_END) {
|
||||
tcg_la_bb_end(s, dead_temps, mem_temps);
|
||||
tcg_la_bb_end(s, temp_state);
|
||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||
/* globals should be synced to memory */
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* record arguments that die in this opcode */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
dead_args |= (1 << i);
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
arg = args[i];
|
||||
dead_temps[arg] = 0;
|
||||
temp_state[args[i]] &= ~TS_DEAD;
|
||||
}
|
||||
s->op_dead_args[oi] = dead_args;
|
||||
s->op_sync_args[oi] = sync_args;
|
||||
}
|
||||
break;
|
||||
}
|
||||
op->life = arg_life;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* dummy liveness analysis */
|
||||
static void tcg_liveness_analysis(TCGContext *s)
|
||||
{
|
||||
int nb_ops = s->gen_next_op_idx;
|
||||
|
||||
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
|
||||
memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
|
||||
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
|
||||
memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
|
||||
/* Liveness analysis: Convert indirect regs to direct temporaries. */
|
||||
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
int nb_globals = s->nb_globals;
|
||||
int16_t *dir_temps;
|
||||
int i, oi, oi_next;
|
||||
bool changes = false;
|
||||
|
||||
dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
|
||||
memset(dir_temps, 0, nb_globals * sizeof(int16_t));
|
||||
|
||||
/* Create a temporary for each indirect global. */
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
TCGTemp *its = &s->temps[i];
|
||||
if (its->indirect_reg) {
|
||||
TCGTemp *dts = tcg_temp_alloc(s);
|
||||
dts->type = its->type;
|
||||
dts->base_type = its->base_type;
|
||||
dir_temps[i] = temp_idx(s, dts);
|
||||
}
|
||||
}
|
||||
|
||||
memset(temp_state, TS_DEAD, nb_globals);
|
||||
|
||||
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
|
||||
TCGOp *op = &s->gen_op_buf[oi];
|
||||
TCGArg *args = &s->gen_opparam_buf[op->args];
|
||||
TCGOpcode opc = op->opc;
|
||||
const TCGOpDef *def = &tcg_op_defs[opc];
|
||||
TCGLifeData arg_life = op->life;
|
||||
int nb_iargs, nb_oargs, call_flags;
|
||||
TCGArg arg, dir;
|
||||
|
||||
oi_next = op->next;
|
||||
|
||||
if (opc == INDEX_op_call) {
|
||||
nb_oargs = op->callo;
|
||||
nb_iargs = op->calli;
|
||||
call_flags = args[nb_oargs + nb_iargs + 1];
|
||||
} else {
|
||||
nb_iargs = def->nb_iargs;
|
||||
nb_oargs = def->nb_oargs;
|
||||
|
||||
/* Set flags analogous to those a call would carry. */
|
||||
if (def->flags & TCG_OPF_BB_END) {
|
||||
/* Like writing globals: save_globals */
|
||||
call_flags = 0;
|
||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||
/* Like reading globals: sync_globals */
|
||||
call_flags = TCG_CALL_NO_WRITE_GLOBALS;
|
||||
} else {
|
||||
/* No effect on globals. */
|
||||
call_flags = (TCG_CALL_NO_READ_GLOBALS |
|
||||
TCG_CALL_NO_WRITE_GLOBALS);
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure that input arguments are available. */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
/* Note this unsigned test catches TCG_CALL_DUMMY_ARG too. */
|
||||
if (arg < nb_globals) {
|
||||
dir = dir_temps[arg];
|
||||
if (dir != 0 && temp_state[arg] == TS_DEAD) {
|
||||
TCGTemp *its = &s->temps[arg];
|
||||
TCGOpcode lopc = (its->type == TCG_TYPE_I32
|
||||
? INDEX_op_ld_i32
|
||||
: INDEX_op_ld_i64);
|
||||
TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
|
||||
TCGArg *largs = &s->gen_opparam_buf[lop->args];
|
||||
|
||||
largs[0] = dir;
|
||||
largs[1] = temp_idx(s, its->mem_base);
|
||||
largs[2] = its->mem_offset;
|
||||
|
||||
/* Loaded, but synced with memory. */
|
||||
temp_state[arg] = TS_MEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Perform input replacement, and mark inputs that became dead.
|
||||
No action is required except keeping temp_state up to date
|
||||
so that we reload when needed. */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (arg < nb_globals) {
|
||||
dir = dir_temps[arg];
|
||||
if (dir != 0) {
|
||||
args[i] = dir;
|
||||
changes = true;
|
||||
if (IS_DEAD_ARG(i)) {
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Liveness analysis should ensure that the following are
|
||||
all correct, for call sites and basic block end points. */
|
||||
if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
|
||||
/* Nothing to do */
|
||||
} else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
/* Liveness should see that globals are synced back,
|
||||
that is, either TS_DEAD or TS_MEM. */
|
||||
tcg_debug_assert(dir_temps[i] == 0
|
||||
|| temp_state[i] != 0);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < nb_globals; ++i) {
|
||||
/* Liveness should see that globals are saved back,
|
||||
that is, TS_DEAD, waiting to be reloaded. */
|
||||
tcg_debug_assert(dir_temps[i] == 0
|
||||
|| temp_state[i] == TS_DEAD);
|
||||
}
|
||||
}
|
||||
|
||||
/* Outputs become available. */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (arg >= nb_globals) {
|
||||
continue;
|
||||
}
|
||||
dir = dir_temps[arg];
|
||||
if (dir == 0) {
|
||||
continue;
|
||||
}
|
||||
args[i] = dir;
|
||||
changes = true;
|
||||
|
||||
/* The output is now live and modified. */
|
||||
temp_state[arg] = 0;
|
||||
|
||||
/* Sync outputs upon their last write. */
|
||||
if (NEED_SYNC_ARG(i)) {
|
||||
TCGTemp *its = &s->temps[arg];
|
||||
TCGOpcode sopc = (its->type == TCG_TYPE_I32
|
||||
? INDEX_op_st_i32
|
||||
: INDEX_op_st_i64);
|
||||
TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
|
||||
TCGArg *sargs = &s->gen_opparam_buf[sop->args];
|
||||
|
||||
sargs[0] = dir;
|
||||
sargs[1] = temp_idx(s, its->mem_base);
|
||||
sargs[2] = its->mem_offset;
|
||||
|
||||
temp_state[arg] = TS_MEM;
|
||||
}
|
||||
/* Drop outputs that are dead. */
|
||||
if (IS_DEAD_ARG(i)) {
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return changes;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_TCG
|
||||
static void dump_regs(TCGContext *s)
|
||||
|
@ -1728,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
|
|||
if (!ts->mem_allocated) {
|
||||
temp_allocate_frame(s, temp_idx(s, ts));
|
||||
}
|
||||
if (ts->indirect_reg) {
|
||||
if (ts->val_type == TEMP_VAL_REG) {
|
||||
tcg_regset_set_reg(allocated_regs, ts->reg);
|
||||
}
|
||||
temp_load(s, ts->mem_base,
|
||||
tcg_target_available_regs[TCG_TYPE_PTR],
|
||||
allocated_regs);
|
||||
}
|
||||
switch (ts->val_type) {
|
||||
case TEMP_VAL_CONST:
|
||||
/* If we're going to free the temp immediately, then we won't
|
||||
|
@ -1826,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
|
|||
break;
|
||||
case TEMP_VAL_MEM:
|
||||
reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
|
||||
if (ts->indirect_reg) {
|
||||
tcg_regset_set_reg(allocated_regs, reg);
|
||||
temp_load(s, ts->mem_base,
|
||||
tcg_target_available_regs[TCG_TYPE_PTR],
|
||||
allocated_regs);
|
||||
}
|
||||
tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
|
||||
ts->mem_coherent = 1;
|
||||
break;
|
||||
|
@ -1848,16 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
|
|||
temporary registers needs to be allocated to store a constant. */
|
||||
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
|
||||
{
|
||||
#ifdef USE_LIVENESS_ANALYSIS
|
||||
/* ??? Liveness does not yet incorporate indirect bases. */
|
||||
if (!ts->indirect_base) {
|
||||
/* The liveness analysis already ensures that globals are back
|
||||
in memory. Keep a tcg_debug_assert for safety. */
|
||||
tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
temp_sync(s, ts, allocated_regs, 1);
|
||||
/* The liveness analysis already ensures that globals are back
|
||||
in memory. Keep a tcg_debug_assert for safety. */
|
||||
tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
|
||||
}
|
||||
|
||||
/* save globals to their canonical location and assume they can be
|
||||
|
@ -1881,16 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
|
|||
|
||||
for (i = 0; i < s->nb_globals; i++) {
|
||||
TCGTemp *ts = &s->temps[i];
|
||||
#ifdef USE_LIVENESS_ANALYSIS
|
||||
/* ??? Liveness does not yet incorporate indirect bases. */
|
||||
if (!ts->indirect_base) {
|
||||
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
|
||||
|| ts->fixed_reg
|
||||
|| ts->mem_coherent);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
temp_sync(s, ts, allocated_regs, 0);
|
||||
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
|
||||
|| ts->fixed_reg
|
||||
|| ts->mem_coherent);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1905,27 +2090,17 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
|
|||
if (ts->temp_local) {
|
||||
temp_save(s, ts, allocated_regs);
|
||||
} else {
|
||||
#ifdef USE_LIVENESS_ANALYSIS
|
||||
/* ??? Liveness does not yet incorporate indirect bases. */
|
||||
if (!ts->indirect_base) {
|
||||
/* The liveness analysis already ensures that temps are dead.
|
||||
Keep a tcg_debug_assert for safety. */
|
||||
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
temp_dead(s, ts);
|
||||
/* The liveness analysis already ensures that temps are dead.
|
||||
Keep a tcg_debug_assert for safety. */
|
||||
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
|
||||
}
|
||||
}
|
||||
|
||||
save_globals(s, allocated_regs);
|
||||
}
|
||||
|
||||
#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
|
||||
#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
|
||||
|
||||
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
|
||||
uint16_t dead_args, uint8_t sync_args)
|
||||
TCGLifeData arg_life)
|
||||
{
|
||||
TCGTemp *ots;
|
||||
tcg_target_ulong val;
|
||||
|
@ -1954,8 +2129,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
|
|||
}
|
||||
|
||||
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
|
||||
const TCGArg *args, uint16_t dead_args,
|
||||
uint8_t sync_args)
|
||||
const TCGArg *args, TCGLifeData arg_life)
|
||||
{
|
||||
TCGRegSet allocated_regs;
|
||||
TCGTemp *ts, *ots;
|
||||
|
@ -1987,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
|
|||
if (!ots->mem_allocated) {
|
||||
temp_allocate_frame(s, args[0]);
|
||||
}
|
||||
if (ots->indirect_reg) {
|
||||
tcg_regset_set_reg(allocated_regs, ts->reg);
|
||||
temp_load(s, ots->mem_base,
|
||||
tcg_target_available_regs[TCG_TYPE_PTR],
|
||||
allocated_regs);
|
||||
}
|
||||
tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
|
||||
if (IS_DEAD_ARG(1)) {
|
||||
temp_dead(s, ts);
|
||||
|
@ -2040,8 +2208,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
|
|||
|
||||
static void tcg_reg_alloc_op(TCGContext *s,
|
||||
const TCGOpDef *def, TCGOpcode opc,
|
||||
const TCGArg *args, uint16_t dead_args,
|
||||
uint8_t sync_args)
|
||||
const TCGArg *args, TCGLifeData arg_life)
|
||||
{
|
||||
TCGRegSet allocated_regs;
|
||||
int i, k, nb_iargs, nb_oargs;
|
||||
|
@ -2206,8 +2373,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
|
|||
#endif
|
||||
|
||||
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
|
||||
const TCGArg * const args, uint16_t dead_args,
|
||||
uint8_t sync_args)
|
||||
const TCGArg * const args, TCGLifeData arg_life)
|
||||
{
|
||||
int flags, nb_regs, i;
|
||||
TCGReg reg;
|
||||
|
@ -2363,7 +2529,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
{
|
||||
int n;
|
||||
|
||||
n = s->gen_last_op_idx + 1;
|
||||
n = s->gen_op_buf[0].prev + 1;
|
||||
s->op_count += n;
|
||||
if (n > s->op_count_max) {
|
||||
s->op_count_max = n;
|
||||
|
@ -2399,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
s->la_time -= profile_getclock();
|
||||
#endif
|
||||
|
||||
tcg_liveness_analysis(s);
|
||||
{
|
||||
uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
|
||||
|
||||
liveness_pass_1(s, temp_state);
|
||||
|
||||
if (s->nb_indirects > 0) {
|
||||
#ifdef DEBUG_DISAS
|
||||
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
|
||||
&& qemu_log_in_addr_range(tb->pc))) {
|
||||
qemu_log("OP before indirect lowering:\n");
|
||||
tcg_dump_ops(s);
|
||||
qemu_log("\n");
|
||||
}
|
||||
#endif
|
||||
/* Replace indirect temps with direct temps. */
|
||||
if (liveness_pass_2(s, temp_state)) {
|
||||
/* If changes were made, re-run liveness. */
|
||||
liveness_pass_1(s, temp_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROFILER
|
||||
s->la_time += profile_getclock();
|
||||
|
@ -2422,13 +2608,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
tcg_out_tb_init(s);
|
||||
|
||||
num_insns = -1;
|
||||
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
|
||||
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
|
||||
TCGOp * const op = &s->gen_op_buf[oi];
|
||||
TCGArg * const args = &s->gen_opparam_buf[op->args];
|
||||
TCGOpcode opc = op->opc;
|
||||
const TCGOpDef *def = &tcg_op_defs[opc];
|
||||
uint16_t dead_args = s->op_dead_args[oi];
|
||||
uint8_t sync_args = s->op_sync_args[oi];
|
||||
TCGLifeData arg_life = op->life;
|
||||
|
||||
oi_next = op->next;
|
||||
#ifdef CONFIG_PROFILER
|
||||
|
@ -2438,11 +2623,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
switch (opc) {
|
||||
case INDEX_op_mov_i32:
|
||||
case INDEX_op_mov_i64:
|
||||
tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
|
||||
tcg_reg_alloc_mov(s, def, args, arg_life);
|
||||
break;
|
||||
case INDEX_op_movi_i32:
|
||||
case INDEX_op_movi_i64:
|
||||
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
|
||||
tcg_reg_alloc_movi(s, args, arg_life);
|
||||
break;
|
||||
case INDEX_op_insn_start:
|
||||
if (num_insns >= 0) {
|
||||
|
@ -2467,8 +2652,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
tcg_out_label(s, arg_label(args[0]), s->code_ptr);
|
||||
break;
|
||||
case INDEX_op_call:
|
||||
tcg_reg_alloc_call(s, op->callo, op->calli, args,
|
||||
dead_args, sync_args);
|
||||
tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
|
||||
break;
|
||||
default:
|
||||
/* Sanity check that we've not introduced any unhandled opcodes. */
|
||||
|
@ -2478,7 +2662,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
|||
/* Note: in order to speed up the code, it would be much
|
||||
faster to have specialized register allocator functions for
|
||||
some common argument patterns */
|
||||
tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
|
||||
tcg_reg_alloc_op(s, def, opc, args, arg_life);
|
||||
break;
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_TCG
|
||||
|
|
52
tcg/tcg.h
52
tcg/tcg.h
|
@ -575,24 +575,41 @@ typedef struct TCGTempSet {
|
|||
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
|
||||
} TCGTempSet;
|
||||
|
||||
/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
|
||||
this implies a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
|
||||
There are never more than 2 outputs, which means that we can store all
|
||||
dead + sync data within 16 bits. */
|
||||
#define DEAD_ARG 4
|
||||
#define SYNC_ARG 1
|
||||
typedef uint16_t TCGLifeData;
|
||||
|
||||
/* The layout here is designed to avoid crossing of a 32-bit boundary.
|
||||
If we do so, gcc adds padding, expanding the size to 12. */
|
||||
typedef struct TCGOp {
|
||||
TCGOpcode opc : 8;
|
||||
TCGOpcode opc : 8; /* 8 */
|
||||
|
||||
/* Index of the prev/next op, or 0 for the end of the list. */
|
||||
unsigned prev : 10; /* 18 */
|
||||
unsigned next : 10; /* 28 */
|
||||
|
||||
/* The number of out and in parameters for a call. */
|
||||
unsigned callo : 2;
|
||||
unsigned calli : 6;
|
||||
unsigned calli : 4; /* 32 */
|
||||
unsigned callo : 2; /* 34 */
|
||||
|
||||
/* Index of the arguments for this op, or -1 for zero-operand ops. */
|
||||
signed args : 16;
|
||||
/* Index of the arguments for this op, or 0 for zero-operand ops. */
|
||||
unsigned args : 14; /* 48 */
|
||||
|
||||
/* Index of the prev/next op, or -1 for the end of the list. */
|
||||
signed prev : 16;
|
||||
signed next : 16;
|
||||
/* Lifetime data of the operands. */
|
||||
unsigned life : 16; /* 64 */
|
||||
} TCGOp;
|
||||
|
||||
QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
|
||||
QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
|
||||
QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
|
||||
/* Make sure operands fit in the bitfields above. */
|
||||
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
|
||||
QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10));
|
||||
QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
|
||||
|
||||
/* Make sure that we don't overflow 64 bits without noticing. */
|
||||
QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
|
||||
|
||||
struct TCGContext {
|
||||
uint8_t *pool_cur, *pool_end;
|
||||
|
@ -600,6 +617,7 @@ struct TCGContext {
|
|||
int nb_labels;
|
||||
int nb_globals;
|
||||
int nb_temps;
|
||||
int nb_indirects;
|
||||
|
||||
/* goto_tb support */
|
||||
tcg_insn_unit *code_buf;
|
||||
|
@ -607,13 +625,6 @@ struct TCGContext {
|
|||
uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
|
||||
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
|
||||
|
||||
/* liveness analysis */
|
||||
uint16_t *op_dead_args; /* for each operation, each bit tells if the
|
||||
corresponding argument is dead */
|
||||
uint8_t *op_sync_args; /* for each operation, each bit tells if the
|
||||
corresponding output argument needs to be
|
||||
sync to memory. */
|
||||
|
||||
TCGRegSet reserved_regs;
|
||||
intptr_t current_frame_offset;
|
||||
intptr_t frame_start;
|
||||
|
@ -649,8 +660,6 @@ struct TCGContext {
|
|||
int goto_tb_issue_mask;
|
||||
#endif
|
||||
|
||||
int gen_first_op_idx;
|
||||
int gen_last_op_idx;
|
||||
int gen_next_op_idx;
|
||||
int gen_next_parm_idx;
|
||||
|
||||
|
@ -890,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func,
|
|||
TCGArg ret, int nargs, TCGArg *args);
|
||||
|
||||
void tcg_op_remove(TCGContext *s, TCGOp *op);
|
||||
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
|
||||
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
|
||||
|
||||
void tcg_optimize(TCGContext *s);
|
||||
|
||||
/* only used for debugging purposes */
|
||||
|
|
24
util/log.c
24
util/log.c
|
@ -32,15 +32,22 @@ int qemu_loglevel;
|
|||
static int log_append = 0;
|
||||
static GArray *debug_regions;
|
||||
|
||||
void qemu_log(const char *fmt, ...)
|
||||
/* Return the number of characters emitted. */
|
||||
int qemu_log(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
int ret = 0;
|
||||
if (qemu_logfile) {
|
||||
vfprintf(qemu_logfile, fmt, ap);
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
ret = vfprintf(qemu_logfile, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
/* Don't pass back error results. */
|
||||
if (ret < 0) {
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool log_uses_own_buffers;
|
||||
|
@ -240,8 +247,9 @@ const QEMULogItem qemu_log_items[] = {
|
|||
{ CPU_LOG_TB_OP, "op",
|
||||
"show micro ops for each compiled TB" },
|
||||
{ CPU_LOG_TB_OP_OPT, "op_opt",
|
||||
"show micro ops (x86 only: before eflags optimization) and\n"
|
||||
"after liveness analysis" },
|
||||
"show micro ops after optimization" },
|
||||
{ CPU_LOG_TB_OP_IND, "op_ind",
|
||||
"show micro ops before indirect lowering" },
|
||||
{ CPU_LOG_INT, "int",
|
||||
"show interrupts/exceptions in short format" },
|
||||
{ CPU_LOG_EXEC, "exec",
|
||||
|
|
Loading…
Reference in New Issue