tcg-arm: Improve constant generation

Try fully rotated arguments to mov and mvn before trying movt
or full decomposition.  Begin decomposition with mvn when it
looks like it'll help.  Examples include

-:        mov   r9, #0x00000fa0
-:        orr   r9, r9, #0x000ee000
-:        orr   r9, r9, #0x0ff00000
-:        orr   r9, r9, #0xf0000000
+:        mvn   r9, #0x0000005f
+:        eor   r9, r9, #0x00011000

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2013-03-04 23:16:24 -08:00 committed by Aurelien Jarno
parent 2df3f1ee68
commit e86e0f2807
1 changed file with 44 additions and 23 deletions

View File

@@ -427,15 +427,31 @@ static inline void tcg_out_dat_imm(TCGContext *s,
(rn << 16) | (rd << 12) | im); (rn << 16) | (rd << 12) | im);
} }
/*
 * Emit code to load the 32-bit constant ARG into register RD under
 * condition COND, choosing the shortest available instruction sequence.
 *
 * Strategy, in order of preference:
 *   1. A single MOV or MVN with a rotated 8-bit immediate, when ARG
 *      (or ~ARG) fits the ARM modified-immediate encoding.
 *   2. On ARMv7, MOVW (optionally followed by MOVT) — two insns max.
 *   3. Otherwise, decompose ARG into 8-bit chunks: MOV/MVN of the first
 *      chunk followed by EOR of the remaining chunks.  Starting with MVN
 *      when ARG has more leading 1s than leading 0s shortens runs of set
 *      high bits (EOR clears bits that MVN set), e.g.:
 *          mvn r9, #0x0000005f
 *          eor r9, r9, #0x00011000
 *      instead of a four-insn MOV/ORR/ORR/ORR sequence.
 */
static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
{
    int rot, opc, rn;

    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
       Speed things up by only checking when movt would be required.
       Prior to armv7, have one go at fully rotated immediates before
       doing the decomposition thing below.  */
    if (!use_armv7_instructions || (arg & 0xffff0000)) {
        rot = encode_imm(arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
                            rotl(arg, rot) | (rot << 7));
            return;
        }
        rot = encode_imm(~arg);
        if (rot >= 0) {
            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
                            rotl(~arg, rot) | (rot << 7));
            return;
        }
    }

    /* Use movw + movt.  */
    if (use_armv7_instructions) {
        /* movw */
        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
        if (arg & 0xffff0000) {
            /* movt */
            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
        }
        return;
    }

    /* TODO: This is very suboptimal, we can easily have a constant
       pool somewhere after all the instructions.  */
    opc = ARITH_MOV;
    rn = 0;
    /* If we have lots of leading 1's, we can shorten the sequence by
       beginning with mvn and then clearing higher bits with eor.  */
    if (clz32(~arg) > clz32(arg)) {
        opc = ARITH_MVN, arg = ~arg;
    }
    do {
        /* Peel off the lowest 8-bit chunk, aligned to an even bit
           position as the rotated-immediate encoding requires.  */
        int i = ctz32(arg) & ~1;
        rot = ((32 - i) << 7) & 0xf00;
        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
        arg &= ~(0xff << i);
        /* Subsequent chunks merge into RD; EOR (not ORR) so that the
           MVN-first path can clear bits as well as set them.  */
        opc = ARITH_EOR;
        rn = rd;
    } while (arg);
}
static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
TCGArg lhs, TCGArg rhs, int rhs_is_const) TCGArg lhs, TCGArg rhs, int rhs_is_const)