diff --git a/src/jit/frontend/armv3/armv3_frontend.c b/src/jit/frontend/armv3/armv3_frontend.c
index c53ab4b4..f5a03e97 100644
--- a/src/jit/frontend/armv3/armv3_frontend.c
+++ b/src/jit/frontend/armv3/armv3_frontend.c
@@ -52,20 +52,19 @@ static void armv3_frontend_translate_code(struct jit_frontend *base,
 }
 
 static void armv3_frontend_analyze_code(struct jit_frontend *base,
-                                        struct jit_block *block) {
+                                        uint32_t begin_addr, int *size) {
   struct armv3_frontend *frontend = (struct armv3_frontend *)base;
   struct armv3_guest *guest = (struct armv3_guest *)frontend->guest;
-  uint32_t addr = block->guest_addr;
 
-  block->guest_size = 0;
+  *size = 0;
 
   while (1) {
+    uint32_t addr = begin_addr + *size;
     uint32_t data = guest->r32(guest->space, addr);
     union armv3_instr i = {data};
     struct jit_opdef *def = armv3_get_opdef(i.raw);
 
-    addr += 4;
-    block->guest_size += 4;
+    *size += 4;
 
     /* stop emitting when pc is changed */
     int mov_to_pc = 0;
diff --git a/src/jit/frontend/sh4/sh4_disasm.c b/src/jit/frontend/sh4/sh4_disasm.c
index 74ab9a76..9b0b5567 100644
--- a/src/jit/frontend/sh4/sh4_disasm.c
+++ b/src/jit/frontend/sh4/sh4_disasm.c
@@ -159,6 +159,70 @@ void sh4_format(uint32_t addr, union sh4_instr i, char *buffer,
   CHECK_EQ(strnrep(buffer, buffer_size, "#imm8", 5, value, value_len), 0);
 }
 
+/* decodes the branch instruction i located at addr. branch_type is written
+   for every recognized op. branch_addr is only written for static branches
+   (BF, BFS, BT, BTS, BRA, BSR) and next_addr only for the conditional ones.
+   both are left untouched otherwise, so check branch_type before reading */
+void sh4_branch_info(uint32_t addr, union sh4_instr i, int *branch_type,
+                     uint32_t *branch_addr, uint32_t *next_addr) {
+  struct jit_opdef *def = sh4_get_opdef(i.raw);
+
+  if (def->op == SH4_OP_INVALID) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_BF) {
+    uint32_t dest_addr = ((int8_t)i.disp_8.disp * 2) + addr + 4;
+    *branch_type = SH4_BRANCH_STATIC_FALSE;
+    *branch_addr = dest_addr;
+    *next_addr = addr + 4;
+  } else if (def->op == SH4_OP_BFS) {
+    uint32_t dest_addr = ((int8_t)i.disp_8.disp * 2) + addr + 4;
+    *branch_type = SH4_BRANCH_STATIC_FALSE;
+    *branch_addr = dest_addr;
+    *next_addr = addr + 4;
+  } else if (def->op == SH4_OP_BT) {
+    uint32_t dest_addr = ((int8_t)i.disp_8.disp * 2) + addr + 4;
+    *branch_type = SH4_BRANCH_STATIC_TRUE;
+    *branch_addr = dest_addr;
+    *next_addr = addr + 4;
+  } else if (def->op == SH4_OP_BTS) {
+    uint32_t dest_addr = ((int8_t)i.disp_8.disp * 2) + addr + 4;
+    *branch_type = SH4_BRANCH_STATIC_TRUE;
+    *branch_addr = dest_addr;
+    *next_addr = addr + 4;
+  } else if (def->op == SH4_OP_BRA) {
+    /* 12-bit displacement must be sign extended */
+    int32_t disp = ((i.disp_12.disp & 0xfff) << 20) >> 20;
+    uint32_t dest_addr = (disp * 2) + addr + 4;
+    *branch_type = SH4_BRANCH_STATIC;
+    *branch_addr = dest_addr;
+  } else if (def->op == SH4_OP_BRAF) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_BSR) {
+    /* 12-bit displacement must be sign extended */
+    int32_t disp = ((i.disp_12.disp & 0xfff) << 20) >> 20;
+    uint32_t ret_addr = addr + 4;
+    uint32_t dest_addr = ret_addr + disp * 2;
+    *branch_type = SH4_BRANCH_STATIC;
+    *branch_addr = dest_addr;
+  } else if (def->op == SH4_OP_BSRF) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_JMP) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_JSR) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_RTS) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_RTE) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_SLEEP) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else if (def->op == SH4_OP_TRAPA) {
+    *branch_type = SH4_BRANCH_DYNAMIC;
+  } else {
+    LOG_FATAL("unexpected branch op %s", def->name);
+  }
+}
+
 CONSTRUCTOR(sh4_disasm_init) {
   sh4_disasm_init_lookup();
 }
diff --git a/src/jit/frontend/sh4/sh4_disasm.h b/src/jit/frontend/sh4/sh4_disasm.h
index b7dc305b..7a03cc8a 100644
--- a/src/jit/frontend/sh4/sh4_disasm.h
+++ b/src/jit/frontend/sh4/sh4_disasm.h
@@ -17,6 +17,15 @@ enum {
   SH4_FLAG_STORE_SR = 0x200,
 };
 
+enum {
+  SH4_BRANCH_STATIC,
+  SH4_BRANCH_STATIC_TRUE,
+  SH4_BRANCH_STATIC_FALSE,
+  SH4_BRANCH_DYNAMIC,
+  SH4_BRANCH_DYNAMIC_TRUE,
+  SH4_BRANCH_DYNAMIC_FALSE,
+};
+
 enum sh4_op {
 #define SH4_INSTR(name, desc, instr_code, cycles, flags) SH4_OP_##name,
 #include "jit/frontend/sh4/sh4_instr.inc"
@@ -62,6 +71,9 @@ static struct jit_opdef *sh4_get_opdef(uint16_t instr) {
   return &sh4_opdefs[sh4_get_op(instr)];
 }
 
+void sh4_branch_info(uint32_t addr, union sh4_instr i, int *branch_type,
+                     uint32_t *branch_addr, uint32_t *next_addr);
+
 void sh4_format(uint32_t addr, union sh4_instr i, char *buffer,
                 size_t buffer_size);
 
diff --git a/src/jit/frontend/sh4/sh4_frontend.c b/src/jit/frontend/sh4/sh4_frontend.c
index 35c1766d..fbe9b866 100644
--- a/src/jit/frontend/sh4/sh4_frontend.c
+++ b/src/jit/frontend/sh4/sh4_frontend.c
@@ -67,6 +67,88 @@ static void sh4_frontend_dump_code(struct jit_frontend *base,
   }
 }
 
+/* returns 1 if def ends the block being scanned, 0 otherwise */
+static int sh4_frontend_is_terminator(struct jit_opdef *def) {
+  /* stop emitting once a branch is hit */
+  if (def->flags & SH4_FLAG_STORE_PC) {
+    return 1;
+  }
+
+  /* if fpscr has changed, stop emitting since the fpu state is invalidated.
+     also, if sr has changed, stop emitting as there are interrupts that
+     possibly need to be handled */
+  if (def->flags & (SH4_FLAG_STORE_FPSCR | SH4_FLAG_STORE_SR)) {
+    return 1;
+  }
+
+  return 0;
+}
+
+/* returns 1 if the block starting at begin_addr looks like an idle loop: each
+   instruction (and delay slot) has at least one of the IDLE_MASK flags, all
+   of the IDLE_MASK flags appear somewhere in the block and, if the block ends
+   in a branch, it is a static branch to at most 32 bytes before begin_addr */
+static int sh4_frontend_is_idle_loop(struct sh4_frontend *frontend,
+                                     uint32_t begin_addr) {
+  struct sh4_guest *guest = (struct sh4_guest *)frontend->guest;
+
+  /* look ahead to see if the current basic block is an idle loop */
+  static const int IDLE_MASK = SH4_FLAG_LOAD | SH4_FLAG_COND | SH4_FLAG_CMP;
+  int idle_loop = 1;
+  int all_flags = 0;
+
+  int offset = 0;
+
+  while (1) {
+    uint32_t addr = begin_addr + offset;
+    uint16_t data = guest->r16(guest->space, addr);
+    struct jit_opdef *def = sh4_get_opdef(data);
+
+    offset += 2;
+
+    /* if the instruction has none of the IDLE_MASK flags, disqualify */
+    idle_loop &= (def->flags & IDLE_MASK) != 0;
+    all_flags |= def->flags;
+
+    if (def->flags & SH4_FLAG_DELAYED) {
+      uint32_t delay_addr = begin_addr + offset;
+      uint16_t delay_data = guest->r16(guest->space, delay_addr);
+      struct jit_opdef *delay_def = sh4_get_opdef(delay_data);
+
+      offset += 2;
+
+      idle_loop &= (delay_def->flags & IDLE_MASK) != 0;
+      all_flags |= delay_def->flags;
+    }
+
+    if (sh4_frontend_is_terminator(def)) {
+      /* if the block doesn't contain the required flags, disqualify */
+      idle_loop &= (all_flags & IDLE_MASK) == IDLE_MASK;
+
+      /* if the branch isn't a short back edge, disqualify */
+      if (def->flags & SH4_FLAG_STORE_PC) {
+        union sh4_instr instr = {data};
+
+        /* sh4_branch_info only writes branch_addr for static branches. a
+           dynamic branch has no known target, so it can never qualify */
+        int branch_type = SH4_BRANCH_DYNAMIC;
+        uint32_t branch_addr = 0;
+        uint32_t next_addr = 0;
+        sh4_branch_info(addr, instr, &branch_type, &branch_addr, &next_addr);
+
+        int static_branch = branch_type == SH4_BRANCH_STATIC ||
+                            branch_type == SH4_BRANCH_STATIC_TRUE ||
+                            branch_type == SH4_BRANCH_STATIC_FALSE;
+        idle_loop &= static_branch && (begin_addr - branch_addr) <= 32;
+      }
+
+      break;
+    }
+  }
+
+  return idle_loop;
+}
+
 static void sh4_frontend_translate_code(struct jit_frontend *base,
                                         struct jit_block *block,
                                         struct ir *ir) {
@@ -76,6 +158,14 @@ static void sh4_frontend_translate_code(struct jit_frontend *base,
 
   PROF_ENTER("cpu", "sh4_frontend_translate_code");
 
+  /* cheap idle skip. in an idle loop, the block is just spinning, waiting for
+     an interrupt such as vblank before it'll exit. scale the block's number of
+     cycles in order to yield execution faster, enabling the interrupt to
+     actually be generated */
+  int idle_loop = sh4_frontend_is_idle_loop(frontend, block->guest_addr);
+  int cycle_scale = idle_loop ? 10 : 1;
+
+  /* generate code specialized for the current fpscr state */
   int flags = 0;
   if (ctx->fpscr & PR_MASK) {
     flags |= SH4_DOUBLE_PR;
@@ -97,7 +187,7 @@ static void sh4_frontend_translate_code(struct jit_frontend *base,
     def = sh4_get_opdef(data);
 
     /* emit synthetic op responsible for mapping guest to host instructions */
-    ir_source_info(ir, addr, SCALE_CYCLES(block, def->cycles));
+    ir_source_info(ir, addr, def->cycles * cycle_scale);
 
     /* the pc is normally only written to the context at the end of the block,
        sync now for any instruction which needs to read the correct pc */
@@ -121,7 +211,7 @@ static void sh4_frontend_translate_code(struct jit_frontend *base,
       struct ir_insert_point original = ir_get_insert_point(ir);
       ir_set_insert_point(ir, &delay_point);
 
-      ir_source_info(ir, delay_addr, SCALE_CYCLES(block, delay_def->cycles));
+      ir_source_info(ir, delay_addr, delay_def->cycles * cycle_scale);
 
       if (delay_def->flags & SH4_FLAG_LOAD_PC) {
         ir_store_context(ir, offsetof(struct sh4_context, pc),
@@ -164,126 +254,33 @@ static void sh4_frontend_translate_code(struct jit_frontend *base,
 }
 
 static void sh4_frontend_analyze_code(struct jit_frontend *base,
-                                      struct jit_block *block) {
+                                      uint32_t begin_addr, int *size) {
   struct sh4_frontend *frontend = (struct sh4_frontend *)base;
   struct sh4_guest *guest = (struct sh4_guest *)frontend->guest;
 
-  static int IDLE_MASK = SH4_FLAG_LOAD | SH4_FLAG_COND | SH4_FLAG_CMP;
-  int idle_loop = 1;
-  int all_flags = 0;
-  uint32_t offset = 0;
-
-  block->guest_size = 0;
+  *size = 0;
 
   while (1) {
-    uint32_t addr = block->guest_addr + offset;
-    uint32_t data = guest->r16(guest->space, addr);
-    union sh4_instr instr = {data};
+    uint32_t addr = begin_addr + *size;
+    uint16_t data = guest->r16(guest->space, addr);
     struct jit_opdef *def = sh4_get_opdef(data);
 
-    offset += 2;
-    block->guest_size += 2;
-
-    /* if the instruction has none of the IDLE_MASK flags, disqualify */
-    idle_loop &= (def->flags & IDLE_MASK) != 0;
-    all_flags |= def->flags;
+    *size += 2;
 
     if (def->flags & SH4_FLAG_DELAYED) {
-      uint32_t delay_addr = block->guest_addr + offset;
-      uint32_t delay_data = guest->r16(guest->space, delay_addr);
+      uint32_t delay_addr = begin_addr + *size;
+      uint16_t delay_data = guest->r16(guest->space, delay_addr);
       struct jit_opdef *delay_def = sh4_get_opdef(delay_data);
 
-      offset += 2;
-      block->guest_size += 2;
-
-      idle_loop &= (delay_def->flags & IDLE_MASK) != 0;
-      all_flags |= delay_def->flags;
+      *size += 2;
 
       /* delay slots can't have another delay slot */
       CHECK(!(delay_def->flags & SH4_FLAG_DELAYED));
     }
 
-    /* stop emitting once a branch is hit and save off branch information */
-    if (def->flags & SH4_FLAG_STORE_PC) {
-      if (def->op == SH4_OP_INVALID) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_BF) {
-        uint32_t dest_addr = ((int8_t)instr.disp_8.disp * 2) + addr + 4;
-        block->branch_type = JIT_BRANCH_STATIC_FALSE;
-        block->branch_addr = dest_addr;
-        block->next_addr = addr + 4;
-      } else if (def->op == SH4_OP_BFS) {
-        uint32_t dest_addr = ((int8_t)instr.disp_8.disp * 2) + addr + 4;
-        block->branch_type = JIT_BRANCH_STATIC_FALSE;
-        block->branch_addr = dest_addr;
-        block->next_addr = addr + 4;
-      } else if (def->op == SH4_OP_BT) {
-        uint32_t dest_addr = ((int8_t)instr.disp_8.disp * 2) + addr + 4;
-        block->branch_type = JIT_BRANCH_STATIC_TRUE;
-        block->branch_addr = dest_addr;
-        block->next_addr = addr + 4;
-      } else if (def->op == SH4_OP_BTS) {
-        uint32_t dest_addr = ((int8_t)instr.disp_8.disp * 2) + addr + 4;
-        block->branch_type = JIT_BRANCH_STATIC_TRUE;
-        block->branch_addr = dest_addr;
-        block->next_addr = addr + 4;
-      } else if (def->op == SH4_OP_BRA) {
-        /* 12-bit displacement must be sign extended */
-        int32_t disp = ((instr.disp_12.disp & 0xfff) << 20) >> 20;
-        uint32_t dest_addr = (disp * 2) + addr + 4;
-        block->branch_type = JIT_BRANCH_STATIC;
-        block->branch_addr = dest_addr;
-      } else if (def->op == SH4_OP_BRAF) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_BSR) {
-        /* 12-bit displacement must be sign extended */
-        int32_t disp = ((instr.disp_12.disp & 0xfff) << 20) >> 20;
-        uint32_t ret_addr = addr + 4;
-        uint32_t dest_addr = ret_addr + disp * 2;
-        block->branch_type = JIT_BRANCH_STATIC;
-        block->branch_addr = dest_addr;
-      } else if (def->op == SH4_OP_BSRF) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_JMP) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_JSR) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_RTS) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_RTE) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_SLEEP) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else if (def->op == SH4_OP_TRAPA) {
-        block->branch_type = JIT_BRANCH_DYNAMIC;
-      } else {
-        LOG_FATAL("unexpected branch op %d", def->name);
-      }
-
+    if (sh4_frontend_is_terminator(def)) {
       break;
     }
-
-    /* if fpscr has changed, stop emitting since the fpu state is invalidated.
-       also, if sr has changed, stop emitting as there are interrupts that
-       possibly need to be handled */
-    if (def->flags & (SH4_FLAG_STORE_FPSCR | SH4_FLAG_STORE_SR)) {
-      break;
-    }
-  }
-
-  /* if the block doesn't contain the required flags, disqualify */
-  idle_loop &= (all_flags & IDLE_MASK) == IDLE_MASK;
-
-  /* if the branch isn't a short back edge, disqualify */
-  idle_loop &= (block->guest_addr - block->branch_addr) <= 32;
-
-  if (idle_loop) {
-#if 0
-    LOG_INFO("sh4_analyze_block detected idle loop at 0x%08x",
-             block->guest_addr);
-#endif
-
-    block->idle_loop = 1;
   }
 }
 
diff --git a/src/jit/jit.c b/src/jit/jit.c
index 244e970e..6913beb1 100644
--- a/src/jit/jit.c
+++ b/src/jit/jit.c
@@ -300,11 +300,13 @@ void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
   LOG_INFO("jit_compile_block %s 0x%08x", jit->tag, guest_addr);
 #endif
 
+  /* analyze the guest code to get its extents */
+  int guest_size;
+  jit->frontend->analyze_code(jit->frontend, guest_addr, &guest_size);
+
   struct jit_block *block = jit_alloc_block(jit);
   block->guest_addr = guest_addr;
-
-  /* analyze the guest code to get its extents */
-  jit->frontend->analyze_code(jit->frontend, block);
+  block->guest_size = guest_size;
 
   /* allocate meta data structs for the original guest code */
   block->source_map = calloc(block->guest_size, sizeof(void *));
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 0020f891..1ac50b61 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -14,15 +14,6 @@ struct lse;
 struct ra;
 struct val;
 
-enum {
-  JIT_BRANCH_STATIC,
-  JIT_BRANCH_STATIC_TRUE,
-  JIT_BRANCH_STATIC_FALSE,
-  JIT_BRANCH_DYNAMIC,
-  JIT_BRANCH_DYNAMIC_TRUE,
-  JIT_BRANCH_DYNAMIC_FALSE,
-};
-
 enum {
   JIT_REASON_UNKNOWN,
   JIT_REASON_FASTMEM,
@@ -33,16 +24,6 @@ struct jit_block {
   uint32_t guest_addr;
   int guest_size;
 
-  /* destination address of terminating branch */
-  int branch_type;
-  uint32_t branch_addr;
-
-  /* address of next instruction after branch */
-  uint32_t next_addr;
-
-  /* is block an idle loop */
-  int idle_loop;
-
   /* maps guest instructions to host instructions */
   void **source_map;
 
diff --git a/src/jit/jit_frontend.h b/src/jit/jit_frontend.h
index 105a88c2..bc945f34 100644
--- a/src/jit/jit_frontend.h
+++ b/src/jit/jit_frontend.h
@@ -26,7 +26,7 @@ struct jit_frontend {
 
   void (*destroy)(struct jit_frontend *);
 
-  void (*analyze_code)(struct jit_frontend *, struct jit_block *);
+  void (*analyze_code)(struct jit_frontend *, uint32_t, int *);
   void (*translate_code)(struct jit_frontend *, struct jit_block *,
                          struct ir *);
   void (*dump_code)(struct jit_frontend *, const struct jit_block *,