diff --git a/src/jit/backend/x64/x64_backend.cc b/src/jit/backend/x64/x64_backend.cc
index 3af25740..676a7353 100644
--- a/src/jit/backend/x64/x64_backend.cc
+++ b/src/jit/backend/x64/x64_backend.cc
@@ -296,80 +296,6 @@ void x64_backend_block_label(char *name, size_t size, struct ir_block *block) {
   snprintf(name, size, ".%p", block);
 }
 
-static void x64_backend_emit_epilogue(struct x64_backend *backend,
-                                      struct jit_block *block) {
-  auto &e = *backend->codegen;
-
-  /* if the block didn't branch to another address, return to dispatch */
-  e.jmp(backend->dispatch_dynamic);
-}
-
-static void x64_backend_emit_prologue(struct x64_backend *backend,
-                                      struct jit_block *block,
-                                      struct ir_block *ir) {
-  struct jit_guest *guest = backend->base.guest;
-
-  auto &e = *backend->codegen;
-
-  /* count number of instrs / cycles in the block */
-  int num_instrs = 0;
-  int num_cycles = 0;
-
-  list_for_each_entry(instr, &ir->instrs, struct ir_instr, it) {
-    if (instr->op == OP_SOURCE_INFO) {
-      num_instrs += 1;
-      num_cycles += instr->arg[1]->i32;
-    }
-  }
-
-  /* yield control once remaining cycles are executed */
-  e.mov(e.eax, e.dword[guestctx + guest->offset_cycles]);
-  e.test(e.eax, e.eax);
-  e.js(backend->dispatch_exit);
-
-  /* handle pending interrupts */
-  e.mov(e.rax, e.qword[guestctx + guest->offset_interrupts]);
-  e.test(e.rax, e.rax);
-  e.jnz(backend->dispatch_interrupt);
-
-  /* update run counts */
-  e.sub(e.dword[guestctx + guest->offset_cycles], num_cycles);
-  e.add(e.dword[guestctx + guest->offset_instrs], num_instrs);
-}
-
-static void x64_backend_emit(struct x64_backend *backend,
-                             struct jit_block *block, struct ir *ir) {
-  auto &e = *backend->codegen;
-  const uint8_t *code = backend->codegen->getCurr();
-
-  CHECK_LT(ir->locals_size, X64_STACK_SIZE);
-
-  e.inLocalLabel();
-
-  list_for_each_entry(blk, &ir->blocks, struct ir_block, it) {
-    char block_label[128];
-    x64_backend_block_label(block_label, sizeof(block_label), blk);
-    e.L(block_label);
-
-    x64_backend_emit_prologue(backend, block, blk);
-
-    list_for_each_entry(instr, &blk->instrs, struct ir_instr, it) {
-      struct jit_emitter *emitter = &x64_emitters[instr->op];
-      x64_emit_cb emit = (x64_emit_cb)emitter->func;
-      CHECK_NOTNULL(emit);
-
-      emit(backend, *backend->codegen, block, instr);
-    }
-
-    x64_backend_emit_epilogue(backend, block);
-  }
-
-  e.outLocalLabel();
-
-  block->host_addr = (void *)code;
-  block->host_size = (int)(backend->codegen->getCurr() - code);
-}
-
 static void x64_backend_emit_thunks(struct x64_backend *backend) {
   auto &e = *backend->codegen;
 
@@ -584,6 +510,10 @@ static void x64_backend_dump_code(struct jit_backend *base, const uint8_t *addr,
   size_t count = cs_disasm(backend->capstone_handle, addr, size, 0, 0, &insns);
   CHECK(count);
 
+  fprintf(output, "#==--------------------------------------------------==#\n");
+  fprintf(output, "# x64\n");
+  fprintf(output, "#==--------------------------------------------------==#\n");
+
   for (size_t i = 0; i < count; i++) {
     cs_insn &insn = insns[i];
     fprintf(output, "# 0x%" PRIx64 ":\t%s\t\t%s\n", insn.address, insn.mnemonic,
@@ -593,17 +523,107 @@ static void x64_backend_dump_code(struct jit_backend *base, const uint8_t *addr,
   cs_free(insns, count);
 }
 
-static int x64_backend_assemble_code(struct jit_backend *base,
-                                     struct jit_block *block, struct ir *ir) {
+static void x64_backend_emit_epilog(struct x64_backend *backend,
+                                    struct ir_block *block) {
+  auto &e = *backend->codegen;
+
+  /* if the block didn't branch to another address, return to dispatch */
+  e.jmp(backend->dispatch_dynamic);
+}
+
+static void x64_backend_emit_prolog(struct x64_backend *backend,
+                                    struct ir_block *block) {
+  struct jit_guest *guest = backend->base.guest;
+
+  auto &e = *backend->codegen;
+
+  /* count number of instrs / cycles in the block */
+  int num_instrs = 0;
+  int num_cycles = 0;
+
+  list_for_each_entry(instr, &block->instrs, struct ir_instr, it) {
+    if (instr->op == OP_SOURCE_INFO) {
+      num_instrs += 1;
+      num_cycles += instr->arg[1]->i32;
+    }
+  }
+
+  /* yield control once remaining cycles are executed */
+  e.mov(e.eax, e.dword[guestctx + guest->offset_cycles]);
+  e.test(e.eax, e.eax);
+  e.js(backend->dispatch_exit);
+
+  /* handle pending interrupts */
+  e.mov(e.rax, e.qword[guestctx + guest->offset_interrupts]);
+  e.test(e.rax, e.rax);
+  e.jnz(backend->dispatch_interrupt);
+
+  /* update run counts */
+  e.sub(e.dword[guestctx + guest->offset_cycles], num_cycles);
+  e.add(e.dword[guestctx + guest->offset_instrs], num_instrs);
+}
+
+static void x64_backend_emit(struct x64_backend *backend, struct ir *ir,
+                             jit_emit_cb emit_cb, void *emit_data) {
+  auto &e = *backend->codegen;
+
+  CHECK_LT(ir->locals_size, X64_STACK_SIZE);
+
+  list_for_each_entry(block, &ir->blocks, struct ir_block, it) {
+    int first = 1;
+
+    /* label each block for local branches */
+    uint8_t *block_addr = e.getCurr();
+    char block_label[128];
+    x64_backend_block_label(block_label, sizeof(block_label), block);
+    e.L(block_label);
+
+    x64_backend_emit_prolog(backend, block);
+
+    list_for_each_entry(instr, &block->instrs, struct ir_instr, it) {
+      /* call emit callback for each guest block / instruction enabling users
+         to map each to their corresponding host address */
+      if (instr->op == OP_SOURCE_INFO) {
+        uint32_t guest_addr = instr->arg[0]->i32;
+
+        if (first) {
+          emit_cb(emit_data, JIT_EMIT_BLOCK, guest_addr, block_addr);
+          first = 0;
+        }
+
+        uint8_t *instr_addr = e.getCurr();
+        emit_cb(emit_data, JIT_EMIT_INSTR, guest_addr, instr_addr);
+
+        continue;
+      }
+
+      struct jit_emitter *emitter = &x64_emitters[instr->op];
+      x64_emit_cb emit = (x64_emit_cb)emitter->func;
+      CHECK_NOTNULL(emit);
+      emit(backend, e, instr);
+    }
+
+    x64_backend_emit_epilog(backend, block);
+  }
+}
+
+static int x64_backend_assemble_code(struct jit_backend *base, struct ir *ir,
+                                     uint8_t **addr, int *size,
+                                     jit_emit_cb emit_cb, void *emit_data) {
   PROF_ENTER("cpu", "x64_backend_assemble_code");
 
   struct x64_backend *backend = container_of(base, struct x64_backend, base);
+  auto &e = *backend->codegen;
   int res = 1;
+  uint8_t *code = e.getCurr();
 
   /* try to generate the x64 code. if the code buffer overflows let the backend
      know so it can reset the cache and try again */
+  e.inLocalLabel();
+
   try {
-    x64_backend_emit(backend, block, ir);
+    x64_backend_emit(backend, ir, emit_cb, emit_data);
   } catch (const Xbyak::Error &e) {
     if (e != Xbyak::ERR_CODE_IS_TOO_BIG) {
       LOG_FATAL("x64 codegen failure, %s", e.what());
@@ -611,6 +631,12 @@ static int x64_backend_assemble_code(struct jit_backend *base,
     res = 0;
   }
 
+  e.outLocalLabel();
+
+  /* return code address */
+  *addr = code;
+  *size = (int)(e.getCurr() - code);
+
   PROF_LEAVE();
 
   return res;
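
NOTE: the per-block prolog above is easier to follow as the guest-level logic it
compiles down to. A rough C sketch (all names here are illustrative, not actual
redream APIs; the fields mirror the offset_cycles / offset_interrupts /
offset_instrs context slots addressed by the emitted mov/test/sub/add sequence):

    #include <stdint.h>

    /* illustrative stand-in for the guest context that the emitted code
       indexes through guestctx + offset_* */
    struct guest_ctx {
      int32_t cycles;      /* remaining cycles in the current time slice */
      uint64_t interrupts; /* pending interrupt bitmask */
      int32_t instrs;      /* retired guest instruction count */
    };

    enum { RUN_BLOCK, YIELD_EXIT, YIELD_INTERRUPT };

    static int block_prolog(struct guest_ctx *ctx, int num_cycles,
                            int num_instrs) {
      /* yield once the time slice has been consumed (the mov/test/js) */
      if (ctx->cycles < 0) {
        return YIELD_EXIT;
      }

      /* service pending interrupts before running more guest code (test/jnz) */
      if (ctx->interrupts) {
        return YIELD_INTERRUPT;
      }

      /* bill the whole block's cost up front (the sub/add) */
      ctx->cycles -= num_cycles;
      ctx->instrs += num_instrs;
      return RUN_BLOCK;
    }
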
diff --git a/src/jit/backend/x64/x64_emitters.cc b/src/jit/backend/x64/x64_emitters.cc
index 3b30cabe..612973be 100644
--- a/src/jit/backend/x64/x64_emitters.cc
+++ b/src/jit/backend/x64/x64_emitters.cc
@@ -8,14 +8,14 @@ extern "C" {
 
 #define EMITTER(op, constraints)                                            \
   void x64_emit_##op(struct x64_backend *, Xbyak::CodeGenerator &,          \
-                     struct jit_block *, const struct ir_instr *);          \
+                     const struct ir_instr *);                              \
   static struct _x64_##op##_init {                                          \
     _x64_##op##_init() {                                                    \
       x64_emitters[OP_##op] = {(void *)&x64_emit_##op, constraints};        \
     }                                                                       \
   } x64_##op##_init;                                                        \
   void x64_emit_##op(struct x64_backend *backend, Xbyak::CodeGenerator &e,  \
-                     struct jit_block *block, const struct ir_instr *instr)
+                     const struct ir_instr *instr)
 
 #define CONSTRAINTS(result_flags, ...) \
   result_flags, {                      \
@@ -60,12 +60,7 @@ enum {
 
 struct jit_emitter x64_emitters[IR_NUM_OPS];
 
-EMITTER(SOURCE_INFO, CONSTRAINTS(NONE, IMM_I32, IMM_I32)) {
-  if (block->source_map) {
-    uint32_t addr = ARG0->i32;
-    block->source_map[addr - block->guest_addr] = e.getCurr();
-  }
-}
+EMITTER(SOURCE_INFO, CONSTRAINTS(NONE, IMM_I32, IMM_I32)) {}
 
 EMITTER(FALLBACK, CONSTRAINTS(NONE, IMM_I64, IMM_I32, IMM_I32)) {
   struct jit_guest *guest = backend->base.guest;
diff --git a/src/jit/backend/x64/x64_local.h b/src/jit/backend/x64/x64_local.h
index e5a7f9d2..5f32cfa5 100644
--- a/src/jit/backend/x64/x64_local.h
+++ b/src/jit/backend/x64/x64_local.h
@@ -108,7 +108,7 @@ void x64_dispatch_restore_edge(struct jit_backend *base, void *code,
  * emitters
  */
 typedef void (*x64_emit_cb)(struct x64_backend *, Xbyak::CodeGenerator &,
-                            struct jit_block *, const struct ir_instr *);
+                            const struct ir_instr *);
 extern struct jit_emitter x64_emitters[IR_NUM_OPS];
 
 #endif
diff --git a/src/jit/frontend/armv3/armv3_frontend.c b/src/jit/frontend/armv3/armv3_frontend.c
index 0beefba5..e8c9aed9 100644
--- a/src/jit/frontend/armv3/armv3_frontend.c
+++ b/src/jit/frontend/armv3/armv3_frontend.c
@@ -22,16 +22,21 @@ static void armv3_frontend_dump_code(struct jit_frontend *base,
   struct armv3_frontend *frontend = (struct armv3_frontend *)base;
   struct jit_guest *guest = frontend->guest;
 
+  int offset = 0;
   char buffer[128];
 
-  for (int offset = 0; offset < size; offset += 4) {
+  fprintf(output, "#==--------------------------------------------------==#\n");
+  fprintf(output, "# armv3\n");
+  fprintf(output, "#==--------------------------------------------------==#\n");
+
+  while (offset < size) {
     uint32_t addr = begin_addr + offset;
     uint32_t data = guest->r32(guest->space, addr);
 
     armv3_format(addr, data, buffer, sizeof(buffer));
     fprintf(output, "# %s\n", buffer);
 
-    addr += 4;
+    offset += 4;
   }
 }
 
@@ -41,13 +46,17 @@ static void armv3_frontend_translate_code(struct jit_frontend *base,
                                           uint32_t begin_addr, int size,
                                           struct ir *ir) {
   struct armv3_frontend *frontend = (struct armv3_frontend *)base;
   struct armv3_guest *guest = (struct armv3_guest *)frontend->guest;
 
-  for (int offset = 0; offset < size; offset += 4) {
+  int offset = 0;
+
+  while (offset < size) {
     uint32_t addr = begin_addr + offset;
     uint32_t data = guest->r32(guest->space, addr);
     struct jit_opdef *def = armv3_get_opdef(data);
 
     ir_source_info(ir, addr, 12);
     ir_fallback(ir, def->fallback, addr, data);
+
+    offset += 4;
   }
 }
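
NOTE: unlike the sh4 frontend below, the armv3 frontend doesn't translate
instructions natively; every instruction lowers to the same two-op pattern, a
source_info marker (costed at a flat 12 cycles) followed by an interpreter
fallback carrying the raw instruction word. Schematically, for one hypothetical
instruction (pseudo-IR, not the exact ir_write syntax):

    # guest instruction at 0x00008000, raw word 0xe1a00000 (mov r0, r0)
    source_info 0x00008000, 12
    fallback    <def->fallback>, 0x00008000, 0xe1a00000
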
diff --git a/src/jit/frontend/sh4/sh4_frontend.c b/src/jit/frontend/sh4/sh4_frontend.c
index 9e25383b..a866770c 100644
--- a/src/jit/frontend/sh4/sh4_frontend.c
+++ b/src/jit/frontend/sh4/sh4_frontend.c
@@ -9,14 +9,6 @@
 #include "jit/jit_frontend.h"
 #include "jit/jit_guest.h"
 
-/* cheap idle skip. in an idle loop, the block is just spinning, waiting for
-   an interrupt such as vblank before it'll exit. scale the block's number of
-   cycles in order to yield execution faster, enabling the interrupt to
-   actually be generated */
-#define IDLE_LOOP_CYCLE_SCALE 10
-#define SCALE_CYCLES(blk, cycles) \
-  ((blk)->idle_loop ? (cycles)*IDLE_LOOP_CYCLE_SCALE : (cycles))
-
 /*
  * fsca estimate lookup table, used by the jit and interpreter
  */
@@ -39,9 +31,12 @@ static void sh4_frontend_dump_code(struct jit_frontend *base,
   struct sh4_frontend *frontend = (struct sh4_frontend *)base;
   struct jit_guest *guest = frontend->guest;
 
+  int offset = 0;
   char buffer[128];
 
-  int offset = 0;
+  fprintf(output, "#==--------------------------------------------------==#\n");
+  fprintf(output, "# sh4\n");
+  fprintf(output, "#==--------------------------------------------------==#\n");
 
   while (offset < size) {
     uint32_t addr = begin_addr + offset;
@@ -91,7 +86,6 @@ static int sh4_frontend_is_idle_loop(struct sh4_frontend *frontend,
   static int IDLE_MASK = SH4_FLAG_LOAD | SH4_FLAG_COND | SH4_FLAG_CMP;
   int idle_loop = 1;
   int all_flags = 0;
-  int offset = 0;
 
   while (1) {
@@ -172,10 +166,9 @@
     uint32_t addr = begin_addr + offset;
     uint16_t data = guest->r16(guest->space, addr);
     union sh4_instr instr = {data};
-    sh4_translate_cb cb = sh4_get_translator(data);
     def = sh4_get_opdef(data);
 
-    /* emit synthetic op responsible for mapping guest to host instructions */
+    /* emit meta information about the current guest instruction */
    ir_source_info(ir, addr, def->cycles * cycle_scale);
 
     /* the pc is normally only written to the context at the end of the block,
@@ -185,6 +178,8 @@
                        ir_alloc_i32(ir, addr));
     }
 
+    /* emit the translation */
+    sh4_translate_cb cb = sh4_get_translator(data);
     cb(guest, ir, addr, instr, flags, &delay_point);
 
     offset += 2;
@@ -193,7 +188,6 @@
       uint32_t delay_addr = begin_addr + offset;
       uint32_t delay_data = guest->r16(guest->space, delay_addr);
       union sh4_instr delay_instr = {delay_data};
-      sh4_translate_cb delay_cb = sh4_get_translator(delay_data);
       struct jit_opdef *delay_def = sh4_get_opdef(delay_data);
 
       /* move insert point back to the middle of the last instruction */
@@ -207,6 +201,7 @@
                          ir_alloc_i32(ir, delay_addr));
       }
 
+      sh4_translate_cb delay_cb = sh4_get_translator(delay_data);
       delay_cb(guest, ir, delay_addr, delay_instr, flags, NULL);
 
       /* restore insert point */
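
NOTE: the insert-point shuffling above exists because SH4 delayed branches
execute their delay-slot instruction before the branch takes effect. The branch
is translated first, then the delay slot's IR is spliced into the middle of the
branch's IR. For example (hypothetical guest code):

    8c000000: bra 0x8c000010   ; delayed branch, translated first
    8c000002: mov #1, r0       ; delay slot, executes before the branch lands
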
diff --git a/src/jit/ir/ir_write.c b/src/jit/ir/ir_write.c
index 30e90d82..debc4817 100644
--- a/src/jit/ir/ir_write.c
+++ b/src/jit/ir/ir_write.c
@@ -200,8 +200,6 @@ static void ir_write_block(struct ir_writer *w, const struct ir_block *block,
   list_for_each_entry(instr, &block->instrs, struct ir_instr, it) {
     ir_write_instr(w, instr, output);
   }
-
-  fprintf(output, "\n");
 }
 
 static void ir_assign_labels(struct ir_writer *w) {
diff --git a/src/jit/jit.c b/src/jit/jit.c
index bdef4f08..f1250ae8 100644
--- a/src/jit/jit.c
+++ b/src/jit/jit.c
@@ -43,9 +43,9 @@ static int reverse_block_map_cmp(const struct rb_node *rb_lhs,
   const struct jit_block *rhs =
       container_of(rb_rhs, const struct jit_block, rit);
 
-  if ((uint8_t *)lhs->host_addr < (uint8_t *)rhs->host_addr) {
+  if (lhs->host_addr < rhs->host_addr) {
     return -1;
-  } else if ((uint8_t *)lhs->host_addr > (uint8_t *)rhs->host_addr) {
+  } else if (lhs->host_addr > rhs->host_addr) {
     return 1;
   } else {
     return 0;
@@ -189,17 +189,34 @@ static void jit_finalize_block(struct jit *jit, struct jit_block *block) {
   rb_insert(&jit->blocks, &block->it, &block_map_cb);
   rb_insert(&jit->reverse_blocks, &block->rit, &reverse_block_map_cb);
-
-  /* write out to perf map if enabled */
-  if (OPTION_perf) {
-    fprintf(jit->perf_map, "%" PRIxPTR " %x %s_0x%08x\n",
-            (uintptr_t)block->host_addr, block->host_size, jit->tag,
-            block->guest_addr);
-  }
 }
 
-static struct jit_block *jit_alloc_block(struct jit *jit) {
+static struct jit_block *jit_alloc_block(struct jit *jit, uint32_t guest_addr,
+                                         int guest_size) {
   struct jit_block *block = calloc(1, sizeof(struct jit_block));
+
+  block->guest_addr = guest_addr;
+  block->guest_size = guest_size;
+
+  /* allocate meta data structs for the original guest code */
+  block->source_map = calloc(block->guest_size, sizeof(void *));
+  block->fastmem = calloc(block->guest_size, sizeof(int8_t));
+
+/* for debug builds, fastmem can be troublesome when running under gdb or
+   lldb. when doing so, SIGSEGV handling can be completely disabled with:
+   handle SIGSEGV nostop noprint pass
+   however, then legitimate SIGSEGV will also not be handled by the debugger.
+   as of this writing, there is no way to configure the debugger to ignore the
+   signal initially, letting us try to handle it, and then handling it in the
+   case that we do not (e.g. because it was not a fastmem-related segfault).
+   because of this, fastmem is default disabled for debug builds to cause less
+   headaches */
+#ifdef NDEBUG
+  for (int i = 0; i < block->guest_size; i++) {
+    block->fastmem[i] = 1;
+  }
+#endif
+
   return block;
 }
@@ -256,7 +273,20 @@ void jit_link_code(struct jit *jit, void *branch, uint32_t addr) {
   jit_patch_edges(jit, src);
 }
 
-static void jit_dump_block(struct jit *jit, uint32_t guest_addr,
+static void jit_write_block(struct jit *jit, struct jit_block *block,
+                            struct ir *ir, FILE *output) {
+  jit->frontend->dump_code(jit->frontend, block->guest_addr, block->guest_size,
+                           output);
+  fprintf(output, "\n");
+
+  ir_write(ir, output);
+
+  fprintf(output, "\n");
+  jit->backend->dump_code(jit->backend, block->host_addr, block->host_size,
+                          output);
+}
+
+static void jit_dump_block(struct jit *jit, struct jit_block *block,
                            struct ir *ir) {
   const char *appdir = fs_appdir();
 
@@ -266,14 +296,25 @@
   char filename[PATH_MAX];
   snprintf(filename, sizeof(filename), "%s" PATH_SEPARATOR "0x%08x.ir", irdir,
-           guest_addr);
+           block->guest_addr);
 
   FILE *file = fopen(filename, "w");
   CHECK_NOTNULL(file);
-  ir_write(ir, file);
+  jit_write_block(jit, block, ir, file);
   fclose(file);
 }
 
+static void jit_emit_callback(struct jit *jit, int type, uint32_t guest_addr,
+                              uint8_t *host_addr) {
+  struct jit_block *block = jit->curr_block;
+
+  switch (type) {
+    case JIT_EMIT_INSTR:
+      block->source_map[guest_addr - block->guest_addr] = host_addr;
+      break;
+  }
+}
+
 static void jit_promote_fastmem(struct jit *jit, struct jit_block *block,
                                 struct ir *ir) {
   uint32_t last_addr = block->guest_addr;
@@ -304,28 +345,9 @@ void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
   int guest_size;
   jit->frontend->analyze_code(jit->frontend, guest_addr, &guest_size);
 
-  struct jit_block *block = jit_alloc_block(jit);
-  block->guest_addr = guest_addr;
-  block->guest_size = guest_size;
-
-  /* allocate meta data structs for the original guest code */
-  block->source_map = calloc(block->guest_size, sizeof(void *));
-  block->fastmem = calloc(block->guest_size, sizeof(int8_t));
-
-/* for debug builds, fastmem can be troublesome when running under gdb or
-   lldb. when doing so, SIGSEGV handling can be completely disabled with:
-   handle SIGSEGV nostop noprint pass
-   however, then legitimate SIGSEGV will also not be handled by the debugger.
-   as of this writing, there is no way to configure the debugger to ignore the
-   signal initially, letting us try to handle it, and then handling it in the
-   case that we do not (e.g. because it was not a fastmem-related segfault).
-   because of this, fastmem is default disabled for debug builds to cause less
-   headaches */
-#ifdef NDEBUG
-  for (int i = 0; i < block->guest_size; i++) {
-    block->fastmem[i] = 1;
-  }
-#endif
+  /* create block */
+  struct jit_block *block = jit_alloc_block(jit, guest_addr, guest_size);
+  jit->curr_block = block;
 
   /* if the block had previously been invalidated, finish removing it now */
   struct jit_block *existing = jit_get_block(jit, guest_addr);
@@ -348,15 +370,6 @@ void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
   ir.capacity = sizeof(jit->ir_buffer);
   jit->frontend->translate_code(jit->frontend, guest_addr, guest_size, &ir);
 
-#if 0
-  jit->frontend->dump_code(jit->frontend, guest_addr, guest_size);
-#endif
-
-  /* dump unoptimized block */
-  if (jit->dump_code) {
-    jit_dump_block(jit, guest_addr, &ir);
-  }
-
   /* run optimization passes */
   jit_promote_fastmem(jit, block, &ir);
   lse_run(jit->lse, &ir);
@@ -366,19 +379,32 @@ void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
   ra_run(jit->ra, &ir);
 
   /* assemble the ir into native code */
-  int res = jit->backend->assemble_code(jit->backend, block, &ir);
+  int res = jit->backend->assemble_code(jit->backend, &ir, &block->host_addr,
+                                        &block->host_size,
+                                        (jit_emit_cb)jit_emit_callback, jit);
 
-  if (res) {
-#if 0
-    jit->backend->dump_code(jit->backend, block);
-#endif
-
-    jit_finalize_block(jit, block);
-  } else {
+  if (!res) {
     /* if the backend overflowed, completely free the cache and let dispatch
        try to compile again */
     LOG_INFO("backend overflow, resetting code cache");
     jit_free_code(jit);
+    PROF_LEAVE();
+    return;
+  }
+
+  /* finish by adding code to caches */
+  jit_finalize_block(jit, block);
+
+  /* dump compiled output */
+  if (jit->dump_code) {
+    jit_dump_block(jit, block, &ir);
+  }
+
+  /* write out to perf map if enabled */
+  if (OPTION_perf) {
+    fprintf(jit->perf_map, "%" PRIxPTR " %x %s_0x%08x\n",
+            (uintptr_t)block->host_addr, block->host_size, jit->tag,
+            block->guest_addr);
   }
 
   PROF_LEAVE();
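
NOTE: with the SOURCE_INFO emitter emptied out, jit_emit_callback above is now
the sole writer of block->source_map. A sketch of the kind of consumer the map
exists for (a hypothetical helper, not part of this change): walking the map
from a host address, e.g. the faulting address of a fastmem SIGSEGV, back to
the guest pc whose compiled code covers it:

    /* hypothetical consumer of block->source_map: find the guest address
       whose compiled code covers a given host address */
    static uint32_t block_host_to_guest(struct jit_block *block,
                                        uint8_t *host_addr) {
      uint32_t guest_pc = block->guest_addr;

      for (int i = 0; i < block->guest_size; i++) {
        uint8_t *instr_addr = (uint8_t *)block->source_map[i];

        /* the map is sparse; only offsets that begin a guest instruction
           have an entry */
        if (instr_addr && instr_addr <= host_addr) {
          guest_pc = block->guest_addr + i;
        }
      }

      return guest_pc;
    }
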
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 112b21d2..9bde0819 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -31,7 +31,7 @@ struct jit_block {
   int8_t *fastmem;
 
   /* address of compiled block in host memory */
-  void *host_addr;
+  uint8_t *host_addr;
   int host_size;
 
   /* reason the block was invalidated */
@@ -80,6 +80,7 @@ struct jit {
   uint8_t ir_buffer[1024 * 1024 * 2];
 
   /* compiled blocks */
+  struct jit_block *curr_block;
   struct rb_tree blocks;
   struct rb_tree reverse_blocks;
diff --git a/src/jit/jit_backend.h b/src/jit/jit_backend.h
index cf8f17b3..3060d6be 100644
--- a/src/jit/jit_backend.h
+++ b/src/jit/jit_backend.h
@@ -56,6 +56,15 @@ enum {
       JIT_IMM_I64 | JIT_IMM_F32 | JIT_IMM_F64,
 };
 
+/* the assemble_code function is passed this callback to map guest blocks and
+   instructions to host addresses */
+enum {
+  JIT_EMIT_BLOCK,
+  JIT_EMIT_INSTR,
+};
+
+typedef void (*jit_emit_cb)(void *, int, uint32_t, uint8_t *);
+
 /* backend-specific register definition */
 struct jit_register {
   const char *name;
@@ -83,7 +92,8 @@ struct jit_backend {
 
   /* compile interface */
   void (*reset)(struct jit_backend *);
-  int (*assemble_code)(struct jit_backend *, struct jit_block *, struct ir *);
+  int (*assemble_code)(struct jit_backend *, struct ir *, uint8_t **, int *,
+                       jit_emit_cb, void *);
   void (*dump_code)(struct jit_backend *, const uint8_t *, int, FILE *);
   int (*handle_exception)(struct jit_backend *, struct exception_state *);
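
NOTE: putting the new contract together: assemble_code hands the emitted code's
address and size back through the out params, and reports each guest block and
instruction to the callback as it is emitted, passing the opaque userdata
pointer back verbatim. A minimal conforming callback (illustrative only, and
assuming jit/jit_backend.h is included for the JIT_EMIT_* enum; jit.c's
jit_emit_callback is the real consumer):

    #include <stdint.h>
    #include <stdio.h>

    /* JIT_EMIT_BLOCK fires once per guest block (at its first source info
       op), JIT_EMIT_INSTR once per guest instruction */
    static void trace_emit_cb(void *data, int type, uint32_t guest_addr,
                              uint8_t *host_addr) {
      const char *tag = data;

      switch (type) {
        case JIT_EMIT_BLOCK:
          printf("[%s] block 0x%08x -> %p\n", tag, guest_addr,
                 (void *)host_addr);
          break;
        case JIT_EMIT_INSTR:
          printf("[%s]   instr 0x%08x -> %p\n", tag, guest_addr,
                 (void *)host_addr);
          break;
      }
    }

    /* usage sketch:
       backend->assemble_code(backend, ir, &addr, &size, trace_emit_cb,
                              (void *)"sh4"); */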