diff --git a/desmume/src/arm_jit.cpp b/desmume/src/arm_jit.cpp
index 7835f3674..5947fc2ee 100644
--- a/desmume/src/arm_jit.cpp
+++ b/desmume/src/arm_jit.cpp
@@ -234,7 +234,8 @@ static int bb_adr;
 static bool bb_thumb;
 static GpVar bb_cpu;
 static GpVar bb_cycles;
-static GpVar total_cycles;
+static GpVar bb_total_cycles;
+static u32 bb_constant_cycles;
 
 #define cpu (&ARMPROC)
 #define bb_next_instruction (bb_adr + bb_opcodesize)
@@ -700,7 +701,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GpVar mem_cycles, int populati
 	if(REG_POS(i,12)==15) \
 	{ \
 		S_DST_R15; \
-		c.add(total_cycles, 2); \
+		bb_constant_cycles += 2; \
 		return 1; \
 	} \
 	SET_NZCV(!symmetric); \
@@ -712,7 +713,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GpVar mem_cycles, int populati
 			GpVar tmp = c.newGpVar(kX86VarTypeGpd); \
 			c.mov(tmp, reg_ptr(15)); \
 			c.mov(cpu_ptr(next_instruction), tmp); \
-			c.add(total_cycles, 2); \
+			bb_constant_cycles += 2; \
 		} \
 	} \
 	return 1;
@@ -728,7 +729,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GpVar mem_cycles, int populati
 	if(REG_POS(i,12)==15) \
 	{ \
 		S_DST_R15; \
-		c.add(total_cycles, 2); \
+		bb_constant_cycles += 2; \
 		return 1; \
 	} \
 	SET_NZCV(1); \
@@ -739,7 +740,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GpVar mem_cycles, int populati
 	{ \
 		GpVar tmp = c.newGpVar(kX86VarTypeGpd); \
 		c.mov(cpu_ptr(next_instruction), lhs); \
-		c.add(total_cycles, 2); \
+		bb_constant_cycles += 2; \
 	} \
 	} \
 	return 1;
@@ -763,7 +764,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GpVar mem_cycles, int populati
 	if(REG_POS(i,12)==15) \
 	{ \
 		S_DST_R15; \
-		c.add(total_cycles, 2); \
+		bb_constant_cycles += 2; \
 		return 1; \
 	} \
 	SET_NZC; \
@@ -1097,7 +1098,7 @@ static int OP_MOV_IMM_VAL(const u32 i) { OP_MOV(IMM_VAL); }
 	if(REG_POS(i,12)==15) \
 	{ \
 		S_DST_R15; \
-		c.add(total_cycles, 2); \
+		bb_constant_cycles += 2; \
 		return 1; \
 	} \
 	if(!rhs_is_imm) \
@@ -3379,12 +3380,9 @@ static int OP_MOV_SPE(const u32 i)
 
 	if(Rd == 15)
 	{
 		c.mov(cpu_ptr(next_instruction), tmp);
-		c.mov(bb_cycles, 3);
-		return 1;
+		bb_constant_cycles += 2;
 	}
-	else
-		c.mov(bb_cycles, 1);
-
+
 	return 1;
 }
@@ -3673,11 +3671,10 @@ static int OP_B_COND(const u32 i)
 
 	u32 dst = bb_r15 + ((u32)((s8)(i&0xFF))<<1);
 
 	c.mov(cpu_ptr(instruct_adr), bb_next_instruction);
-	c.mov(bb_cycles, 1);
 	emit_branch((i>>8)&0xF, skip);
 	c.mov(cpu_ptr(instruct_adr), dst);
-	c.mov(bb_cycles, 3);
+	c.add(bb_total_cycles, 2);
 	c.bind(skip);
 
 	return 1;
@@ -4027,12 +4024,13 @@ static void _armlog(u8 proc, u32 addr, u32 opcode)
 template<int PROCNUM>
 static u32 compile_basicblock()
 {
-	bool has_variable_cycles = 0;
-	int constant_cycles = 0;
-	int interpreted_cycles = 0;
-	int n = 0;
+#if LOG_JIT
+	bool has_variable_cycles = FALSE;
+#endif
+	u32 interpreted_cycles = 0;
 	u32 start_adr = cpu->instruct_adr;
-	u32 opcodes[MAX_JIT_BLOCK_SIZE];
+	u32 opcode = 0;
+
 	bb_thumb = cpu->CPSR.bits.T;
 	bb_opcodesize = bb_thumb ? 2 : 4;
 
@@ -4043,35 +4041,8 @@ static u32 compile_basicblock()
 		return 1;
 	}
 
-	for(n=0; n<MAX_JIT_BLOCK_SIZE;)
-	{
-		u32 opcode;
-		if(bb_thumb)
-			opcode = _MMU_read16<PROCNUM, MMU_AT_CODE>(start_adr + n*2);
-		else
-			opcode = _MMU_read32<PROCNUM, MMU_AT_CODE>(start_adr + n*4);
-
-		opcodes[n++] = opcode;
-		has_variable_cycles |= (instr_is_conditional(opcode) && instr_cycles(opcode) > 1)
-		                       || instr_cycles(opcode) == 0;
-		constant_cycles += instr_is_conditional(opcode) ? 1 : instr_cycles(opcode);
-		if(instr_is_branch(opcode))
-			break;
-	}
-
 #if LOG_JIT
-	fprintf(stderr, "adr %08Xh %s%c (num %i)\n", start_adr, ARMPROC.CPSR.bits.T ? "THUMB":"ARM", PROCNUM?'7':'9', n);
-	fprintf(stderr, "cycles %d%s\n", constant_cycles, has_variable_cycles ? " + variable" : "");
-	for(int i=0; i<n; i++)
-	{
-		char dasmbuf[1024] = {0};
-		u32 dasm_addr = start_adr + i*bb_opcodesize;
-		if(bb_thumb)
-			des_thumb_instructions_set[opcodes[i]>>6](dasm_addr, opcodes[i], dasmbuf);
-		else
-			des_arm_instructions_set[INSTRUCTION_INDEX(opcodes[i])](dasm_addr, opcodes[i], dasmbuf);
-		fprintf(stderr, "%08X\t%s\t\t; %s \n", dasm_addr, dasmbuf, disassemble(opcodes[i]));
-	}
+	fprintf(stderr, "adr %08Xh %s%c\n", start_adr, ARMPROC.CPSR.bits.T ? "THUMB":"ARM", PROCNUM?'7':'9');
 #endif
 
 	c.clear();
@@ -4083,12 +4054,9 @@ static u32 compile_basicblock()
 	JIT_COMMENT("CPU ptr");
 	bb_cpu = c.newGpVar(kX86VarTypeGpz);
 	c.mov(bb_cpu, (uintptr_t)&ARMPROC);
-	if(has_variable_cycles)
-	{
-		total_cycles = c.newGpVar(kX86VarTypeGpd);
-		JIT_COMMENT("set total_cycles to %d", constant_cycles);
-		c.mov(total_cycles, constant_cycles);
-	}
+	JIT_COMMENT("reset bb_total_cycles");
+	bb_total_cycles = c.newGpVar(kX86VarTypeGpz);
+	c.mov(bb_total_cycles, 0);
 
 #if (PROFILER_JIT_LEVEL > 0)
 	JIT_COMMENT("Profiler ptr");
@@ -4096,11 +4064,38 @@ static u32 compile_basicblock()
 	bb_profiler = c.newGpVar(kX86VarTypeGpz);
 	c.mov(bb_profiler, (uintptr_t)&profiler_counter[PROCNUM]);
 #endif
 
-	for(int i=0; i<n; i++)
-	{
-		u32 opcode = opcodes[i];
-		JIT_COMMENT("%s (PC:%08X)", disassemble(opcode), bb_adr);
+	bb_constant_cycles = 0;
+	for(u32 i=0, bEndBlock = FALSE; bEndBlock == FALSE; i++)
+	{
+		bb_adr = start_adr + (i * bb_opcodesize);
+		if(bb_thumb)
+			opcode = _MMU_read16<PROCNUM, MMU_AT_CODE>(bb_adr);
+		else
+			opcode = _MMU_read32<PROCNUM, MMU_AT_CODE>(bb_adr);
+
+#if LOG_JIT
+		char dasmbuf[1024] = {0};
+		if(bb_thumb)
+			des_thumb_instructions_set[opcode>>6](bb_adr, opcode, dasmbuf);
+		else
+			des_arm_instructions_set[INSTRUCTION_INDEX(opcode)](bb_adr, opcode, dasmbuf);
+		fprintf(stderr, "%08X\t%s\t\t; %s \n", bb_adr, dasmbuf, disassemble(opcode));
+#endif
+
+		u32 cycles = instr_cycles(opcode);
+
+		bEndBlock = (i >= (MAX_JIT_BLOCK_SIZE - 1)) || instr_is_branch(opcode);
+
+#if LOG_JIT
+		if ((instr_is_conditional(opcode) && (cycles > 1)) || (cycles == 0))
+			has_variable_cycles = TRUE;
+#endif
+		bb_cycles = c.newGpVar(kX86VarTypeGpz);
+
+		bb_constant_cycles += instr_is_conditional(opcode) ? 1 : cycles;
+		JIT_COMMENT("%s (PC:%08X)", disassemble(opcode), bb_adr);
+
 #if (PROFILER_JIT_LEVEL > 0)
 		JIT_COMMENT("*** profiler - counter");
 		if (bb_thumb)
@@ -4108,71 +4103,68 @@ static u32 compile_basicblock()
 			c.add(profiler_counter_thumb(opcode), 1);
 		else
 			c.add(profiler_counter_arm(opcode), 1);
 #endif
-		bb_cycles = c.newGpVar(kX86VarTypeGpd);
-		u32 cycles = instr_cycles(opcode);
 		if(instr_is_conditional(opcode))
 		{
 			// 25% of conditional instructions are immediately followed by
 			// another with the same condition, but merging them into a
 			// single branch has negligible effect on speed.
-			if(i == n-1) sync_r15(opcode, 1, 1);
+			if(bEndBlock) sync_r15(opcode, 1, 1);
 			Label skip = c.newLabel();
 			emit_branch(CONDITION(opcode), skip);
-			if(i != n-1) sync_r15(opcode, 0, 0);
+			if(!bEndBlock) sync_r15(opcode, 0, 0);
 			emit_armop_call(opcode);
 			if(cycles == 0)
 			{
-				JIT_COMMENT("cycles");
-				c.lea(total_cycles, ptr(total_cycles.r64(), bb_cycles.r64(), kScaleNone, -1));
+				JIT_COMMENT("variable cycles");
+				c.lea(bb_total_cycles, ptr(bb_total_cycles.r64(), bb_cycles.r64(), kScaleNone));
 			}
-			else
-				if(cycles > 1)
-				{
-					JIT_COMMENT("cycles (%d)", cycles);
-					c.add(total_cycles, cycles - 1);
-				}
 			c.bind(skip);
 		}
 		else
 		{
-			sync_r15(opcode, i == (n-1), 0);
+			sync_r15(opcode, bEndBlock, 0);
 			emit_armop_call(opcode);
 			if(cycles == 0)
 			{
-				JIT_COMMENT("cycles");
-				c.add(total_cycles, bb_cycles);
+				JIT_COMMENT("variable cycles");
+				c.lea(bb_total_cycles, ptr(bb_total_cycles.r64(), bb_cycles.r64(), kScaleNone));
 			}
 		}
 		interpreted_cycles += op_decode[PROCNUM][bb_thumb]();
 	}
 
-	if(!instr_does_prefetch(opcodes[n-1]))
+	if(!instr_does_prefetch(opcode))
 	{
 		JIT_COMMENT("!instr_does_prefetch: copy next_instruction (%08X) to instruct_adr (%08X)", cpu->next_instruction, cpu->instruct_adr);
 		GpVar x = c.newGpVar(kX86VarTypeGpd);
 		c.mov(x, cpu_ptr(next_instruction));
 		c.mov(cpu_ptr(instruct_adr), x);
 		c.unuse(x);
+		//c.mov(cpu_ptr(instruct_adr), bb_adr);
+		//c.mov(cpu_ptr(instruct_adr), bb_next_instruction);
 	}
 
 	JIT_COMMENT("total cycles (block)");
-	GpVar ret = c.newGpVar(kX86VarTypeGpd);
-	if(has_variable_cycles)
-		c.mov(ret, total_cycles);
-	else
-		c.mov(ret, constant_cycles);
+
+	if (bb_constant_cycles > 0)
+	{
+		c.add(bb_total_cycles, bb_constant_cycles);
+	}
 
 #if (PROFILER_JIT_LEVEL > 1)
 	JIT_COMMENT("*** profiler - cycles");
 	u32 padr = ((start_adr & 0x07FFFFFE) >> 1);
 	bb_profiler_entry = c.newGpVar(kX86VarTypeGpz);
 	c.mov(bb_profiler_entry, (uintptr_t)&profiler_entry[PROCNUM][padr]);
-	c.add(dword_ptr(bb_profiler_entry, offsetof(PROFILER_ENTRY, cycles)), ret);
+	c.add(dword_ptr(bb_profiler_entry, offsetof(PROFILER_ENTRY, cycles)), bb_total_cycles);
 	profiler_entry[PROCNUM][padr].addr = start_adr;
 #endif
 
-	c.ret(ret);
+	c.ret(bb_total_cycles);
+#if LOG_JIT
+	fprintf(stderr, "cycles %d%s\n", bb_constant_cycles, has_variable_cycles ? " + variable" : "");
+#endif
 	c.endFunc();
 
 	ArmOpCompiled f = (ArmOpCompiled)c.make();
diff --git a/desmume/src/instruction_attributes.h b/desmume/src/instruction_attributes.h
index ed540d9a5..06905abd4 100644
--- a/desmume/src/instruction_attributes.h
+++ b/desmume/src/instruction_attributes.h
@@ -4423,10 +4423,10 @@ INSTR_CYCLES(1) | BRANCH_NEVER | SRCREG_POS0 | SRCREG_POS3, //OP_CMP_SPE
 INSTR_CYCLES(1) | BRANCH_NEVER | SRCREG_POS0 | SRCREG_POS3, //OP_CMP_SPE
 INSTR_CYCLES(1) | BRANCH_NEVER | SRCREG_POS0 | SRCREG_POS3, //OP_CMP_SPE
 INSTR_CYCLES(1) | BRANCH_NEVER | SRCREG_POS0 | SRCREG_POS3, //OP_CMP_SPE
-INSTR_CYCLES_V | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
-INSTR_CYCLES_V | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
-INSTR_CYCLES_V | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
-INSTR_CYCLES_V | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
+INSTR_CYCLES(1) | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
+INSTR_CYCLES(1) | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
+INSTR_CYCLES(1) | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
+INSTR_CYCLES(1) | BRANCH_POS0 | SRCREG_POS3, //OP_MOV_SPE
 INSTR_CYCLES(3) | BRANCH_ALWAYS | SRCREG_POS3, //OP_BX_THUMB
 INSTR_CYCLES(3) | BRANCH_ALWAYS | SRCREG_POS3, //OP_BX_THUMB
 INSTR_CYCLES(4) | BRANCH_ALWAYS | SRCREG_POS3, //OP_BLX_THUMB
@@ -4975,66 +4975,66 @@ INSTR_CYCLES_V | BRANCH_NEVER, //OP_LDMIA_THUMB
 INSTR_CYCLES_V | BRANCH_NEVER, //OP_LDMIA_THUMB
 INSTR_CYCLES_V | BRANCH_NEVER, //OP_LDMIA_THUMB
 INSTR_CYCLES_V | BRANCH_NEVER, //OP_LDMIA_THUMB
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
-INSTR_CYCLES_V | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
+INSTR_CYCLES(1) | BRANCH_ALWAYS, //OP_B_COND
 INSTR_CYCLES_V | BRANCH_SWI, //OP_SWI_THUMB
 INSTR_CYCLES_V | BRANCH_SWI, //OP_SWI_THUMB
 INSTR_CYCLES_V | BRANCH_SWI, //OP_SWI_THUMB
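
Reviewer note: the accounting scheme this patch gives compile_basicblock() can be restated without AsmJit. A minimal C++ sketch follows; it is not part of the patch, and the Op record and block_cycles() are hypothetical stand-ins for the decoded instruction and the emitted block.

// Sketch only: fixed costs are folded in once at compile time
// (bb_constant_cycles); only ops whose cost is unknown until execution
// (instr_cycles(opcode) == 0) touch the run-time total (bb_total_cycles).
#include <cstdint>
#include <vector>

struct Op                    // hypothetical stand-in for a decoded instruction
{
    uint32_t fixed_cycles;   // instr_cycles(opcode); 0 means "variable"
    bool     conditional;    // instr_is_conditional(opcode)
    uint32_t ran_cycles;     // what executing the op reports (the bb_cycles register)
};

static uint32_t block_cycles(const std::vector<Op>& block)
{
    uint32_t constant_cycles = 0; // like bb_constant_cycles, summed while emitting
    uint32_t total_cycles = 0;    // like bb_total_cycles, accumulated at run time
    for (const Op& op : block)
    {
        // Conditional ops are charged a flat 1 up front, mirroring
        // bb_constant_cycles += instr_is_conditional(opcode) ? 1 : cycles;
        constant_cycles += op.conditional ? 1 : op.fixed_cycles;
        if (op.fixed_cycles == 0)          // variable-cycle op:
            total_cycles += op.ran_cycles; // the emitted lea on bb_cycles
    }
    // The single constant add emitted before c.ret(bb_total_cycles).
    return total_cycles + constant_cycles;
}

Under this split the JIT no longer needs the old per-block has_variable_cycles special case: the constant part is always added once at the end, and only genuinely variable ops cost an add at run time.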