- fix bug in arithmetic shifts when imm > 32;
- minor speed optimizations;
This commit is contained in:
mtabachenko 2012-09-13 11:48:24 +00:00
parent e7b9bc7765
commit 1b77d8b3e3
1 changed files with 190 additions and 211 deletions

View File

@ -236,8 +236,6 @@ static GPVar bb_cpu;
static GPVar bb_cycles; static GPVar bb_cycles;
static GPVar total_cycles; static GPVar total_cycles;
static void *op_cmp[2][2];
#define cpu (&ARMPROC) #define cpu (&ARMPROC)
#define bb_next_instruction (bb_adr + bb_opcodesize) #define bb_next_instruction (bb_adr + bb_opcodesize)
#define bb_r15 (bb_adr + 2 * bb_opcodesize) #define bb_r15 (bb_adr + 2 * bb_opcodesize)
@ -304,6 +302,27 @@ static GPVar bb_profiler_entry;
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Shifting macros // Shifting macros
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
#define SET_NZCV(sign) { \
JIT_COMMENT("SET_NZCV"); \
GPVar x = c.newGP(VARIABLE_TYPE_GPD); \
GPVar y = c.newGP(VARIABLE_TYPE_GPD); \
c.sets(x.r8Lo()); \
c.setz(y.r8Lo()); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \
c.set(sign ? C_NC : C_C, y.r8Lo()); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \
c.seto(y.r8Lo()); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \
c.movzx(y, flags_ptr); \
c.shl(x, 4); \
c.and_(y, 0xF); \
c.or_(x, y); \
c.mov(flags_ptr, x.r8Lo()); \
c.unuse(x); \
c.unuse(y); \
JIT_COMMENT("end SET_NZCV"); \
}
#define SET_NZC { \ #define SET_NZC { \
JIT_COMMENT("SET_NZC"); \ JIT_COMMENT("SET_NZC"); \
GPVar x = c.newGP(VARIABLE_TYPE_GPD); \ GPVar x = c.newGP(VARIABLE_TYPE_GPD); \
@ -318,32 +337,31 @@ static GPVar bb_profiler_entry;
c.and_(y, 0x1F); \ c.and_(y, 0x1F); \
c.or_(x, y); \ c.or_(x, y); \
c.mov(flags_ptr, x.r8Lo()); \ c.mov(flags_ptr, x.r8Lo()); \
JIT_COMMENT("end SET_NZC"); } JIT_COMMENT("end SET_NZC"); \
}
#define SET_NZ { \ #define SET_NZ { \
JIT_COMMENT("SET_NZ"); \ JIT_COMMENT("SET_NZ"); \
GPVar x = c.newGP(VARIABLE_TYPE_GPD); \ GPVar x = c.newGP(VARIABLE_TYPE_GPN); \
GPVar y = c.newGP(VARIABLE_TYPE_GPD); \ GPVar y = c.newGP(VARIABLE_TYPE_GPN); \
c.sets(x.r8Lo()); \ c.pushf(); \
c.setz(y.r8Lo()); \ c.pop(x); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \ c.and_(x, (3 << 6)); \
c.movzx(y, flags_ptr); \ c.mov(y, flags_ptr); \
c.shl(x, 6); \
c.and_(y, 0x3F); \ c.and_(y, 0x3F); \
c.or_(x, y); \ c.or_(x, y); \
c.mov(flags_ptr, x.r8Lo()); \ c.mov(flags_ptr, x.r8Lo()); \
c.unuse(x); \ JIT_COMMENT("end SET_NZ"); \
c.unuse(y); \ }
JIT_COMMENT("end SET_NZ"); }
#define SET_NZ_W { \ #define SET_NZ_MUL { \
JIT_COMMENT("SET_NZ_W"); \ JIT_COMMENT("SET_NZ_W"); \
GPVar x = c.newGP(VARIABLE_TYPE_GPD); \ GPVar x = c.newGP(VARIABLE_TYPE_GPD); \
GPVar y = c.newGP(VARIABLE_TYPE_GPD); \ GPVar y = c.newGP(VARIABLE_TYPE_GPD); \
c.cmp(hi, lhs); \ c.cmp(hi, lhs); \
c.setz(y.r8Lo()); \ c.setz(y.r8Lo()); \
c.bt(hi, 31); \ c.test(hi, (1 << 31)); \
c.setc(x.r8Lo()); \ c.setnz(x.r8Lo()); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \ c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \
c.movzx(y, flags_ptr); \ c.movzx(y, flags_ptr); \
c.shl(x, 6); \ c.shl(x, 6); \
@ -352,7 +370,8 @@ static GPVar bb_profiler_entry;
c.mov(flags_ptr, x.r8Lo()); \ c.mov(flags_ptr, x.r8Lo()); \
c.unuse(x); \ c.unuse(x); \
c.unuse(y); \ c.unuse(y); \
JIT_COMMENT("end SET_NZ_W"); } JIT_COMMENT("end SET_NZ_W"); \
}
#define SET_Q { \ #define SET_Q { \
JIT_COMMENT("SET_Q"); \ JIT_COMMENT("SET_Q"); \
@ -360,7 +379,8 @@ static GPVar bb_profiler_entry;
c.jno(__skipQ); \ c.jno(__skipQ); \
c.or_(flags_ptr, (1<<3)); \ c.or_(flags_ptr, (1<<3)); \
c.bind(__skipQ); \ c.bind(__skipQ); \
JIT_COMMENT("end SET_Q"); } JIT_COMMENT("end SET_Q"); \
}
#define S_DST_R15 { \ #define S_DST_R15 { \
JIT_COMMENT("S_DST_R15"); \ JIT_COMMENT("S_DST_R15"); \
@ -401,10 +421,15 @@ static GPVar bb_profiler_entry;
u32 imm = ((i>>7)&0x1F); \ u32 imm = ((i>>7)&0x1F); \
c.mov(rhs, reg_pos_ptr(0)); \ c.mov(rhs, reg_pos_ptr(0)); \
if (imm == 0) \ if (imm == 0) \
c.bt(flags_ptr, 5); \ { \
c.test(flags_ptr, (1 << 5)); \
c.setnz(rcf.r8Lo()); \
} \
else \ else \
{ \
c.shl(rhs, imm); \ c.shl(rhs, imm); \
c.setc(rcf.r8Lo()); c.setc(rcf.r8Lo()); \
}
#define LSR_IMM \ #define LSR_IMM \
JIT_COMMENT("LSR_IMM"); \ JIT_COMMENT("LSR_IMM"); \
@ -428,15 +453,16 @@ static GPVar bb_profiler_entry;
u32 imm = ((i>>7)&0x1F); \ u32 imm = ((i>>7)&0x1F); \
if (!imm) \ if (!imm) \
{ \ { \
c.bt(reg_pos_ptr(0), 31); \ c.test(reg_pos_ptr(0), (1 << 31)); \
c.setnz(rcf.r8Lo()); \
c.mov(rhs, 0); \ c.mov(rhs, 0); \
} \ } \
else \ else \
{ \ { \
c.mov(rhs, reg_pos_ptr(0)); \ c.mov(rhs, reg_pos_ptr(0)); \
c.shr(rhs, imm); \ c.shr(rhs, imm); \
} \ c.setc(rcf.r8Lo()); \
c.setc(rcf.r8Lo()); }
#define ASR_IMM \ #define ASR_IMM \
JIT_COMMENT("ASR_IMM"); \ JIT_COMMENT("ASR_IMM"); \
@ -514,20 +540,15 @@ static GPVar bb_profiler_entry;
bool rhs_is_imm = true; \ bool rhs_is_imm = true; \
GPVar rcf = c.newGP(VARIABLE_TYPE_GPD); \ GPVar rcf = c.newGP(VARIABLE_TYPE_GPD); \
u32 rhs = ROR((i&0xFF), (i>>7)&0x1E); \ u32 rhs = ROR((i&0xFF), (i>>7)&0x1E); \
if ((i>>8)&0xF) c.mov(rcf, BIT31(rhs)); \ if ((i>>8)&0xF) \
c.mov(rcf, BIT31(rhs)); \
else \ else \
{ \ { \
c.bt(flags_ptr, 5); \ c.test(flags_ptr, (1 << 5)); \
c.setc(rcf.r8Lo()); \ c.setnz(rcf.r8Lo()); \
} \ } \
u32 rhs_first = rhs; u32 rhs_first = rhs;
// IMM_VALUE: operand-decoding macro for an immediate operand taken from the
// opcode `i`. It declares three locals in the enclosing scope:
//   rhs_is_imm - always true here;
//   rhs        - the low 8 bits of `i` passed through ROR with the amount
//                (i>>7)&0x1E, i.e. twice the 4-bit field in bits 8..11 of `i`;
//   rhs_first  - a copy of rhs taken right after decoding.
// Unlike the shifter macros nearby, it emits no host code apart from the JIT
// comment and declares no carry variable.
#define IMM_VALUE \
JIT_COMMENT("IMM_VALUE"); \
bool rhs_is_imm = true; \
u32 rhs = ROR((i&0xFF), (i>>7)&0x1E); \
u32 rhs_first = rhs;
#define IMM_OFF \ #define IMM_OFF \
JIT_COMMENT("IMM_OFF"); \ JIT_COMMENT("IMM_OFF"); \
bool rhs_is_imm = true; \ bool rhs_is_imm = true; \
@ -576,23 +597,27 @@ static GPVar bb_profiler_entry;
Label __eq32 = c.newLabel(); \ Label __eq32 = c.newLabel(); \
c.je(__eq32); \ c.je(__eq32); \
/* imm > 32 */ \ /* imm > 32 */ \
c.xor_(rhs, rhs); \ c.mov(rhs, 0); \
c.mov(rcf, 0); \
c.jmp(__done); \
/* imm == 32 */ \ /* imm == 32 */ \
c.bind(__eq32); \ c.bind(__eq32); \
} \ } \
c.x86inst(rhs, 31); \ c.x86inst(rhs, 31); \
c.x86inst(rhs, 1); \ c.x86inst(rhs, 1); \
c.setc(rcf.r8Lo()); \
c.jmp(__done); \ c.jmp(__done); \
/* imm == 0 */ \ /* imm == 0 */ \
c.bind(__zero); \ c.bind(__zero); \
c.bt(flags_ptr, 5); \ c.test(flags_ptr, (1 << 5)); \
c.setnz(rcf.r8Lo()); \
c.jmp(__done); \ c.jmp(__done); \
/* imm < 32 */ \ /* imm < 32 */ \
c.bind(__lt32); \ c.bind(__lt32); \
c.x86inst(rhs, imm); \ c.x86inst(rhs, imm); \
c.setc(rcf.r8Lo()); \
/* done */\ /* done */\
c.bind(__done); \ c.bind(__done);
c.setc(rcf.r8Lo());
#define LSL_REG LSX_REG(LSL_REG, shl, 0) #define LSL_REG LSX_REG(LSL_REG, shl, 0)
#define LSR_REG LSX_REG(LSR_REG, shr, 0) #define LSR_REG LSX_REG(LSR_REG, shr, 0)
@ -617,27 +642,29 @@ static GPVar bb_profiler_entry;
GPVar imm = c.newGP(VARIABLE_TYPE_GPN); \ GPVar imm = c.newGP(VARIABLE_TYPE_GPN); \
GPVar rhs = c.newGP(VARIABLE_TYPE_GPD); \ GPVar rhs = c.newGP(VARIABLE_TYPE_GPD); \
Label __zero = c.newLabel(); \ Label __zero = c.newLabel(); \
Label __zero2 = c.newLabel(); \ Label __zero_1F = c.newLabel(); \
Label __done = c.newLabel(); \ Label __done = c.newLabel(); \
c.movzx(imm, reg_pos_ptrB(8)); \ c.movzx(imm, reg_pos_ptrB(8)); \
c.mov(rhs, reg_pos_ptr(0)); \ c.mov(rhs, reg_pos_ptr(0)); \
c.test(imm, imm); \ c.test(imm, imm); \
c.jz(__zero);\ c.jz(__zero);\
c.and_(imm, 0x1F); \ c.and_(imm, 0x1F); \
c.jz(__zero2);\ c.jz(__zero_1F);\
/* imm&0x1F != 0 */ \ /* imm&0x1F != 0 */ \
c.ror(rhs, imm); \ c.ror(rhs, imm); \
c.setc(rcf.r8Lo()); \
c.jmp(__done); \ c.jmp(__done); \
/* imm&0x1F == 0 */ \ /* imm&0x1F == 0 */ \
c.bind(__zero2); \ c.bind(__zero_1F); \
c.bt(rhs, 31); \ c.test(rhs, (1 << 31)); \
c.setnz(rcf.r8Lo()); \
c.jmp(__done); \ c.jmp(__done); \
/* imm == 0 */ \ /* imm == 0 */ \
c.bind(__zero); \ c.bind(__zero); \
c.bt(flags_ptr, 5); \ c.test(flags_ptr, (1 << 5)); \
c.setnz(rcf.r8Lo()); \
/* done */ \ /* done */ \
c.bind(__done); \ c.bind(__done);
c.setc(rcf.r8Lo());
//==================================================================== common funcs //==================================================================== common funcs
static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int population) static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int population)
@ -683,7 +710,7 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int populati
c.add(total_cycles, 2); \ c.add(total_cycles, 2); \
return 1; \ return 1; \
} \ } \
c.call(op_cmp[PROCNUM][!symmetric])->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>()); \ SET_NZCV(!symmetric); \
} \ } \
else \ else \
{ \ { \
@ -697,20 +724,12 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int populati
} \ } \
return 1; return 1;
#define OP_ARITHMETIC_R(arg, x86inst, symmetric, flags) \ #define OP_ARITHMETIC_R(arg, x86inst, flags) \
arg; \ arg; \
if(symmetric && !rhs_is_imm) \
{ \
c.x86inst(*(GPVar*)&rhs, reg_pos_ptr(16)); \
c.mov(reg_pos_ptr(12), rhs); \
} \
else \
{ \
GPVar lhs = c.newGP(VARIABLE_TYPE_GPD); \ GPVar lhs = c.newGP(VARIABLE_TYPE_GPD); \
c.mov(lhs, rhs); \ c.mov(lhs, rhs); \
c.x86inst(lhs, reg_pos_ptr(16)); \ c.x86inst(lhs, reg_pos_ptr(16)); \
c.mov(reg_pos_ptr(12), lhs); \ c.mov(reg_pos_ptr(12), lhs); \
} \
if(flags) \ if(flags) \
{ \ { \
if(REG_POS(i,12)==15) \ if(REG_POS(i,12)==15) \
@ -719,15 +738,14 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int populati
c.add(total_cycles, 2); \ c.add(total_cycles, 2); \
return 1; \ return 1; \
} \ } \
c.call(op_cmp[PROCNUM][!symmetric])->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>()); \ SET_NZCV(1); \
} \ } \
else \ else \
{ \ { \
if(REG_POS(i,12)==15) \ if(REG_POS(i,12)==15) \
{ \ { \
GPVar tmp = c.newGP(VARIABLE_TYPE_GPD); \ GPVar tmp = c.newGP(VARIABLE_TYPE_GPD); \
c.mov(tmp, reg_ptr(15)); \ c.mov(cpu_ptr(next_instruction), lhs); \
c.mov(cpu_ptr(next_instruction), tmp); \
c.add(total_cycles, 2); \ c.add(total_cycles, 2); \
} \ } \
} \ } \
@ -735,7 +753,9 @@ static void emit_MMU_aluMemCycles(int alu_cycles, GPVar mem_cycles, int populati
#define OP_ARITHMETIC_S(arg, x86inst, symmetric) \ #define OP_ARITHMETIC_S(arg, x86inst, symmetric) \
arg; \ arg; \
if(symmetric && !rhs_is_imm) \ if(REG_POS(i,12) == REG_POS(i,16)) \
c.x86inst(reg_pos_ptr(12), rhs); \
else if(symmetric && !rhs_is_imm) \
{ \ { \
c.x86inst(*(GPVar*)&rhs, reg_pos_ptr(16)); \ c.x86inst(*(GPVar*)&rhs, reg_pos_ptr(16)); \
c.mov(reg_pos_ptr(12), rhs); \ c.mov(reg_pos_ptr(12), rhs); \
@ -810,15 +830,15 @@ static int OP_SUB_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, sub, 0, 0); }
static int OP_SUB_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, sub, 0, 0); } static int OP_SUB_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, sub, 0, 0); }
static int OP_SUB_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, sub, 0, 0); } static int OP_SUB_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, sub, 0, 0); }
static int OP_RSB_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, sub, 0, 0); } static int OP_RSB_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, sub, 0); }
static int OP_RSB_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, sub, 0, 0); } static int OP_RSB_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, sub, 0); }
static int OP_RSB_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, sub, 0, 0); } static int OP_RSB_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, sub, 0); }
static int OP_RSB_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, sub, 0, 0); } static int OP_RSB_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, sub, 0); }
static int OP_RSB_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, sub, 0, 0); } static int OP_RSB_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, sub, 0); }
static int OP_RSB_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, sub, 0, 0); } static int OP_RSB_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, sub, 0); }
static int OP_RSB_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, sub, 0, 0); } static int OP_RSB_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, sub, 0); }
static int OP_RSB_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, sub, 0, 0); } static int OP_RSB_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, sub, 0); }
static int OP_RSB_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, sub, 0, 0); } static int OP_RSB_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, sub, 0); }
// ================================ S instructions // ================================ S instructions
static int OP_AND_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM, and_, 1); } static int OP_AND_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM, and_, 1); }
@ -871,15 +891,15 @@ static int OP_SUB_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, sub, 0, 1); }
static int OP_SUB_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, sub, 0, 1); } static int OP_SUB_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, sub, 0, 1); }
static int OP_SUB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, sub, 0, 1); } static int OP_SUB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, sub, 0, 1); }
static int OP_RSB_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, sub, 0, 1); } static int OP_RSB_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, sub, 1); }
static int OP_RSB_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, sub, 0, 1); } static int OP_RSB_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, sub, 1); }
static int OP_RSB_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, sub, 0, 1); } static int OP_RSB_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, sub, 1); }
static int OP_RSB_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, sub, 0, 1); } static int OP_RSB_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, sub, 1); }
static int OP_RSB_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, sub, 0, 1); } static int OP_RSB_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, sub, 1); }
static int OP_RSB_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, sub, 0, 1); } static int OP_RSB_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, sub, 1); }
static int OP_RSB_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, sub, 0, 1); } static int OP_RSB_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, sub, 1); }
static int OP_RSB_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, sub, 0, 1); } static int OP_RSB_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, sub, 1); }
static int OP_RSB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, sub, 0, 1); } static int OP_RSB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, sub, 1); }
static int OP_ADC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(0), adc, 1, 0); } static int OP_ADC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(0), adc, 1, 0); }
static int OP_ADC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(0), adc, 1, 0); } static int OP_ADC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(0), adc, 1, 0); }
@ -921,25 +941,25 @@ static int OP_SBC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; GET_CARRY(1),
static int OP_SBC_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(1), sbb, 0, 1); } static int OP_SBC_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(1), sbb, 0, 1); }
static int OP_SBC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(1), sbb, 0, 1); } static int OP_SBC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(1), sbb, 0, 1); }
static int OP_RSC_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), sbb, 0); }
static int OP_RSC_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), sbb, 0); }
static int OP_RSC_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), sbb, 0); }
static int OP_RSC_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), sbb, 0); }
static int OP_RSC_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), sbb, 0); }
static int OP_RSC_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), sbb, 0); }
static int OP_RSC_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), sbb, 0); }
static int OP_RSC_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), sbb, 0); }
static int OP_RSC_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), sbb, 0, 0); } static int OP_RSC_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), sbb, 0); }
static int OP_RSC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), sbb, 1); }
static int OP_RSC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), sbb, 0, 1); } static int OP_RSC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), sbb, 1); }
static int OP_BIC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; c.not_(rhs), and_, 1, 0); } static int OP_BIC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; c.not_(rhs), and_, 1, 0); }
static int OP_BIC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; c.not_(rhs), and_, 1, 0); } static int OP_BIC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; c.not_(rhs), and_, 1, 0); }
@ -1009,43 +1029,10 @@ static int OP_TEQ_IMM_VAL(const u32 i) { OP_TEQ_(S_IMM_VAL); }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// CMP // CMP
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Builds one out-of-line JIT helper and stores the result of c.make() in
// op_cmp[PROCNUM][sign]. The helper packs the host condition flags, as they
// are on entry, into bits 7..4 of the CPSR flags byte:
//   bit 7 <- sign, bit 6 <- zero, bit 5 <- carry (inverted when `sign` is
//   non-zero), bit 4 <- overflow; the low four bits of that byte are kept.
// PROCNUM selects the op_cmp row and appears in the JIT comment ('9' for 0,
// '7' otherwise); `sign` selects the carry polarity.
// NOTE(review): c.clear() is called first, so this presumably must not run
// while another function is being assembled -- confirm against the caller.
static void init_op_cmp(int PROCNUM, int sign)
{
c.clear();
JIT_COMMENT("init_op_cmp ARM%c, sign %d", PROCNUM?'7':'9', sign);
// actually takes 1 input, the flags reg
c.newFunction(ASMJIT_CALL_CONV, FunctionBuilder0<Void>());
// naked hint: presumably suppresses prolog/epilog code that could disturb the
// incoming host flags -- TODO confirm against the AsmJit documentation
c.getFunction()->setHint(FUNCTION_HINT_NAKED, true);
GPVar x = c.newGP(VARIABLE_TYPE_GPD);
GPVar y = c.newGP(VARIABLE_TYPE_GPD);
#if defined(_M_X64) || defined(__x86_64__)
// 64-bit hosts: a local bb_cpu (shadowing the file-level one) is loaded with
// the address of ARMPROC; flags_ptr presumably addresses relative to it
GPVar bb_cpu = c.newGP(VARIABLE_TYPE_GPN);
c.mov(bb_cpu, (uintptr_t)&ARMPROC);
Mem flags = flags_ptr;
#else
// other hosts: absolute address of byte 3 of CPSR.val
Mem flags = byte_ptr_abs((u8*)&cpu->CPSR.val+3);
#endif
// x = sign flag, y = zero flag; setcc writes only the low byte
c.sets(x.r8Lo());
c.setz(y.r8Lo());
// each lea computes x = y + x*2, shifting the next flag into bit 0
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2));
c.set(sign ? C_NC : C_C, y.r8Lo());
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2));
c.seto(y.r8Lo());
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2));
// merge the four new flag bits with the preserved low nibble and store the byte
c.movzx(y, flags);
c.shl(x, 4);
c.and_(y, 0xF);
c.or_(x, y);
c.mov(flags, x.r8Lo());
c.endFunction();
op_cmp[PROCNUM][sign] = c.make();
}
#define OP_CMP(arg) \ #define OP_CMP(arg) \
arg; \ arg; \
c.cmp(reg_pos_ptr(16), rhs); \ c.cmp(reg_pos_ptr(16), rhs); \
ECall* ctx = c.call(op_cmp[PROCNUM][1]); \ SET_NZCV(1); \
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>()); \
return 1; return 1;
static int OP_CMP_LSL_IMM(const u32 i) { OP_CMP(LSL_IMM); } static int OP_CMP_LSL_IMM(const u32 i) { OP_CMP(LSL_IMM); }
@ -1074,8 +1061,7 @@ static int OP_CMP_IMM_VAL(const u32 i) { OP_CMP(IMM_VAL); }
c.mov(lhs, reg_pos_ptr(16)); \ c.mov(lhs, reg_pos_ptr(16)); \
c.add(lhs, rhs); \ c.add(lhs, rhs); \
} \ } \
ECall* ctx = c.call(op_cmp[PROCNUM][sign]); \ SET_NZCV(sign); \
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>()); \
return 1; return 1;
static int OP_CMN_LSL_IMM(const u32 i) { OP_CMN(LSL_IMM); } static int OP_CMN_LSL_IMM(const u32 i) { OP_CMN(LSL_IMM); }
@ -1208,7 +1194,7 @@ static void MUL_Mxx_END(GPVar x, bool sign, int cycles)
c.adc(hi, reg_pos_ptr(16)); \ c.adc(hi, reg_pos_ptr(16)); \
c.mov(reg_pos_ptr(12), lhs); \ c.mov(reg_pos_ptr(12), lhs); \
c.mov(reg_pos_ptr(16), hi); \ c.mov(reg_pos_ptr(16), hi); \
SET_NZ_W; \ SET_NZ_MUL; \
} \ } \
else \ else \
{ \ { \
@ -1220,7 +1206,7 @@ static void MUL_Mxx_END(GPVar x, bool sign, int cycles)
{ \ { \
c.mov(reg_pos_ptr(12), lhs); \ c.mov(reg_pos_ptr(12), lhs); \
c.mov(reg_pos_ptr(16), hi); \ c.mov(reg_pos_ptr(16), hi); \
if(flags) SET_NZ_W; \ if(flags) SET_NZ_MUL; \
} \ } \
else \ else \
{ \ { \
@ -1486,8 +1472,8 @@ static int OP_MRS_SPSR(const u32 i)
static int OP_MSR_CPSR(const u32 i) { OP_MSR_(CPSR, REG_OFF, 1); } static int OP_MSR_CPSR(const u32 i) { OP_MSR_(CPSR, REG_OFF, 1); }
static int OP_MSR_SPSR(const u32 i) { OP_MSR_(SPSR, REG_OFF, 0); } static int OP_MSR_SPSR(const u32 i) { OP_MSR_(SPSR, REG_OFF, 0); }
static int OP_MSR_CPSR_IMM_VAL(const u32 i) { OP_MSR_(CPSR, IMM_VALUE, 1); } static int OP_MSR_CPSR_IMM_VAL(const u32 i) { OP_MSR_(CPSR, IMM_VAL, 1); }
static int OP_MSR_SPSR_IMM_VAL(const u32 i) { OP_MSR_(SPSR, IMM_VALUE, 0); } static int OP_MSR_SPSR_IMM_VAL(const u32 i) { OP_MSR_(SPSR, IMM_VAL, 0); }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// LDR // LDR
@ -2495,8 +2481,8 @@ static int OP_MCR(const u32 i)
c.mov(bb_mmu, (uintptr_t)&MMU); c.mov(bb_mmu, (uintptr_t)&MMU);
Mem rwmode = mmu_ptr_byte(ARM9_RW_MODE); Mem rwmode = mmu_ptr_byte(ARM9_RW_MODE);
Mem ldtbit = cpu_ptr_byte(LDTBit, 0); Mem ldtbit = cpu_ptr_byte(LDTBit, 0);
c.bt(data, 7); c.test(data, (1<<7));
c.setc(rwmode); c.setnz(rwmode);
//cpu->intVector = 0xFFFF0000 * (BIT13(val)); //cpu->intVector = 0xFFFF0000 * (BIT13(val));
GPVar vec = c.newGP(VARIABLE_TYPE_GPD); GPVar vec = c.newGP(VARIABLE_TYPE_GPD);
c.mov(tmp, 0xFFFF0000); c.mov(tmp, 0xFFFF0000);
@ -2505,8 +2491,8 @@ static int OP_MCR(const u32 i)
c.cmovc(vec, tmp); c.cmovc(vec, tmp);
c.mov(cpu_ptr(intVector), vec); c.mov(cpu_ptr(intVector), vec);
//cpu->LDTBit = !BIT15(val); //TBit //cpu->LDTBit = !BIT15(val); //TBit
c.bt(data, 1); c.test(data, (1 << 1));
c.setnc(ldtbit); c.setz(ldtbit);
//ctrl = (val & 0x000FF085) | 0x00000078; //ctrl = (val & 0x000FF085) | 0x00000078;
c.and_(data, 0x000FF085); c.and_(data, 0x000FF085);
c.or_(data, 0x00000078); c.or_(data, 0x00000078);
@ -2990,23 +2976,22 @@ static int OP_BKPT(const u32 i) { printf("JIT: unimplemented OP_BKPT\n"); return
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// THUMB // THUMB
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
#define SET_NZCV(sign) \ #define SET_NZ_CLEAR_CV { \
ECall* ctx = c.call(op_cmp[PROCNUM][sign]); \ JIT_COMMENT("SET_NZ_CLEAR_CV"); \
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>()); GPVar x = c.newGP(VARIABLE_TYPE_GPN); \
GPVar y = c.newGP(VARIABLE_TYPE_GPN); \
#define SET_NZCV_ZERO_CV { \ c.pushf(); \
GPVar x = c.newGP(VARIABLE_TYPE_GPD); \ c.pop(x); \
GPVar y = c.newGP(VARIABLE_TYPE_GPD); \ c.and_(x, (3 << 6)); \
c.sets(x.r8Lo()); \ c.mov(y, flags_ptr); \
c.setz(y.r8Lo()); \ c.and_(y, 0x0F); \
c.lea(x, ptr(y.r64(), x.r64(), TIMES_2)); \
c.movzx(y, flags_ptr); \
c.shl(x, 6); \
c.and_(y, 0xF); \
c.or_(x, y); \ c.or_(x, y); \
c.mov(flags_ptr, x.r8Lo()); } c.mov(flags_ptr, x.r8Lo()); \
JIT_COMMENT("end SET_NZ_CLEAR_CV"); \
}
#define SET_NZC_SHIFTS_ZERO(cf) { \ #define SET_NZC_SHIFTS_ZERO(cf) { \
JIT_COMMENT("SET_NZC_SHIFTS_ZERO"); \
c.and_(flags_ptr, 0x1F); \ c.and_(flags_ptr, 0x1F); \
if(cf) \ if(cf) \
{ \ { \
@ -3015,7 +3000,9 @@ static int OP_BKPT(const u32 i) { printf("JIT: unimplemented OP_BKPT\n"); return
c.or_(flags_ptr, rcf.r8Lo()); \ c.or_(flags_ptr, rcf.r8Lo()); \
} \ } \
else \ else \
c.or_(flags_ptr, (1<<6)); } c.or_(flags_ptr, (1<<6)); \
JIT_COMMENT("end SET_NZC_SHIFTS_ZERO"); \
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
#define OP_SHIFTS_IMM(x86inst) \ #define OP_SHIFTS_IMM(x86inst) \
@ -3055,8 +3042,8 @@ static int OP_BKPT(const u32 i) { printf("JIT: unimplemented OP_BKPT\n"); return
c.jmp(__done); \ c.jmp(__done); \
/* imm == 32 */ \ /* imm == 32 */ \
c.bind(__eq32); \ c.bind(__eq32); \
c.bt(reg_pos_thumb(0), bit); \ c.test(reg_pos_thumb(0), (1 << bit)); \
c.setc(rcf.r8Lo()); \ c.setnz(rcf.r8Lo()); \
c.mov(reg_pos_thumb(0), 0); \ c.mov(reg_pos_thumb(0), 0); \
SET_NZC_SHIFTS_ZERO(1); \ SET_NZC_SHIFTS_ZERO(1); \
c.jmp(__done); \ c.jmp(__done); \
@ -3103,8 +3090,8 @@ static int OP_LSL_REG(const u32 i) { OP_SHIFTS_REG(shl, 0); }
static int OP_LSR_0(const u32 i) static int OP_LSR_0(const u32 i)
{ {
GPVar rcf = c.newGP(VARIABLE_TYPE_GPD); GPVar rcf = c.newGP(VARIABLE_TYPE_GPD);
c.bt(reg_pos_thumb(3), 31); c.test(reg_pos_thumb(3), (1 << 31));
c.setc(rcf.r8Lo()); c.setnz(rcf.r8Lo());
SET_NZC_SHIFTS_ZERO(1); SET_NZC_SHIFTS_ZERO(1);
c.mov(reg_pos_thumb(0), 0); c.mov(reg_pos_thumb(0), 0);
return 1; return 1;
@ -3116,8 +3103,8 @@ static int OP_ASR_0(const u32 i)
GPVar rcf = c.newGP(VARIABLE_TYPE_GPD); GPVar rcf = c.newGP(VARIABLE_TYPE_GPD);
GPVar rhs = c.newGP(VARIABLE_TYPE_GPD); GPVar rhs = c.newGP(VARIABLE_TYPE_GPD);
c.mov(rhs, reg_pos_thumb(3)); c.mov(rhs, reg_pos_thumb(3));
c.bt(rhs, 31); c.test(rhs, (1 << 31));
c.setc(rcf.r8Lo()); c.setnz(rcf.r8Lo());
c.sar(rhs, 31); c.sar(rhs, 31);
c.mov(reg_pos_thumb(0), rhs); c.mov(reg_pos_thumb(0), rhs);
SET_NZC; SET_NZC;
@ -3233,7 +3220,7 @@ static int OP_ADD_IMM3(const u32 i)
c.mov(tmp, reg_pos_thumb(3)); c.mov(tmp, reg_pos_thumb(3));
c.mov(reg_pos_thumb(0), tmp); c.mov(reg_pos_thumb(0), tmp);
c.cmp(tmp, 0); c.cmp(tmp, 0);
SET_NZCV_ZERO_CV; SET_NZ_CLEAR_CV;
return 1; return 1;
} }
if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) if (_REG_NUM(i, 0) == _REG_NUM(i, 3))
@ -3458,8 +3445,7 @@ static int OP_MUL_REG(const u32 i)
static int OP_CMP_IMM8(const u32 i) static int OP_CMP_IMM8(const u32 i)
{ {
c.cmp(reg_pos_thumb(8), (i & 0xFF)); c.cmp(reg_pos_thumb(8), (i & 0xFF));
ECall* ctx = c.call(op_cmp[PROCNUM][1]); SET_NZCV(1);
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>());
return 1; return 1;
}; };
static int OP_CMP(const u32 i) static int OP_CMP(const u32 i)
@ -3467,8 +3453,7 @@ static int OP_CMP(const u32 i)
GPVar tmp = c.newGP(VARIABLE_TYPE_GPD); GPVar tmp = c.newGP(VARIABLE_TYPE_GPD);
c.mov(tmp, reg_pos_thumb(3)); c.mov(tmp, reg_pos_thumb(3));
c.cmp(reg_pos_thumb(0), tmp); c.cmp(reg_pos_thumb(0), tmp);
ECall* ctx = c.call(op_cmp[PROCNUM][1]); SET_NZCV(1);
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>());
return 1; return 1;
}; };
static int OP_CMP_SPE(const u32 i) static int OP_CMP_SPE(const u32 i)
@ -3477,8 +3462,7 @@ static int OP_CMP_SPE(const u32 i)
GPVar tmp = c.newGP(VARIABLE_TYPE_GPD); GPVar tmp = c.newGP(VARIABLE_TYPE_GPD);
c.mov(tmp, reg_pos_ptr(3)); c.mov(tmp, reg_pos_ptr(3));
c.cmp(reg_ptr(Rn), tmp); c.cmp(reg_ptr(Rn), tmp);
ECall* ctx = c.call(op_cmp[PROCNUM][1]); SET_NZCV(1);
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>());
return 1; return 1;
}; };
static int OP_CMN(const u32 i) static int OP_CMN(const u32 i)
@ -3486,8 +3470,7 @@ static int OP_CMN(const u32 i)
GPVar tmp = c.newGP(VARIABLE_TYPE_GPD); GPVar tmp = c.newGP(VARIABLE_TYPE_GPD);
c.mov(tmp, reg_pos_thumb(0)); c.mov(tmp, reg_pos_thumb(0));
c.add(tmp, reg_pos_thumb(3)); c.add(tmp, reg_pos_thumb(3));
ECall* ctx = c.call(op_cmp[PROCNUM][0]); SET_NZCV(0);
ctx->setPrototype(ASMJIT_CALL_CONV, FunctionBuilder0<Void>());
return 1; return 1;
}; };
@ -4273,12 +4256,8 @@ void arm_jit_reset(bool enable)
memset(compiled_funcs+128*i, 0, 128*sizeof(*compiled_funcs)); memset(compiled_funcs+128*i, 0, 128*sizeof(*compiled_funcs));
} }
#endif #endif
init_op_cmp(0, 0);
init_op_cmp(0, 1);
init_op_cmp(1, 0);
init_op_cmp(1, 1);
} }
c.clear(); c.clear();
#if (PROFILER_JIT_LEVEL > 0) #if (PROFILER_JIT_LEVEL > 0)