#include "arm7.h" #include "arm_mem.h" #include #define C_CORE #if 0 #define arm_printf printf #else void arm_printf(...) { } #endif //#define CPUReadHalfWordQuick(addr) arm_ReadMem16(addr & 0x7FFFFF) #define CPUReadMemoryQuick(addr) (*(u32*)&aica_ram[addr&ARAM_MASK]) #define CPUReadByte arm_ReadMem8 #define CPUReadMemory arm_ReadMem32 #define CPUReadHalfWord arm_ReadMem16 #define CPUReadHalfWordSigned(addr) ((s16)arm_ReadMem16(addr)) #define CPUWriteMemory arm_WriteMem32 #define CPUWriteHalfWord arm_WriteMem16 #define CPUWriteByte arm_WriteMem8 #define reg arm_Reg #define armNextPC reg[R15_ARM_NEXT].I #define CPUUpdateTicksAccesint(a) 1 #define CPUUpdateTicksAccessSeq32(a) 1 #define CPUUpdateTicksAccesshort(a) 1 #define CPUUpdateTicksAccess32(a) 1 #define CPUUpdateTicksAccess16(a) 1 //bool arm_FiqPending; -- not used , i use the input directly :) //bool arm_IrqPending; DECL_ALIGN(8) reg_pair arm_Reg[RN_ARM_REG_COUNT]; void CPUSwap(u32 *a, u32 *b) { u32 c = *b; *b = *a; *a = c; } /* bool N_FLAG; bool Z_FLAG; bool C_FLAG; bool V_FLAG; */ #define N_FLAG (reg[RN_PSR_FLAGS].FLG.N) #define Z_FLAG (reg[RN_PSR_FLAGS].FLG.Z) #define C_FLAG (reg[RN_PSR_FLAGS].FLG.C) #define V_FLAG (reg[RN_PSR_FLAGS].FLG.V) bool armIrqEnable; bool armFiqEnable; //bool armState; int armMode; bool Arm7Enabled=false; u8 cpuBitsSet[256]; bool intState = false; bool stopState = false; bool holdState = false; void CPUSwitchMode(int mode, bool saveState, bool breakLoop=true); extern "C" void CPUFiq(); void CPUUpdateCPSR(); void CPUUpdateFlags(); void CPUSoftwareInterrupt(int comment); void CPUUndefinedException(); void arm_Run_(u32 CycleCount) { if (!Arm7Enabled) return; u32 clockTicks=0; while (clockTicks Indirect, via write to PSR/Mode MSR */ struct ArmDPOP { u32 key; u32 mask; u32 flags; }; vector ops; enum OpFlags { OP_SETS_PC = 1, OP_READS_PC = 32768, OP_IS_COND = 65536, OP_MFB = 0x80000000, OP_HAS_RD_12 = 2, OP_HAS_RD_16 = 4, OP_HAS_RS_0 = 8, OP_HAS_RS_8 = 16, OP_HAS_RS_16 = 32, OP_HAS_FLAGS_READ = 4096, OP_HAS_FLAGS_WRITE = 8192, OP_HAS_RD_READ = 16384, //For conditionals OP_WRITE_FLAGS = 64, OP_WRITE_FLAGS_S = 128, OP_READ_FLAGS = 256, OP_READ_FLAGS_S = 512, OP_WRITE_REG = 1024, OP_READ_REG_1 = 2048, }; #define DP_R_ROFC (OP_READ_FLAGS_S|OP_READ_REG_1) //Reads reg1, op2, flags if S #define DP_R_ROF (OP_READ_FLAGS|OP_READ_REG_1) //Reads reg1, op2, flags (ADC & co) #define DP_R_OFC (OP_READ_FLAGS_S) //Reads op2, flags if S #define DP_W_RFC (OP_WRITE_FLAGS_S|OP_WRITE_REG) //Writes reg, and flags if S #define DP_W_F (OP_WRITE_FLAGS) //Writes only flags, always (S=1) /* COND | 00 0 OP1 S Rn Rd SA ST 0 Rm -- Data opcode, PSR xfer (imm shifted reg) | 00 0 OP1 S Rn Rd Rs 0 ST 1 Rm -- Data opcode, PSR xfer (reg shifted reg) | 00 0 0 00A S Rd Rn Rs 1001 Rm -- Mult | 00 0 1 0B0 0 Rn Rd 0000 1001 Rm -- SWP | 00 1 OP1 S Rn Rd imm8r4 -- Data opcode, PSR xfer (imm8r4) | 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) | 01 1 P UBW L Rn Rd SHAM SHTP 0 Rs -- LDR/STR (I=1) | 10 0 P USW L Rn {RList} -- LDM/STM | 10 1 L {offset} -- B/BL | 11 1 1 X* -- SWI (undef cases) | 01 1 XXXX X X* X* X* 1 XXXX - Undefined (LDR/STR w/ encodings that would be reg. based shift) | 11 0 PUNW L Rn {undef} -- Copr. Data xfer (undef) | 11 1 0 CPOP Crn Crd Cpn CP3 0 Crm -- Copr. Data Op (undef) | 11 1 0 CPO3 L Crn Crd Cpn CP3 1 Crm -- Copr. Reg xf (undef) Phase #1: -Non branches that don't touch memory (pretty much: Data processing, Not MSR, Mult) -Everything else is ifb Phase #2: Move LDR/STR to templates Phase #3: Move LDM/STM to templates */ void AddDPOP(u32 subcd, u32 rflags, u32 wflags) { ArmDPOP op; u32 key=subcd<<21; u32 mask=(15<<21) | (7<<25); op.flags=rflags|wflags; if (wflags==DP_W_F) { //also match S bit for opcodes that must write to flags (CMP & co) mask|=1<<20; key|=1<<20; } //ISR form (bit 25=0, bit 4 = 0) op.key=key; op.mask=mask | (1<<4); ops.push_back(op); //RSR form (bit 25=0, bit 4 = 1, bit 7=0) op.key = key | (1<<4); op.mask = mask | (1<<4) | (1<<7); ops.push_back(op); //imm8r4 form (bit 25=1) op.key = key | (1<<25); op.mask = mask; ops.push_back(op); } void InitHash() { /* COND | 00 I OP1 S Rn Rd OPER2 -- Data opcode, PSR xfer Data processing opcodes */ //AND 0000 Rn, OPER2, {Flags} Rd, {Flags} //EOR 0001 Rn, OPER2, {Flags} Rd, {Flags} //SUB 0010 Rn, OPER2, {Flags} Rd, {Flags} //RSB 0011 Rn, OPER2, {Flags} Rd, {Flags} //ADD 0100 Rn, OPER2, {Flags} Rd, {Flags} //ORR 1100 Rn, OPER2, {Flags} Rd, {Flags} //BIC 1110 Rn, OPER2, {Flags} Rd, {Flags} AddDPOP(0,DP_R_ROFC, DP_W_RFC); AddDPOP(1,DP_R_ROFC, DP_W_RFC); AddDPOP(2,DP_R_ROFC, DP_W_RFC); AddDPOP(3,DP_R_ROFC, DP_W_RFC); AddDPOP(4,DP_R_ROFC, DP_W_RFC); AddDPOP(12,DP_R_ROFC, DP_W_RFC); AddDPOP(14,DP_R_ROFC, DP_W_RFC); //ADC 0101 Rn, OPER2, Flags Rd, {Flags} //SBC 0110 Rn, OPER2, Flags Rd, {Flags} //RSC 0111 Rn, OPER2, Flags Rd, {Flags} AddDPOP(5,DP_R_ROF, DP_W_RFC); AddDPOP(6,DP_R_ROF, DP_W_RFC); AddDPOP(7,DP_R_ROF, DP_W_RFC); //TST 1000 S=1 Rn, OPER2, Flags Flags //TEQ 1001 S=1 Rn, OPER2, Flags Flags AddDPOP(8,DP_R_ROF, DP_W_F); AddDPOP(9,DP_R_ROF, DP_W_F); //CMP 1010 S=1 Rn, OPER2 Flags //CMN 1011 S=1 Rn, OPER2 Flags AddDPOP(10,DP_R_ROF, DP_W_F); AddDPOP(11,DP_R_ROF, DP_W_F); //MOV 1101 OPER2, {Flags} Rd, {Flags} //MVN 1111 OPER2, {Flags} Rd, {Flags} AddDPOP(13,DP_R_OFC, DP_W_RFC); AddDPOP(15,DP_R_OFC, DP_W_RFC); } /* * * X86 Compiler * */ void armEmit32(u32 emit32); void *armGetEmitPtr(); #define _DEVEL (1) #define EMIT_I armEmit32((I)) #define EMIT_GET_PTR() armGetEmitPtr() u8* icPtr; u8* ICache; const u32 ICacheSize=1024*1024; #if HOST_OS == OS_WINDOWS u8 ARM7_TCB[ICacheSize+4096]; #elif HOST_OS == OS_LINUX u8 ARM7_TCB[ICacheSize+4096] __attribute__((section(".text"))); #elif HOST_OS==OS_DARWIN u8 ARM7_TCB[ICacheSize+4096] __attribute__((section("__TEXT, .text"))); #else #error ARM7_TCB ALLOC #endif #include "arm_emitter/arm_emitter.h" #undef I using namespace ARM; void* EntryPoints[ARAM_SIZE/4]; enum OpType { VOT_Fallback, VOT_DataOp, VOT_B, VOT_BL, VOT_BR, //Branch (to register) VOT_Read, //Actually, this handles LDR and STR //VOT_LDM, //This Isn't used anymore VOT_MRS, VOT_MSR, }; void armv_call(void* target); void armv_setup(); void armv_intpr(u32 opcd); void armv_end(void* codestart, u32 cycles); void armv_check_pc(u32 pc); void armv_check_cache(u32 opcd, u32 pc); void armv_imm_to_reg(u32 regn, u32 imm); void armv_MOV32(eReg regn, u32 imm); void armv_prof(OpType opt,u32 op,u32 flg); extern "C" void arm_dispatch(); extern "C" void arm_exit(); extern "C" void DYNACALL arm_mainloop(u32 cycl, void* regs, void* entrypoints); extern "C" void DYNACALL arm_compilecode(); template u32 DYNACALL DoMemOp(u32 addr,u32 data) { u32 rv=0; #if HOST_CPU==CPU_X86 addr=virt_arm_reg(0); data=virt_arm_reg(1); #endif if (L) { if (B) rv=arm_ReadMem8(addr); else rv=arm_ReadMem32(addr); } else { if (B) arm_WriteMem8(addr,data); else arm_WriteMem32(addr,data); } #if HOST_CPU==CPU_X86 virt_arm_reg(0)=rv; #endif return rv; } //findfirstset -- used in LDM/STM handling #if HOST_CPU==CPU_X86 && BUILD_COMPILER != COMPILER_GCC #include u32 findfirstset(u32 v) { unsigned long rv; _BitScanForward(&rv,v); return rv+1; } #else #define findfirstset __builtin_ffs #endif #if 0 //LDM isn't perf. citrical, and as a result, not implemented fully. //So this code is disabled //mask is *2 template void DYNACALL DoLDM(u32 addr, u32 mask) { #if HOST_CPU==CPU_X86 addr=virt_arm_reg(0); mask=virt_arm_reg(1); #endif //addr=(addr); //force align ? u32 idx=-1; do { u32 tz=findfirstset(mask); mask>>=tz; idx+=tz; arm_Reg[idx].I=arm_ReadMem32(addr); addr+=4; } while(mask); } #endif void* GetMemOp(bool L, bool B) { if (L) { if (B) return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; else return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; } else { if (B) return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; else return (void*)(u32(DYNACALL*)(u32,u32))&DoMemOp; } } //Decodes an opcode, returns type. //opcd might be changed (currently for LDM/STM -> LDR/STR transforms) OpType DecodeOpcode(u32& opcd,u32& flags) { //by default, PC has to be updated flags=OP_READS_PC; u32 CC=(opcd >> 28); if (CC!=CC_AL) flags|=OP_IS_COND; //helpers ... #define CHK_BTS(M,S,V) ( (M & (opcd>>S)) == (V) ) //Check bits value in opcode #define IS_LOAD (opcd & (1<<20)) //Is L bit set ? (LDM/STM LDR/STR) #define READ_PC_CHECK(S) if (CHK_BTS(15,S,15)) flags|=OP_READS_PC; //Opcode sets pc ? bool _set_pc= (CHK_BTS(3,26,0) && CHK_BTS(15,12,15)) || //Data processing w/ Rd=PC (CHK_BTS(3,26,1) && CHK_BTS(15,12,15) && IS_LOAD ) || //LDR/STR w/ Rd=PC (CHK_BTS(7,25,4) && (opcd & 32768) && IS_LOAD) || //LDM/STM w/ PC in list CHK_BTS(7,25,5) || //B or BL CHK_BTS(15,24,15); //SWI //NV condition means VFP on newer cores, let interpreter handle it... if (CC==15) return VOT_Fallback; if (_set_pc) flags|=OP_SETS_PC; //B / BL ? if (CHK_BTS(7,25,5)) { verify(_set_pc); if (!(flags&OP_IS_COND)) flags&=~OP_READS_PC; //not COND doesn't read from pc flags|=OP_SETS_PC; //Branches Set pc .. //branch ! return (opcd&(1<<24))?VOT_BL:VOT_B; } //Common case: MOVCC PC,REG if (CHK_BTS(0xFFFFFF,4,0x1A0F00)) { verify(_set_pc); if (CC==CC_AL) flags&=~OP_READS_PC; return VOT_BR; } //No support for COND branching opcodes apart from the forms above .. if (CC!=CC_AL && _set_pc) { return VOT_Fallback; } u32 RList=opcd&0xFFFF; u32 Rn=(opcd>>16)&15; #define LDM_REGCNT() (cpuBitsSet[RList & 255] + cpuBitsSet[(RList >> 8) & 255]) //Data Processing opcodes -- find using mask/key //This will eventually be virtualised w/ register renaming for( u32 i=0;i> 28)!=0xE) { flags |= OP_HAS_FLAGS_READ; //if (flags & OP_WRITE_REG) flags |= OP_HAS_RD_READ; } //DPOP ! if ((ops[i].flags & OP_READ_FLAGS) || ((ops[i].flags & OP_READ_FLAGS_S) && (opcd & (1<<20)))) { flags |= OP_HAS_FLAGS_READ; } if ((ops[i].flags & OP_WRITE_FLAGS) || ((ops[i].flags & OP_WRITE_FLAGS_S) && (opcd & (1<<20)))) { flags |= OP_HAS_FLAGS_WRITE; } if(ops[i].flags & OP_WRITE_REG) { //All dpops that write, write to RD_12 flags |= OP_HAS_RD_12; verify(! (CHK_BTS(15,12,15) && CC!=CC_AL)); } if(ops[i].flags & OP_READ_REG_1) { //Reg 1 is RS_16 flags |= OP_HAS_RS_16; //reads from pc ? READ_PC_CHECK(16); } //op2 is imm or reg ? if ( !(opcd & (1<<25)) ) { //its reg (register or imm shifted) flags |= OP_HAS_RS_0; //reads from pc ? READ_PC_CHECK(0); //is it register shifted reg ? if (opcd & (1<<4)) { verify(! (opcd & (1<<7)) ); //must be zero flags |= OP_HAS_RS_8; //can't be pc ... verify(!CHK_BTS(15,8,15)); } else { //is it RRX ? if ( ((opcd>>4)&7)==6) { //RRX needs flags to be read (even if the opcode doesn't) flags |= OP_HAS_FLAGS_READ; } } } return VOT_DataOp; } } //Lets try mem opcodes since its not data processing /* Lets Check LDR/STR ! CCCC 01 0 P UBW L Rn Rd Offset -- LDR/STR (I=0) */ if ((opcd>>25)==(0xE4/2) ) { /* I=0 Everything else handled */ arm_printf("ARM: MEM %08X L/S:%d, AWB:%d!\n",opcd,(opcd>>20)&1,(opcd>>21)&1); return VOT_Read; } else if ((opcd>>25)==(0xE6/2) && CHK_BTS(0x7,4,0) ) { arm_printf("ARM: MEM REG to Reg %08X\n",opcd); /* I=1 Logical Left shift, only */ return VOT_Read; } //LDM common case else if ((opcd>>25)==(0xE8/2) /*&& CHK_BTS(32768,0,0)*/ && CHK_BTS(1,22,0) && CHK_BTS(1,20,1) && LDM_REGCNT()==1) { //P=0 //U=1 //L=1 //W=1 //S=0 u32 old_opcd=opcd; //One register xfered //Can be rewriten as normal mem opcode .. opcd=0xE4000000; //Imm offset opcd |= 0<<25; //Post incr opcd |= old_opcd & (1<<24); //Up/Dn opcd |= old_opcd & (1<<23); //Word/Byte opcd |= 0<<22; //Write back (must be 0 for PI) opcd |= old_opcd & (1<<21); //Load opcd |= old_opcd & (1<<20); //Rn opcd |= Rn<<16; //Rd u32 Rd=findfirstset(RList)-1; opcd |= Rd<<12; //Offset opcd |= 4; arm_printf("ARM: MEM TFX R %08X\n",opcd); return VOT_Read; } //STM common case else if ((opcd>>25)==(0xE8/2) && CHK_BTS(1,22,0) && CHK_BTS(1,20,0) && LDM_REGCNT()==1) { //P=1 //U=0 //L=1 //W=1 //S=0 u32 old_opcd=opcd; //One register xfered //Can be rewriten as normal mem opcode .. opcd=0xE4000000; //Imm offset opcd |= 0<<25; //Pre/Post incr opcd |= old_opcd & (1<<24); //Up/Dn opcd |= old_opcd & (1<<23); //Word/Byte opcd |= 0<<22; //Write back opcd |= old_opcd & (1<<21); //Store/Load opcd |= old_opcd & (1<<20); //Rn opcd |= Rn<<16; //Rd u32 Rd=findfirstset(RList)-1; opcd |= Rd<<12; //Offset opcd |= 4; arm_printf("ARM: MEM TFX W %08X\n",opcd); return VOT_Read; } else if (CHK_BTS(0xE10F0FFF,0,0xE10F0000)) { return VOT_MRS; } else if (CHK_BTS(0xEFBFFFF0,0,0xE129F000)) { return VOT_MSR; } else if ((opcd>>25)==(0xE8/2) && CHK_BTS(32768,0,0)) { arm_printf("ARM: MEM FB %08X\n",opcd); flags|=OP_MFB; //(flag Just for the fallback counters) } else { arm_printf("ARM: FB %08X\n",opcd); } //by default fallback to interpr return VOT_Fallback; } //helpers ... void LoadReg(eReg rd,u32 regn,ConditionCode cc=CC_AL) { LDR(rd,r8,(u8*)®[regn].I-(u8*)®[0].I,Offset,cc); } void StoreReg(eReg rd,u32 regn,ConditionCode cc=CC_AL) { STR(rd,r8,(u8*)®[regn].I-(u8*)®[0].I,Offset,cc); } //very quick-and-dirty register rename based virtualisation u32 renamed_regs[16]; u32 rename_reg_base; void RenameRegReset() { rename_reg_base=r1; memset(renamed_regs, 0, sizeof(renamed_regs)); } //returns new reg #. didrn is true if a rename mapping was added u32 RenameReg(u32 reg, bool& didrn) { if (renamed_regs[reg] == 0) { renamed_regs[reg]=rename_reg_base; rename_reg_base++; didrn=true; } else { didrn=false; } return renamed_regs[reg]; } //For reg reads (they need to be loaded) //load can be used to skip loading (for RD if not cond) void LoadAndRename(u32& opcd, u32 bitpos, bool load,u32 pc) { bool didrn; u32 reg=(opcd>>bitpos)&15; u32 nreg=RenameReg(reg,didrn); opcd = (opcd& ~(15<>bitpos)&15; u32 nreg=RenameReg(reg,didrn); verify(!didrn); if (reg==15) reg=R15_ARM_NEXT; StoreReg((eReg)nreg,reg); } //For COND void LoadFlags() { //Load flags LoadReg(r0,RN_PSR_FLAGS); //move them to flags register MSR(0,8,r0); } //Virtualise Data Processing opcode void VirtualizeOpcode(u32 opcd,u32 flag,u32 pc) { //Keep original opcode for info u32 orig=opcd; //Load arm flags, RS0/8/16, RD12/16 (as indicated by the decoder flags) if (flag & OP_HAS_FLAGS_READ) { LoadFlags(); } if (flag & OP_HAS_RS_0) LoadAndRename(opcd,0,true,pc+8); if (flag & OP_HAS_RS_8) LoadAndRename(opcd,8,true,pc+8); if (flag & OP_HAS_RS_16) LoadAndRename(opcd,16,true,pc+8); if (flag & OP_HAS_RD_12) LoadAndRename(opcd,12,flag&OP_HAS_RD_READ,pc+4); if (flag & OP_HAS_RD_16) { verify(! (flag & OP_HAS_RS_16)); LoadAndRename(opcd,16,flag&OP_HAS_RD_READ,pc+4); } //Opcode has been modified to use the new regs //Emit it ... arm_printf("Arm Virtual: %08X -> %08X\n",orig,opcd); armEmit32(opcd); //Store arm flags, rd12/rd16 (as indicated by the decoder flags) if (flag & OP_HAS_RD_12) StoreAndRename(orig,12); if (flag & OP_HAS_RD_16) StoreAndRename(orig,16); //Sanity check .. if (renamed_regs[15] != 0) { verify(flag&OP_READS_PC || (flag&OP_SETS_PC && !(flag&OP_IS_COND))); } if (flag & OP_HAS_FLAGS_WRITE) { //get results from flags register MRS(r1,0); //Store flags StoreReg(r1,RN_PSR_FLAGS); } } u32 nfb,ffb,bfb,mfb; #if HOST_CPU == CPU_X86 && FEAT_AREC != DYNAREC_NONE /* X86 backend * Uses a mix of * x86 code * Virtualised arm code (using the varm interpreter) * Emulated arm fallbacks (using the aica arm interpreter) * * The goal is to run as much code possible under the varm interpreter * so it will run on arm w/o changes. A few opcodes are missing from varm * (MOV32 is a notable case) and as such i've added a few varm_* hooks * * This code also performs a LOT of compiletime and runtime state/value sanity checks. * We don't care for speed here ... */ #include "emitter/x86_emitter.h" #include "virt_arm.h" static x86_block* x86e; void DumpRegs(const char* output) { static FILE* f=fopen(output, "w"); static int id=0; #if 0 if (490710==id) { __asm int 3; } #endif verify(id!=137250); #if 1 fprintf(f,"%d\n",id); //for(int i=0;i<14;i++) { int i=R15_ARM_NEXT; fprintf(f,"r%d=%08X\n",i,reg[i].I); } #endif id++; } void DYNACALL PrintOp(u32 opcd) { printf("%08X\n",opcd); } void armv_imm_to_reg(u32 regn, u32 imm) { x86e->Emit(op_mov32,®[regn].I,imm); } void armv_MOV32(eReg regn, u32 imm) { x86e->Emit(op_mov32,&virt_arm_reg(regn),imm); } void armv_call(void* loc) { x86e->Emit(op_call,x86_ptr_imm(loc)); } x86_Label* end_lbl; void armv_setup() { //Setup emitter x86e = new x86_block(); x86e->Init(0,0); x86e->x86_buff=(u8*)EMIT_GET_PTR(); x86e->x86_size=1024*64; x86e->do_realloc=false; //load base reg .. x86e->Emit(op_mov32,&virt_arm_reg(8),(u32)&arm_Reg[0]); //the "end" label is used to exit from the block, if a code modification (expected opcode // actual opcode in ram) is detected end_lbl=x86e->CreateLabel(false,0); } void armv_intpr(u32 opcd) { //Call interpreter x86e->Emit(op_mov32,ECX,opcd); x86e->Emit(op_call,x86_ptr_imm(&arm_single_op)); } void armv_end(void* codestart, u32 cycles) { //Normal block end //Move counter to EAX for return, pop ESI, ret x86e->Emit(op_sub32,ESI,cycles); x86e->Emit(op_jns,x86_ptr_imm(arm_dispatch)); x86e->Emit(op_jmp,x86_ptr_imm(arm_exit)); //Fluch cache, move counter to EAX, pop, ret //this should never happen (triggers a breakpoint on x86) x86e->MarkLabel(end_lbl); x86e->Emit(op_int3); x86e->Emit(op_call,x86_ptr_imm(FlushCache)); x86e->Emit(op_sub32,ESI,cycles); x86e->Emit(op_jmp,x86_ptr_imm(arm_dispatch)); //Generate the code & apply fixups/relocations as needed x86e->Generate(); //Use space from the dynarec buffer icPtr+=x86e->x86_indx; //Delete the x86 emitter ... delete x86e; } //sanity check: non branch doesn't set pc void armv_check_pc(u32 pc) { x86e->Emit(op_cmp32,&armNextPC,pc); x86_Label* nof=x86e->CreateLabel(false,0); x86e->Emit(op_je,nof); x86e->Emit(op_int3); x86e->MarkLabel(nof); } //sanity check: stale cache void armv_check_cache(u32 opcd, u32 pc) { x86e->Emit(op_cmp32,&CPUReadMemoryQuick(pc),opcd); x86_Label* nof=x86e->CreateLabel(false,0); x86e->Emit(op_je,nof); x86e->Emit(op_int3); x86e->MarkLabel(nof); } //profiler hook void armv_prof(OpType opt,u32 op,u32 flags) { if (VOT_Fallback!=opt) x86e->Emit(op_add32,&nfb,1); else { if (flags & OP_SETS_PC) x86e->Emit(op_add32,&bfb,1); else if (flags & OP_MFB) x86e->Emit(op_add32,&mfb,1); else x86e->Emit(op_add32,&ffb,1); } } naked void DYNACALL arm_compilecode() { __asm { call CompileCode; mov eax,0; jmp arm_dispatch; } } naked void DYNACALL arm_mainloop(u32 cycl, void* regs, void* entrypoints) { __asm { push esi mov esi,ecx add esi,reg[CYCL_CNT*4].I mov eax,0; jmp arm_dispatch } } naked void arm_dispatch() { __asm { arm_disp: mov eax,reg[R15_ARM_NEXT*4].I and eax,0x1FFFFC cmp reg[INTR_PEND*4].I,0 jne arm_dofiq jmp [EntryPoints+eax] arm_dofiq: call CPUFiq jmp arm_disp } } naked void arm_exit() { __asm { arm_exit: mov reg[CYCL_CNT*4].I,esi pop esi ret } } #elif (HOST_CPU == CPU_ARM) /* * * ARMv7 Compiler * */ //mprotect and stuff .. #include void armEmit32(u32 emit32) { if (icPtr >= (ICache+ICacheSize-1024)) die("ICache is full, invalidate old entries ..."); //ifdebug *(u32*)icPtr = emit32; icPtr+=4; } void *armGetEmitPtr() { if (icPtr < (ICache+ICacheSize-1024)) //ifdebug return static_cast(icPtr); return NULL; } #if HOST_OS==OS_DARWIN #include extern "C" void armFlushICache(void *code, void *pEnd) { sys_dcache_flush(code, (u8*)pEnd - (u8*)code + 1); sys_icache_invalidate(code, (u8*)pEnd - (u8*)code + 1); } #else extern "C" void armFlushICache(void *bgn, void *end) { __clear_cache(bgn, end); } #endif void armv_imm_to_reg(u32 regn, u32 imm) { MOV32(r0,imm); StoreReg(r0,regn); } void armv_call(void* loc) { CALL((u32)loc); } void armv_setup() { //Setup emitter //r9: temp for mem ops (PI WB) //r8: base //Stored on arm_mainloop so no need for push/pop } void armv_intpr(u32 opcd) { //Call interpreter MOV32(r0,opcd); CALL((u32)arm_single_op); } void armv_end(void* codestart, u32 cycl) { //Normal block end //cycle counter rv //pop registers & return if (is_i8r4(cycl)) SUB(r5,r5,cycl,true); else { u32 togo = cycl; while(ARMImmid8r4_enc(togo) == -1) { SUB(r5,r5,256); togo -= 256; } SUB(r5,r5,togo,true); } JUMP((u32)&arm_exit,CC_MI); //statically predicted as not taken JUMP((u32)&arm_dispatch); armFlushICache(codestart,(void*)EMIT_GET_PTR()); } //Hook cus varm misses this, so x86 needs special code void armv_MOV32(eReg regn, u32 imm) { MOV32(regn,imm); } /* No sanity checks on arm .. */ #endif // HOST_CPU == CPU_ARM //Run a timeslice for ARMREC //CycleCount is pretty much fixed to (512*32) for now (might change to a diff constant, but will be constant) void arm_Run(u32 CycleCount) { if (!Arm7Enabled) return; for (int i=0;i<32;i++) { arm_mainloop(CycleCount/32, arm_Reg, EntryPoints); libAICA_TimeStep(); } /* s32 clktks=reg[CYCL_CNT].I+CycleCount; //While we have time to spend do { //Check for interrupts if (reg[INTR_PEND].I) { CPUFiq(); } //lookup code at armNextPC, run a block & remove its cycles from the timeslice clktks-=EntryPoints[(armNextPC & ARAM_MASK)/4](); #if HOST_CPU==CPU_X86 verify(armNextPC<=ARAM_MASK); #endif } while(clktks>0); reg[CYCL_CNT].I=clktks; */ } #undef r /* TODO: R15 read/writing is kind of .. weird Gotta investigate why .. */ //Mem operand 2 calculation, if Reg or large imm void MemOperand2(eReg dst,bool I, bool U,u32 offs, u32 opcd) { if (I==true) { u32 Rm=(opcd>>0)&15; verify(CHK_BTS(7,4,0));// only SHL mode LoadReg(r1,Rm); u32 SA=31&(opcd>>7); //can't do shifted add for now -- EMITTER LIMIT -- if (SA) LSL(r1,r1,SA); } else { armv_MOV32(r1,offs); } if (U) ADD(dst,r0,r1); else SUB(dst,r0,r1); } template void DYNACALL MSR_do(u32 v) { #if HOST_CPU==CPU_X86 v=virt_arm_reg(r0); #endif if (Pd) { if(armMode > 0x10 && armMode < 0x1f) /* !=0x10 ?*/ { reg[17].I = (reg[17].I & 0x00FFFF00) | (v & 0xFF0000FF); } } else { CPUUpdateCPSR(); u32 newValue = reg[16].I; if(armMode > 0x10) { newValue = (newValue & 0xFFFFFF00) | (v & 0x000000FF); } newValue = (newValue & 0x00FFFFFF) | (v & 0xFF000000); newValue |= 0x10; if(armMode > 0x10) { CPUSwitchMode(newValue & 0x1f, false); } reg[16].I = newValue; CPUUpdateFlags(); } } //Compile & run block of code, starting armNextPC extern "C" void CompileCode() { //Get the code ptr void* rv=EMIT_GET_PTR(); //update the block table EntryPoints[(armNextPC&ARAM_MASK)/4]=rv; //setup local pc counter u32 pc=armNextPC; //emitter/block setup armv_setup(); //the ops counter is used to terminate the block (max op count for a single block is 32 currently) //We don't want too long blocks for timing accuracy u32 ops=0; u32 Cycles=0; for(;;) { ops++; //Read opcode ... u32 opcd=CPUReadMemoryQuick(pc); #if HOST_CPU==CPU_X86 //Sanity check: Stale cache armv_check_cache(opcd,pc); #endif u32 op_flags; //Decode & handle opcode OpType opt=DecodeOpcode(opcd,op_flags); switch(opt) { case VOT_DataOp: { //data processing opcode that can be virtualised RenameRegReset(); /* if (op_flags & OP_READS_PC) armv_imm_to_reg(15,pc+8); else*/ #if HOST_CPU==CPU_X86 armv_imm_to_reg(15,rand()); #endif VirtualizeOpcode(opcd,op_flags,pc); #if HOST_CPU==CPU_X86 armv_imm_to_reg(15,rand()); #endif } break; case VOT_BR: { //Branch to reg ConditionCode cc=(ConditionCode)(opcd>>28); verify(op_flags&OP_SETS_PC); if (cc!=CC_AL) { LoadFlags(); armv_imm_to_reg(R15_ARM_NEXT,pc+4); } LoadReg(r0,opcd&0xF); StoreReg(r0,R15_ARM_NEXT,cc); } break; case VOT_B: case VOT_BL: { //Branch to imm //<<2, sign extend ! s32 offs=((s32)opcd<<8)>>6; if (op_flags & OP_IS_COND) { armv_imm_to_reg(R15_ARM_NEXT,pc+4); LoadFlags(); ConditionCode cc=(ConditionCode)(opcd>>28); if (opt==VOT_BL) { armv_MOV32(r0,pc+4); StoreReg(r0,14,cc); } armv_MOV32(r0,pc+8+offs); StoreReg(r0,R15_ARM_NEXT,cc); } else { if (opt==VOT_BL) armv_imm_to_reg(14,pc+4); armv_imm_to_reg(R15_ARM_NEXT,pc+8+offs); } } break; case VOT_Read: { //LDR/STR u32 offs=opcd&4095; bool U=opcd&(1<<23); bool Pre=opcd&(1<<24); bool W=opcd&(1<<21); bool I=opcd&(1<<25); u32 Rn=(opcd>>16)&15; u32 Rd=(opcd>>12)&15; bool DoWB=W || (!Pre && Rn!=Rd); //Write back if: W, Post update w/ Rn!=Rd bool DoAdd=DoWB || Pre; //Register not updated anyway if (I==false && offs==0) { DoWB=false; DoAdd=false; } //verify(Rd!=15); verify(!((Rn==15) && DoWB)); //AGU if (Rn!=15) { LoadReg(r0,Rn); if (DoAdd) { eReg dst=Pre?r0:r9; if (I==false && is_i8r4(offs)) { if (U) ADD(dst,r0,offs); else SUB(dst,r0,offs); } else { MemOperand2(dst,I,U,offs,opcd); } if (DoWB && dst==r0) MOV(r9,r0); } } else { u32 addr=pc+8; if (Pre && offs && I==false) { addr+=U?offs:-offs; } armv_MOV32(r0,addr); if (Pre && I==true) { MemOperand2(r1,I,U,offs,opcd); ADD(r0,r0,r1); } } if (CHK_BTS(1,20,0)) { if (Rd==15) { armv_MOV32(r1,pc+12); } else { LoadReg(r1,Rd); } } //Call handler armv_call(GetMemOp(CHK_BTS(1,20,1),CHK_BTS(1,22,1))); if (CHK_BTS(1,20,1)) { if (Rd==15) { verify(op_flags & OP_SETS_PC); StoreReg(r0,R15_ARM_NEXT); } else { StoreReg(r0,Rd); } } //Write back from AGU, if any if (DoWB) { StoreReg(r9,Rn); } } break; case VOT_MRS: { u32 Rd=(opcd>>12)&15; armv_call((void*)&CPUUpdateCPSR); if (opcd & (1<<22)) { LoadReg(r0,17); } else { LoadReg(r0,16); } StoreReg(r0,Rd); } break; case VOT_MSR: { u32 Rm=(opcd>>0)&15; LoadReg(r0,Rm); if (opcd & (1<<22)) armv_call((void*)(void (DYNACALL*)(u32))&MSR_do<1>); else armv_call((void*)(void (DYNACALL*)(u32))&MSR_do<0>); if (op_flags & OP_SETS_PC) armv_imm_to_reg(R15_ARM_NEXT,pc+4); } break; /* //LDM is disabled for now //Common cases of LDM/STM are converted to STR/LDR (tsz==1) //Other cases are very uncommon and not worth implementing case VOT_LDM: { //P=0, U=1, S=0, L=1, W=1 u32 Rn=(opcd>>16)&15; u32 RList=opcd&0xFFFF; u32 tsz=(cpuBitsSet[RList & 255] + cpuBitsSet[(RList >> 8) & 255]); verify(CHK_BTS(1,24,0)); //P=0 verify(CHK_BTS(1,23,1)); //U=1 verify(CHK_BTS(1,22,0)); //S=0 verify(CHK_BTS(1,21,1)); //W=1 verify(CHK_BTS(1,20,1)); //L=0 //if (tsz!=1) // goto FALLBACK; bool _W=true; //w=1 if (RList & (1<); if (_W) { StoreReg(r9,Rn); } } break; */ case VOT_Fallback: { //interpreter fallback //arm_single_op needs PC+4 on r15 //TODO: only write it if needed -> Probably not worth the code, very few fallbacks now... armv_imm_to_reg(15,pc+8); //For cond branch, MSR if (op_flags & OP_SETS_PC) armv_imm_to_reg(R15_ARM_NEXT,pc+4); #if HOST_CPU==CPU_X86 if ( !(op_flags & OP_SETS_PC) ) armv_imm_to_reg(R15_ARM_NEXT,pc+4); #endif armv_intpr(opcd); #if HOST_CPU==CPU_X86 if ( !(op_flags & OP_SETS_PC) ) { //Sanity check: next pc armv_check_pc(pc+4); #if 0 x86e->Emit(op_mov32,ECX,opcd); x86e->Emit(op_call,x86_ptr_imm(PrintOp)); #endif } #endif } break; default: die("can't happen\n"); } //Lets say each opcode takes 9 cycles for now .. Cycles+=9; #if HOST_CPU==CPU_X86 armv_imm_to_reg(15,0xF87641FF); armv_prof(opt,opcd,op_flags); #endif //Branch ? if (op_flags & OP_SETS_PC) { //x86e->Emit(op_call,x86_ptr_imm(DumpRegs)); // great debugging tool arm_printf("ARM: %06X: Block End %d\n",pc,ops); #if HOST_CPU==CPU_X86 && 0 //Great fallback finder, also spams console if (opt==VOT_Fallback) { x86e->Emit(op_mov32,ECX,opcd); x86e->Emit(op_call,x86_ptr_imm(PrintOp)); } #endif break; } //block size limit ? if (ops>32) { arm_printf("ARM: %06X: Block split %d\n",pc,ops); armv_imm_to_reg(R15_ARM_NEXT,pc+4); break; } //Goto next opcode pc+=4; } armv_end((void*)rv,Cycles); } void FlushCache() { icPtr=ICache; for (u32 i=0;i // These have to be declared somewhere or linker dies u8* ARM::emit_opt=0; eReg ARM::reg_addr; eReg ARM::reg_dst; s32 ARM::imma; void armEmit32(u32 emit32) { if (icPtr >= (ICache + ICacheSize - 64*1024)) { die("ICache is full, invalidate old entries ..."); //ifdebug } x86e->Emit(op_mov32,ECX,emit32); x86e->Emit(op_call,x86_ptr_imm(virt_arm_op)); } void *armGetEmitPtr() { return icPtr; } #endif void armt_init() { InitHash(); //align to next page .. ICache = (u8*)(((unat)ARM7_TCB+4095)& ~4095); #if HOST_OS==OS_DARWIN //Can't just mprotect on iOS munmap(ICache, ICacheSize); ICache = (u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); #endif #if HOST_OS == OS_WINDOWS DWORD old; VirtualProtect(ICache,ICacheSize,PAGE_EXECUTE_READWRITE,&old); #elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN printf("\n\t ARM7_TCB addr: %p | from: %p | addr here: %p\n", ICache, ARM7_TCB, armt_init); if (mprotect(ICache, ICacheSize, PROT_EXEC|PROT_READ|PROT_WRITE)) { perror("\n\tError - Couldn’t mprotect ARM7_TCB!"); verify(false); } #if TARGET_IPHONE memset((u8*)mmap(ICache, ICacheSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0),0xFF,ICacheSize); #else memset(ICache,0xFF,ICacheSize); #endif #endif icPtr=ICache; } #endif