diff --git a/src/ARM.cpp b/src/ARM.cpp index 7e0e9228..e1f93a58 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -190,8 +190,6 @@ void ARM::Reset() BreakReq = false; #endif - memset(InterlockTimestamp, 0, sizeof(InterlockTimestamp)); - // zorp JumpTo(ExceptionBase); } @@ -695,7 +693,6 @@ void ARMv5::Execute() NDS.ARM9Timestamp += Cycles; Cycles = 0; - CyclesILed = 0; } if (Halted == 2) @@ -1262,7 +1259,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) void ARMv5::AddCycles_CD_STR() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; s32 early; if (DataRegion == Mem9_ITCM) @@ -1287,7 +1284,7 @@ void ARMv5::AddCycles_CD_STR() void ARMv5::AddCycles_CD_STM() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; s32 early; if (DataRegion == Mem9_ITCM) @@ -1313,7 +1310,7 @@ void ARMv5::AddCycles_CDI_LDR() { // LDR cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; @@ -1340,7 +1337,7 @@ void ARMv5::AddCycles_CDI_LDM() { // LDM cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; @@ -1442,16 +1439,6 @@ void ARMv4::AddCycles_CD() } } -u64& ARMv5::Timestamp() -{ - return NDS.ARM9Timestamp; -} - -u64& ARMv4::Timestamp() -{ - return NDS.ARM7Timestamp; -} - u8 ARMv5::BusRead8(u32 addr) { return NDS.ARM9Read8(addr); diff --git a/src/ARM.h b/src/ARM.h index 25889329..dae5d96a 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,8 +30,6 @@ #include "debug/GdbStub.h" #endif -//#define INTERLOCK - namespace melonDS { inline u32 ROR(u32 x, u32 n) @@ -148,46 +146,6 @@ public: virtual void AddCycles_CD_STR() = 0; virtual void AddCycles_CD_STM() = 0; -/* - inline void AddCycles_L(const u32 delay, const u32 reg1) - { - if (InterlockTimestamp[reg1] > Timestamp() + delay); - Timestamp() = InterlockTimestamp[reg1]; - } - - inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2) - { - u64 cycles = std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]); - if (cycles > Timestamp() + delay) - Timestamp() = cycles; - } - - inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2, const u32 reg3) - { - u64 cycles = std::max(InterlockTimestamp[reg1], std::max(InterlockTimestamp[reg2], InterlockTimestamp[reg3])); - if (cycles > Timestamp() + delay) - Timestamp() = cycles; - }*/ - -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - virtual inline u32 GetReg(const u32 reg, const u32 delay = 0) = 0; - - // Must be called after all of an instruction's cycles are calculated!!! - virtual inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) = 0; -#else - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) - { - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) {} -#endif - - virtual u64& Timestamp() = 0; - void CheckGdbIncoming(); u32 Num; @@ -224,15 +182,6 @@ public: MemRegion CodeMem; - enum InterlockType - { - ILT_Norm = 0, - ILT_Mul = 1, - }; - - u8 InterlockType[16]; - u64 InterlockTimestamp[16]; - #ifdef JIT_ENABLED u32 FastBlockLookupStart, FastBlockLookupSize; u64* FastBlockLookup; @@ -318,14 +267,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = CodeCycles; - Cycles += std::max(numC, CyclesILed + 1); + Cycles += numC; } void AddCycles_CI(s32 numI) override { // code+internal s32 numC = CodeCycles; - numI += 1 + CyclesILed; + numI += 1; Cycles += std::max(numC, numI); } @@ -334,25 +283,6 @@ public: void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str void AddCycles_CD_STR() override; void AddCycles_CD_STM() override; - -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) override - { - if (InterlockTimestamp[reg] > (Timestamp() + delay)) - CyclesILed = InterlockTimestamp[reg] - (Timestamp() + delay); - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override - { - InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; - //InterlockType[reg] = type; - } -#endif - - u64& Timestamp() override; void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -417,8 +347,6 @@ public: bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region); - s32 CyclesILed; - #ifdef GDBSTUB_ENABLED u32 ReadMem(u32 addr, int size) override; void WriteMem(u32 addr, int size, u32 v) override; @@ -476,18 +404,6 @@ public: void AddCycles_CD_STR() override { AddCycles_CD(); } void AddCycles_CD_STM() override { AddCycles_CD(); } -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) override - { - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override{} -#endif - - u64& Timestamp() override; protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 93b347b5..f9623147 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -163,7 +163,7 @@ void A_MSR_REG(ARM* cpu) if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; - u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 val = cpu->R[cpu->CurInstr & 0xF]; // bit4 is forced to 1 val |= 0x00000010; @@ -216,7 +216,7 @@ void A_MCR(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; - u32 val = cpu->GetReg((cpu->CurInstr>>12)&0xF); + u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (cpu->Num==0 && cp==15) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index e3208668..bc655996 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -160,14 +160,14 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) cpu->SetC(b & 0x80000000); #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ - u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 s = (cpu->CurInstr>>7)&0x1F; \ shiftop(b, s); #define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ - u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ if ((cpu->CurInstr&0xF)==15) b += 4; \ - shiftop(b, (cpu->GetReg((cpu->CurInstr>>8)&0xF) & 0xFF)); + shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); #define A_IMPLEMENT_ALU_OP(x,s) \ @@ -313,7 +313,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ #define A_AND(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ } #define A_AND_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) #define A_EOR(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -357,7 +357,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) } #define A_EOR_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -375,7 +375,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) #define A_SUB(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -388,7 +388,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) } #define A_SUB_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -408,7 +408,7 @@ A_IMPLEMENT_ALU_OP(SUB,) #define A_RSB(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -421,7 +421,7 @@ A_IMPLEMENT_ALU_OP(SUB,) } #define A_RSB_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -441,7 +441,7 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -454,7 +454,7 @@ A_IMPLEMENT_ALU_OP(RSB,) } #define A_ADD_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -474,7 +474,7 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -487,7 +487,7 @@ A_IMPLEMENT_ALU_OP(ADD,) } #define A_ADC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a + b; \ u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \ u32 res = res_tmp + carry; \ @@ -509,7 +509,7 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -522,7 +522,7 @@ A_IMPLEMENT_ALU_OP(ADC,) } #define A_SBC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a - b; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -544,7 +544,7 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -557,7 +557,7 @@ A_IMPLEMENT_ALU_OP(SBC,) } #define A_RSC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = b - a; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -579,7 +579,7 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -589,7 +589,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -599,7 +599,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) #define A_ORR(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -636,7 +636,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) } #define A_ORR_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -699,7 +699,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) #define A_BIC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -712,7 +712,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) } #define A_BIC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -761,12 +761,18 @@ A_IMPLEMENT_ALU_OP(MVN,_S) void A_MUL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u32 res = rm * rs; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -780,55 +786,53 @@ void A_MUL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ(res & 0x80000000, - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_MLA(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; u32 res = (rm * rs) + rn; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - - u32 cycles; - if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; - else - { - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; - else cycles = 5; - } - - cpu->AddCycles_CI(cycles); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + + u32 cycles; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + } + + cpu->AddCycles_CI(cycles); } void A_UMULL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u64 res = (u64)rm * (u64)rs; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -842,27 +846,26 @@ void A_UMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_UMLAL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u64 res = (u64)rm * (u64)rs; - u64 rd = (u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL); + u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -876,24 +879,23 @@ void A_UMLAL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMULL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -907,27 +909,26 @@ void A_SMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; - s64 rd = (s64)((u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL)); + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -939,24 +940,17 @@ void A_SMLAL(ARM* cpu) else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; } - + cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -970,17 +964,16 @@ void A_SMLAxy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMLAWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -992,16 +985,15 @@ void A_SMLAWy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1011,16 +1003,15 @@ void A_SMULxy(ARM* cpu) u32 res = ((s16)rm * (s16)rs); cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1028,16 +1019,15 @@ void A_SMULWy(ARM* cpu) u32 res = ((s64)(s32)rm * (s16)rs) >> 16; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMLALxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 0); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 0); // yeah this one actually doesn't need two interlock cycles to interlock + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1052,8 +1042,7 @@ void A_SMLALxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->AddCycles_CI(1); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_CI(1); // TODO: interlock?? } @@ -1062,7 +1051,7 @@ void A_CLZ(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 val = cpu->R[cpu->CurInstr & 0xF]; u32 res = 0; while ((val & 0xFF000000) == 0) @@ -1087,8 +1076,8 @@ void A_QADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 res = rm + rn; if (OverflowAdd(rm, rn)) @@ -1098,16 +1087,15 @@ void A_QADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 res = rm - rn; if (OverflowSub(rm, rn)) @@ -1117,16 +1105,15 @@ void A_QSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; if (OverflowAdd(rn, rn)) { @@ -1144,16 +1131,15 @@ void A_QDADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; if (OverflowAdd(rn, rn)) { @@ -1171,8 +1157,7 @@ void A_QDSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } @@ -1183,7 +1168,7 @@ void A_QDSUB(ARM* cpu) void T_LSL_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; LSL_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1194,7 +1179,7 @@ void T_LSL_IMM(ARM* cpu) void T_LSR_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; LSR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1205,7 +1190,7 @@ void T_LSR_IMM(ARM* cpu) void T_ASR_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; ASR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1216,8 +1201,8 @@ void T_ASR_IMM(ARM* cpu) void T_ADD_REG_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1229,8 +1214,8 @@ void T_ADD_REG_(ARM* cpu) void T_SUB_REG_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1242,7 +1227,7 @@ void T_SUB_REG_(ARM* cpu) void T_ADD_IMM_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1255,7 +1240,7 @@ void T_ADD_IMM_(ARM* cpu) void T_SUB_IMM_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1275,9 +1260,9 @@ void T_MOV_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_CMP_IMM(ARM* cpu) +void T_CMP_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1289,7 +1274,7 @@ void T_CMP_IMM(ARM* cpu) void T_ADD_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a + b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1302,7 +1287,7 @@ void T_ADD_IMM(ARM* cpu) void T_SUB_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1316,8 +1301,8 @@ void T_SUB_IMM(ARM* cpu) void T_AND_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1327,8 +1312,8 @@ void T_AND_REG(ARM* cpu) void T_EOR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a ^ b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1338,8 +1323,8 @@ void T_EOR_REG(ARM* cpu) void T_LSL_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; LSL_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1349,8 +1334,8 @@ void T_LSL_REG(ARM* cpu) void T_LSR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; LSR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1360,8 +1345,8 @@ void T_LSR_REG(ARM* cpu) void T_ASR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; ASR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1371,8 +1356,8 @@ void T_ASR_REG(ARM* cpu) void T_ADC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res_tmp = a + b; u32 carry = (cpu->CPSR&0x20000000 ? 1:0); u32 res = res_tmp + carry; @@ -1386,8 +1371,8 @@ void T_ADC_REG(ARM* cpu) void T_SBC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res_tmp = a - b; u32 carry = (cpu->CPSR&0x20000000 ? 0:1); u32 res = res_tmp - carry; @@ -1401,8 +1386,8 @@ void T_SBC_REG(ARM* cpu) void T_ROR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; ROR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1412,8 +1397,8 @@ void T_ROR_REG(ARM* cpu) void T_TST_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); @@ -1422,7 +1407,7 @@ void T_TST_REG(ARM* cpu) void T_NEG_REG(ARM* cpu) { - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = -b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1434,8 +1419,8 @@ void T_NEG_REG(ARM* cpu) void T_CMP_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1446,8 +1431,8 @@ void T_CMP_REG(ARM* cpu) void T_CMN_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a + b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1458,8 +1443,8 @@ void T_CMN_REG(ARM* cpu) void T_ORR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a | b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1469,8 +1454,8 @@ void T_ORR_REG(ARM* cpu) void T_MUL_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a * b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1494,8 +1479,8 @@ void T_MUL_REG(ARM* cpu) void T_BIC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1505,7 +1490,7 @@ void T_BIC_REG(ARM* cpu) void T_MVN_REG(ARM* cpu) { - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1522,8 +1507,8 @@ void T_ADD_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->GetReg(rd); - u32 b = cpu->GetReg(rs); + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; cpu->AddCycles_C(); @@ -1542,8 +1527,8 @@ void T_CMP_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->GetReg(rd); - u32 b = cpu->GetReg(rs); + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1562,11 +1547,11 @@ void T_MOV_HIREG(ARM* cpu) if (rd == 15) { - cpu->JumpTo(cpu->GetReg(rs) | 1); + cpu->JumpTo(cpu->R[rs] | 1); } else { - cpu->R[rd] = cpu->GetReg(rs); + cpu->R[rd] = cpu->R[rs]; } // nocash-style debugging hook @@ -1583,7 +1568,7 @@ void T_MOV_HIREG(ARM* cpu) } -void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock? +void T_ADD_PCREL(ARM* cpu) { u32 val = cpu->R[15] & ~2; val += ((cpu->CurInstr & 0xFF) << 2); @@ -1591,7 +1576,7 @@ void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock? cpu->AddCycles_C(); } -void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? +void T_ADD_SPREL(ARM* cpu) { u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); @@ -1599,7 +1584,7 @@ void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thum cpu->AddCycles_C(); } -void T_ADD_SP(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? +void T_ADD_SP(ARM* cpu) { u32 val = cpu->R[13]; if (cpu->CurInstr & (1<<7)) diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 284dfa75..623be41a 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -46,15 +46,15 @@ void A_BLX_IMM(ARM* cpu) cpu->JumpTo(cpu->R[15] + offset + 1); } -void A_BX(ARM* cpu) // verify interlock +void A_BX(ARM* cpu) { - cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); } -void A_BLX_REG(ARM* cpu) // verify interlock +void A_BLX_REG(ARM* cpu) { u32 lr = cpu->R[15] - 4; - cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); cpu->R[14] = lr; } @@ -71,12 +71,12 @@ void T_BCOND(ARM* cpu) cpu->AddCycles_C(); } -void T_BX(ARM* cpu) // verify interlock +void T_BX(ARM* cpu) { - cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); } -void T_BLX_REG(ARM* cpu) // verify interlock +void T_BLX_REG(ARM* cpu) { if (cpu->Num==1) { @@ -85,7 +85,7 @@ void T_BLX_REG(ARM* cpu) // verify interlock } u32 lr = cpu->R[15] - 1; - cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); cpu->R[14] = lr; } diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 4a640bc5..e2726005 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -53,7 +53,7 @@ namespace melonDS::ARMInterpreter if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_WB_CALC_OFFSET_REG(shiftop) \ - u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ u32 shift = ((cpu->CurInstr>>7)&0x1F); \ shiftop(offset, shift); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; @@ -61,8 +61,8 @@ namespace melonDS::ARMInterpreter #define A_STR \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(offset, storeval); \ @@ -72,8 +72,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STR_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(addr, storeval); \ @@ -82,8 +82,8 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(offset, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -92,8 +92,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STRB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(addr, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -101,7 +101,7 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ @@ -115,12 +115,11 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (offset & 3) ? 2 : 1, cpu->ILT_Norm); \ } // TODO: user mode #define A_LDR_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ @@ -134,37 +133,26 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (addr & 3) ? 2 : 1, cpu->ILT_Norm); \ } #define A_LDRB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; // TODO: user mode #define A_LDRB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; @@ -242,14 +230,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_HD_CALC_OFFSET_REG \ - u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_STRH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(offset, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -257,8 +245,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(addr, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -269,47 +257,35 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRD \ if (cpu->Num != 0) return; \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ - u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \ + if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ - if (dataabort) return; \ - if (r == 14) \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else \ - { \ - cpu->R[r+1] = val; \ - cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ - } \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ - u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \ + if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ - if (dataabort) return; \ - if (r == 14) \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else \ - { \ - cpu->R[r+1] = val; \ - cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ - } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ if (cpu->Num != 0) return; \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(offset, cpu->GetReg(r)); /* yes, this data abort behavior is on purpose */ \ - u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ @@ -317,102 +293,72 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(addr, cpu->GetReg(r)); \ - u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -452,8 +398,8 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) void A_SWP(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF]; if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -462,38 +408,20 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) - { - cpu->R[rd] = ROR(val, 8*(base&0x3)); - - if (cpu->Num == 0) - { - u32 cycles; - if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?) - { - cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; - } - else cycles = 1; - - cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); - } - } - else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? - cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); + if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); + else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? } - else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI_SWP(); } void A_SWPB(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1) & 0xFF; - u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -502,24 +430,14 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) - { - cpu->R[rd] = val; - - // add an extra interlock cycle when doing a load from a non-itcm address (checkme: does it matter whether you're executing from there?) - if (cpu->Num == 0) - cpu->SetCycles_L(rd, ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2, cpu->ILT_Norm); - } - else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? - cpu->JumpTo(val & ~1); + if (rd != 15) cpu->R[rd] = val; + else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? } - else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI_SWP(); } @@ -527,12 +445,11 @@ void A_SWPB(ARM* cpu) void A_LDM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->GetReg(baseid, 1); + u32 base = cpu->R[baseid]; u32 wbbase; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - u32 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?) if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -568,7 +485,6 @@ void A_LDM(ARM* cpu) } first = false; - lastreg = i; if (!preinc) base += 4; } } @@ -582,26 +498,12 @@ void A_LDM(ARM* cpu) { goto dataabort; } - cpu->AddCycles_CDI_LDM(); if (!preinc) base += 4; if (cpu->Num == 1) pc &= ~0x1; } - else - { - cpu->AddCycles_CDI_LDM(); - - if (cpu->Num == 0) - { - u32 lastbase = base; - if (!preinc) lastbase -= 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - } // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -635,8 +537,6 @@ void A_LDM(ARM* cpu) if (false) { dataabort: - cpu->AddCycles_CDI_LDM(); - // CHECKME: interlock shouldn't apply when it data aborts, right? // switch back to original set of regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -645,12 +545,14 @@ void A_LDM(ARM* cpu) // restore original value of base in case the reg got written to cpu->R[baseid] = oldbase; } + + cpu->AddCycles_CDI_LDM(); } void A_STM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->GetReg(baseid, 1); + u32 base = cpu->R[baseid]; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; @@ -694,7 +596,7 @@ void A_STM(ARM* cpu) val = oldbase; else val = base; } - else val = cpu->GetReg(i, 1+cpu->DataCycles); + else val = cpu->R[i]; if (i == 15) val+=4; @@ -738,170 +640,160 @@ void A_STM(ARM* cpu) -void T_LDR_PCREL(ARM* cpu) // checkme: can pc be interlocked? +void T_LDR_PCREL(ARM* cpu) { u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } -void T_STR_REG(ARM* cpu) +void T_STR_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite32(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_STRB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite8(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDR_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 val; if (cpu->DataRead32(addr, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, (addr & 3) ? 2 : 1, cpu->ILT_Norm); } void T_LDRB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STRH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite16(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRSB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRSH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite32(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD_STR(); + cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); + cpu->AddCycles_CD_LDR(); } void T_LDR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 val; if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm); } void T_STRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite8(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite16(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } -void T_STR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_STR_SPREL(ARM* cpu) { u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; - cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1)); + cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CD_STR(); } -void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_LDR_SPREL(ARM* cpu) { u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -919,7 +811,7 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) nregs++; - u32 base = cpu->GetReg(13); + u32 base = cpu->R[13]; base -= (nregs<<2); u32 wbbase = base; @@ -927,8 +819,8 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->GetReg(i, 1)) - : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) // verify interlock + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) { goto dataabort; } @@ -952,11 +844,10 @@ void T_PUSH(ARM* cpu) cpu->AddCycles_CD_STM(); } -void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_POP(ARM* cpu) { u32 base = cpu->R[13]; bool first = true; - u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -986,16 +877,6 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? } cpu->R[13] = base; - - cpu->AddCycles_CDI_LDM(); - if (cpu->Num == 0) - { - u32 lastbase = base - 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - return; dataabort: cpu->AddCycles_CDI_LDM(); @@ -1003,15 +884,15 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? void T_STMIA(ARM* cpu) { - u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->GetReg(i, 1)) - : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) { goto dataabort; } @@ -1028,9 +909,8 @@ void T_STMIA(ARM* cpu) void T_LDMIA(ARM* cpu) { - u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; - u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -1043,23 +923,11 @@ void T_LDMIA(ARM* cpu) } first = false; base += 4; - lastreg = i; } } if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - - - cpu->AddCycles_CDI_LDM(); - if (cpu->Num == 0) - { - u32 lastbase = base - 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - return; dataabort: cpu->AddCycles_CDI_LDM();