diff --git a/src/ARM.cpp b/src/ARM.cpp index 1d552889..b7acf3da 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -159,6 +159,7 @@ void ARM::Reset() Cycles = 0; Halted = 0; DataCycles = 0; + CheckInterlock = false; IRQ = 0; IRQTimestamp = -1; @@ -701,11 +702,43 @@ void ARMv5::StartExecTHUMB() R[15] += 2; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - // code fetch is done during the execute stage cycle handling - if (R[15] & 0x2) NullFetch = true; - else NullFetch = false; - PC = R[15]; + + CheckInterlock = true; + // check for interlocks + if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions + { + // abt + } + else [[likely]] // actually execute + { + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); + } + if (R[15] & 0x2) + { + // the value we need is cached by the bus + // in practice we can treat this as a 1 cycle fetch, with no penalties + RetVal = NextInstr[1] >> 16; + NDS.ARM9Timestamp++; + if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory; + Store = false; + DataRegion = Mem9_Null; + + QueueFunction(&ARMv5::ContExecTHUMB); + } + else + { + DelayedQueue = &ARMv5::ContExecTHUMB; + CodeRead32(R[15]); + } +} + +void ARMv5::ContExecTHUMB() +{ + NextInstr[1] = RetVal; + + CheckInterlock = false; if ((NDS.ARM9Timestamp >= IRQTimestamp) && !(CPSR & 0x80)) TriggerIRQ(); else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions { @@ -725,10 +758,36 @@ void ARMv5::StartExecARM() R[15] += 4; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - // code fetch is done during the execute stage cycle handling - NullFetch = false; - PC = R[15]; + CheckInterlock = true; + // check for interlocks + if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions + { + // abt + } + else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute + { + u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); + ARMInterpreter::ARMInstrTable[icode](this); + } + else if ((CurInstr & 0xFE000000) == 0xFA000000) + { + ARMInterpreter::A_BLX_IMM(this); + } + else if ((CurInstr & 0x0FF000F0) == 0x01200070) + { + ARMInterpreter::A_BKPT(this); // always passes regardless of condition code + } + + DelayedQueue = &ARMv5::ContExecARM; + CodeRead32(R[15]); +} + +void ARMv5::ContExecARM() +{ + NextInstr[1] = RetVal; + + CheckInterlock = false; if ((NDS.ARM9Timestamp >= IRQTimestamp) && !(CPSR & 0x80)) TriggerIRQ(); else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions { @@ -749,6 +808,7 @@ void ARMv5::StartExecARM() } else AddCycles_C(); + QueueFunction(&ARMv5::WBCheck_2); } @@ -855,6 +915,7 @@ void ARMv5::Execute() else { // we got a new addition to the list; redo the current entry and exit to resolve main ram + if (FuncQueueEnd < FuncQueueFill) FuncQueueEnd = FuncQueueFill; FuncQueueFill = FuncQueueProg; return; } @@ -1357,28 +1418,10 @@ u32 ARMv5::ReadMem(u32 addr, int size) void ARMv5::CodeFetch() { - if (NullFetch) - { - // the value we need is cached by the bus - // in practice we can treat this as a 1 cycle fetch, with no penalties - RetVal = NextInstr[1] >> 16; - NDS.ARM9Timestamp++; - if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory; - Store = false; - DataRegion = Mem9_Null; - QueueFunction(&ARMv5::AddExecute); - } - else - { - DelayedQueue = &ARMv5::AddExecute; - CodeRead32(PC); - } } void ARMv5::AddExecute() { - NextInstr[1] = RetVal; - NDS.ARM9Timestamp += ExecuteCycles; } diff --git a/src/ARM.h b/src/ARM.h index dfc81ff1..401db833 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -262,6 +262,7 @@ public: u8 FuncQueueProg; u8 ExecuteCycles; bool FuncQueueActive; + bool CheckInterlock; #ifdef JIT_ENABLED u32 FastBlockLookupStart, FastBlockLookupSize; @@ -345,14 +346,14 @@ public: void AddCycles_C() override { - ExecuteCycles = 0; - CodeFetch(); + //ExecuteCycles = 0; + //CodeFetch(); } void AddCycles_CI(s32 numX) override { ExecuteCycles = numX; - CodeFetch(); + QueueFunction(&ARMv5::AddExecute); } void AddCycles_MW(s32 numM) @@ -730,7 +731,9 @@ public: // Queue Functions void StartExecARM(); + void ContExecARM(); void StartExecTHUMB(); + void ContExecTHUMB(); void AddExecute(); void AddCycles_MW_2(); void DelayIfITCM_2(); diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index cd3346d0..83829fb3 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -36,6 +36,7 @@ namespace melonDS::ARMInterpreter void A_UNK(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8); #ifdef GDBSTUB_ENABLED @@ -56,6 +57,7 @@ void A_UNK(ARM* cpu) void T_UNK(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4); #ifdef GDBSTUB_ENABLED @@ -75,6 +77,7 @@ void T_UNK(ARM* cpu) void A_BKPT(ARM* cpu) { + if (cpu->CheckInterlock) return; if (cpu->Num == 1) return A_UNK(cpu); // checkme Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message @@ -85,6 +88,7 @@ void A_BKPT(ARM* cpu) void A_MSR_IMM(ARM* cpu) { + if (cpu->CheckInterlock) return; if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr else cpu->AddCycles_C(); @@ -150,7 +154,7 @@ void A_MSR_IMM(ARM* cpu) void A_MSR_REG(ARM* cpu) { - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr else cpu->AddCycles_C(); @@ -217,6 +221,7 @@ void A_MSR_REG(ARM* cpu) void A_MRS(ARM* cpu) { + if (cpu->CheckInterlock) return; u32 psr; if (cpu->CurInstr & (1<<22)) { @@ -257,6 +262,12 @@ void A_MRS(ARM* cpu) void A_MCR(ARM* cpu) { + if (cpu->CheckInterlock) + { + if (!((cpu->CPSR & 0x1F) == 0x10)) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr>>12)&0xF); + return; + } + if ((cpu->CPSR & 0x1F) == 0x10) return A_UNK(cpu); @@ -268,8 +279,6 @@ void A_MCR(ARM* cpu) u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr>>12)&0xF); - if (cpu->Num==0 && cp==15) { ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT @@ -291,6 +300,12 @@ void A_MCR(ARM* cpu) void A_MRC(ARM* cpu) { + if (cpu->CheckInterlock) + { + if (!((cpu->CPSR & 0x1F) == 0x10)) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr>>12)&0xF); + return; + } + if ((cpu->CPSR & 0x1F) == 0x10) return A_UNK(cpu); @@ -334,6 +349,7 @@ void A_MRC(ARM* cpu) void A_SVC(ARM* cpu) // A_SWI { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; @@ -348,6 +364,7 @@ void A_SVC(ARM* cpu) // A_SWI void T_SVC(ARM* cpu) // T_SWI { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 66386274..3e80f423 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -84,25 +84,25 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) #define LSL_IMM_S(x, s) \ if (s > 0) \ { \ - cpu->SetC(x & (1<<(32-s))); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(32-s))); \ x <<= s; \ } #define LSR_IMM_S(x, s) \ if (s == 0) { \ - cpu->SetC(x & (1<<31)); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<31)); \ x = 0; \ } else { \ - cpu->SetC(x & (1<<(s-1))); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); \ x >>= s; \ } #define ASR_IMM_S(x, s) \ if (s == 0) { \ - cpu->SetC(x & (1<<31)); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<31)); \ x = ((s32)x) >> 31; \ } else { \ - cpu->SetC(x & (1<<(s-1))); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); \ x = ((s32)x) >> s; \ } @@ -111,11 +111,11 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) { \ u32 newc = (x & 1); \ x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \ - cpu->SetC(newc); \ + if (!cpu->CheckInterlock) cpu->SetC(newc); \ } \ else \ { \ - cpu->SetC(x & (1<<(s-1))); \ + if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); \ x = ROR(x, s); \ } @@ -135,19 +135,19 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) x = ROR(x, (s&0x1F)); #define LSL_REG_S(x, s) \ - if (s > 31) { cpu->SetC((s>32) ? 0 : (x & (1<<0))); x = 0; } \ - else if (s > 0) { cpu->SetC(x & (1<<(32-s))); x <<= s; } + if (s > 31) { if (!cpu->CheckInterlock) cpu->SetC((s>32) ? 0 : (x & (1<<0))); x = 0; } \ + else if (s > 0) { if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(32-s))); x <<= s; } #define LSR_REG_S(x, s) \ - if (s > 31) { cpu->SetC((s>32) ? 0 : (x & (1<<31))); x = 0; } \ - else if (s > 0) { cpu->SetC(x & (1<<(s-1))); x >>= s; } + if (s > 31) { if (!cpu->CheckInterlock) cpu->SetC((s>32) ? 0 : (x & (1<<31))); x = 0; } \ + else if (s > 0) { if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); x >>= s; } #define ASR_REG_S(x, s) \ - if (s > 31) { cpu->SetC(x & (1<<31)); x = ((s32)x) >> 31; } \ - else if (s > 0) { cpu->SetC(x & (1<<(s-1))); x = ((s32)x) >> s; } + if (s > 31) { if (!cpu->CheckInterlock) cpu->SetC(x & (1<<31)); x = ((s32)x) >> 31; } \ + else if (s > 0) { if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); x = ((s32)x) >> s; } #define ROR_REG_S(x, s) \ - if (s > 0) cpu->SetC(x & (1<<(s-1))); \ + if (s > 0) if (!cpu->CheckInterlock) cpu->SetC(x & (1<<(s-1))); \ x = ROR(x, (s&0x1F)); @@ -160,7 +160,7 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) #define A_CALC_OP2_IMM_S \ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ if ((cpu->CurInstr>>7)&0x1E) \ - cpu->SetC(b & 0x80000000); \ + if (!cpu->CheckInterlock) cpu->SetC(b & 0x80000000); \ u16 ilmask = 0; \ u8 iltime[16]; @@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a & b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -341,9 +341,9 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a & b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -361,7 +361,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a ^ b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -376,9 +376,9 @@ A_IMPLEMENT_ALU_OP(AND,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a ^ b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -396,7 +396,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a - b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -411,11 +411,11 @@ A_IMPLEMENT_ALU_OP(EOR,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a - b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZCV(res & 0x80000000, \ !res, \ CarrySub(a, b), \ OverflowSub(a, b)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -433,7 +433,7 @@ A_IMPLEMENT_ALU_OP(SUB,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = b - a; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -448,11 +448,11 @@ A_IMPLEMENT_ALU_OP(SUB,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = b - a; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZCV(res & 0x80000000, \ !res, \ CarrySub(b, a), \ OverflowSub(b, a)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -470,7 +470,7 @@ A_IMPLEMENT_ALU_OP(RSB,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a + b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -484,11 +484,11 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD_S(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZCV(res & 0x80000000, \ !res, \ CarryAdd(a, b), \ OverflowAdd(a, b)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -503,10 +503,10 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -518,8 +518,9 @@ A_IMPLEMENT_ALU_OP(ADD,) } #define A_ADC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a + b; \ u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \ u32 res = res_tmp + carry; \ @@ -527,7 +528,6 @@ A_IMPLEMENT_ALU_OP(ADD,) !res, \ CarryAdd(a, b) | CarryAdd(res_tmp, carry), \ OverflowAdc(a, b, carry)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -542,10 +542,10 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -557,8 +557,9 @@ A_IMPLEMENT_ALU_OP(ADC,) } #define A_SBC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a - b; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -566,7 +567,6 @@ A_IMPLEMENT_ALU_OP(ADC,) !res, \ CarrySub(a, b) & CarrySub(res_tmp, carry), \ OverflowSbc(a, b, carry)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -581,10 +581,10 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -596,8 +596,9 @@ A_IMPLEMENT_ALU_OP(SBC,) } #define A_RSC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = b - a; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -605,7 +606,6 @@ A_IMPLEMENT_ALU_OP(SBC,) !res, \ CarrySub(b, a) & CarrySub(res_tmp, carry), \ OverflowSbc(b, a, carry)); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_OP(RSC,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a & b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ @@ -654,7 +654,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a ^ b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ @@ -685,7 +685,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a - b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ @@ -720,7 +720,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a + b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ @@ -755,7 +755,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a | b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -770,9 +770,9 @@ A_IMPLEMENT_ALU_TEST(CMN,) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a | b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -787,7 +787,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S) #define A_MOV(c) \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -799,9 +799,9 @@ A_IMPLEMENT_ALU_OP(ORR,_S) } #define A_MOV_S(c) \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ cpu->SetNZ(b & 0x80000000, \ !b); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -837,7 +837,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a & ~b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -852,9 +852,9 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ iltime[(cpu->CurInstr>>16)&0xF] = c; \ u32 res = a & ~b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF)), iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -870,7 +870,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S) #define A_MVN(c) \ b = ~b; \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -883,9 +883,9 @@ A_IMPLEMENT_ALU_OP(BIC,_S) #define A_MVN_S(c) \ b = ~b; \ + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ cpu->SetNZ(b & 0x80000000, \ !b); \ - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask, iltime); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -905,22 +905,10 @@ void A_MUL(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u32 res = rm * rs; - - // all multiply instructions fail writes to r15 on arm7/9 - if (((cpu->CurInstr >> 16) & 0xF) != 15) - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ(res & 0x80000000, - !res); - } - if (cpu->Num == 0) { - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); // S else { @@ -941,16 +929,9 @@ void A_MUL(ARM* cpu) cpu->AddCycles_CI(cycles); } -} + u32 res = rm * rs; -void A_MLA(ARM* cpu) -{ - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; - - u32 res = (rm * rs) + rn; - + // all multiply instructions fail writes to r15 on arm7/9 if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; @@ -959,14 +940,21 @@ void A_MLA(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); } +} + +void A_MLA(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; if (cpu->Num == 0) { u8 iltime[16] = {}; iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); else { @@ -987,6 +975,16 @@ void A_MLA(ARM* cpu) cpu->AddCycles_CI(cycles); } + u32 res = (rm * rs) + rn; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + } } void A_UMULL(ARM* cpu) @@ -994,23 +992,10 @@ void A_UMULL(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u64 res = (u64)rm * (u64)rs; - - if (((cpu->CurInstr >> 12) & 0xF) != 15) - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - if (((cpu->CurInstr >> 16) & 0xF) != 15) - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - } - if (cpu->Num == 0) { - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); else { @@ -1031,18 +1016,8 @@ void A_UMULL(ARM* cpu) cpu->AddCycles_CI(cycles); } -} - -void A_UMLAL(ARM* cpu) -{ - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u64 res = (u64)rm * (u64)rs; - u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); - res += rd; - if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; if (((cpu->CurInstr >> 16) & 0xF) != 15) @@ -1053,15 +1028,22 @@ void A_UMLAL(ARM* cpu) cpu->SetNZ((u32)(res >> 63ULL), !res); } +} + +void A_UMLAL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); if (cpu->Num == 0) { u8 iltime[16] = {}; iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF))/* | - (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF))/* | + (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); else { @@ -1082,14 +1064,9 @@ void A_UMLAL(ARM* cpu) cpu->AddCycles_CI(cycles); } -} + u64 res = (u64)rm * (u64)rs; -void A_SMULL(ARM* cpu) -{ - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - - s64 res = (s64)(s32)rm * (s64)(s32)rs; + res += rd; if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; @@ -1101,11 +1078,17 @@ void A_SMULL(ARM* cpu) cpu->SetNZ((u32)(res >> 63ULL), !res); } +} + +void A_SMULL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->Num == 0) { - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); else { @@ -1125,17 +1108,8 @@ void A_SMULL(ARM* cpu) if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5)); cpu->AddCycles_CI(cycles); } -} - -void A_SMLAL(ARM* cpu) -{ - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; - - s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); - res += rd; if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; @@ -1147,15 +1121,22 @@ void A_SMLAL(ARM* cpu) cpu->SetNZ((u32)(res >> 63ULL), !res); } +} + +void A_SMLAL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); if (cpu->Num == 0) { u8 iltime[16] {}; iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF)) /*| - (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)) /*| + (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); else { @@ -1175,6 +1156,21 @@ void A_SMLAL(ARM* cpu) if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5)); cpu->AddCycles_CI(cycles); } + + s64 res = (s64)(s32)rm * (s64)(s32)rs; + + res += rd; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + } } void A_SMLAxy(ARM* cpu) @@ -1185,6 +1181,12 @@ void A_SMLAxy(ARM* cpu) u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + u8 iltime[16] {}; + iltime[(cpu->CurInstr>>12)&0xF] = 1; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); + if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; if (cpu->CurInstr & (1<<6)) rs >>= 16; @@ -1199,12 +1201,6 @@ void A_SMLAxy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - - u8 iltime[16] {}; - iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1219,6 +1215,12 @@ void A_SMLAWy(ARM* cpu) u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + u8 iltime[16] = {}; + iltime[(cpu->CurInstr>>12)&0xF] = 1; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); + if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1231,11 +1233,6 @@ void A_SMLAWy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - u8 iltime[16] = {}; - iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1249,6 +1246,9 @@ void A_SMULxy(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; if (cpu->CurInstr & (1<<6)) rs >>= 16; @@ -1259,9 +1259,6 @@ void A_SMULxy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1275,6 +1272,9 @@ void A_SMULWy(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1283,9 +1283,6 @@ void A_SMULWy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1299,6 +1296,13 @@ void A_SMLALxy(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u8 iltime[16] {}; + iltime[(cpu->CurInstr>>12)&0xF] = 1; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF))/* | + (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); + if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; if (cpu->CurInstr & (1<<6)) rs >>= 16; @@ -1315,13 +1319,6 @@ void A_SMLALxy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - - u8 iltime[16] {}; - iltime[(cpu->CurInstr>>12)&0xF] = 1; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | - (1 << ((cpu->CurInstr >> 8) & 0xF)) | - (1 << ((cpu->CurInstr >> 12) & 0xF))/* | - (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); cpu->AddCycles_CI(2); // 1 X ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M @@ -1336,6 +1333,8 @@ void A_CLZ(ARM* cpu) u32 val = cpu->R[cpu->CurInstr & 0xF]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); + u32 res = 0; while ((val & 0xFF000000) == 0) { @@ -1350,7 +1349,6 @@ void A_CLZ(ARM* cpu) val |= 0x1; } - ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); cpu->AddCycles_C(); if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); @@ -1364,6 +1362,8 @@ void A_QADD(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); + u32 res = rm + rn; if (OverflowAdd(rm, rn)) { @@ -1375,7 +1375,6 @@ void A_QADD(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1388,6 +1387,8 @@ void A_QSUB(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); u32 res = rm - rn; if (OverflowSub(rm, rn)) @@ -1399,7 +1400,6 @@ void A_QSUB(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1412,6 +1412,8 @@ void A_QDADD(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); if (OverflowAdd(rn, rn)) { @@ -1431,7 +1433,6 @@ void A_QDADD(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1444,6 +1445,8 @@ void A_QDSUB(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); if (OverflowAdd(rn, rn)) { @@ -1463,7 +1466,6 @@ void A_QDSUB(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); ((ARMv5*)cpu)->AddCycles_MW(1); // normally 1 length memory stages should be implicit, but we need one here explicitly for interlocks to work @@ -1480,11 +1482,11 @@ void T_LSL_IMM(ARM* cpu) { u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); LSL_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1492,11 +1494,11 @@ void T_LSR_IMM(ARM* cpu) { u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); LSR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1504,11 +1506,11 @@ void T_ASR_IMM(ARM* cpu) { u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); ASR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1516,13 +1518,13 @@ void T_ADD_REG_(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarryAdd(a, b), OverflowAdd(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); cpu->AddCycles_C(); } @@ -1530,13 +1532,13 @@ void T_SUB_REG_(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(a, b), OverflowSub(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); cpu->AddCycles_C(); } @@ -1544,13 +1546,13 @@ void T_ADD_IMM_(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarryAdd(a, b), OverflowAdd(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1558,23 +1560,23 @@ void T_SUB_IMM_(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(a, b), OverflowSub(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } void T_MOV_IMM(ARM* cpu) { u32 b = cpu->CurInstr & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; cpu->SetNZ(0, !b); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1582,12 +1584,12 @@ void T_CMP_IMM(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); u32 res = a - b; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(a, b), OverflowSub(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1595,13 +1597,13 @@ void T_ADD_IMM(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); u32 res = a + b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarryAdd(a, b), OverflowAdd(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1609,13 +1611,13 @@ void T_SUB_IMM(ARM* cpu) { u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); u32 res = a - b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(a, b), OverflowSub(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1624,11 +1626,11 @@ void T_AND_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a & b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1636,11 +1638,11 @@ void T_EOR_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a ^ b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1648,11 +1650,11 @@ void T_LSL_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); LSL_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1660,11 +1662,11 @@ void T_LSR_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); LSR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1672,11 +1674,11 @@ void T_ASR_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); ASR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1684,6 +1686,7 @@ void T_ADC_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res_tmp = a + b; u32 carry = (cpu->CPSR&0x20000000 ? 1:0); u32 res = res_tmp + carry; @@ -1692,7 +1695,6 @@ void T_ADC_REG(ARM* cpu) !res, CarryAdd(a, b) | CarryAdd(res_tmp, carry), OverflowAdc(a, b, carry)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1700,6 +1702,7 @@ void T_SBC_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res_tmp = a - b; u32 carry = (cpu->CPSR&0x20000000 ? 0:1); u32 res = res_tmp - carry; @@ -1708,7 +1711,6 @@ void T_SBC_REG(ARM* cpu) !res, CarrySub(a, b) & CarrySub(res_tmp, carry), OverflowSbc(a, b, carry)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1716,11 +1718,11 @@ void T_ROR_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); ROR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1728,23 +1730,23 @@ void T_TST_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } void T_NEG_REG(ARM* cpu) { u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); u32 res = -b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(0, b), OverflowSub(0, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1752,12 +1754,12 @@ void T_CMP_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a - b; cpu->SetNZCV(res & 0x80000000, !res, CarrySub(a, b), OverflowSub(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1765,12 +1767,12 @@ void T_CMN_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a + b; cpu->SetNZCV(res & 0x80000000, !res, CarryAdd(a, b), OverflowAdd(a, b)); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1778,11 +1780,11 @@ void T_ORR_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a | b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1790,16 +1792,12 @@ void T_MUL_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; - u32 res = a * b; - cpu->R[cpu->CurInstr & 0x7] = res; - cpu->SetNZ(res & 0x80000000, - !res); s32 cycles; if (cpu->Num == 0) { cycles = 3; - ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); } else { @@ -1810,28 +1808,32 @@ void T_MUL_REG(ARM* cpu) cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means... } cpu->AddCycles_CI(cycles); // implemented as S variant, doesn't interlock + u32 res = a * b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); } void T_BIC_REG(ARM* cpu) { u32 a = cpu->R[cpu->CurInstr & 0x7]; u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); u32 res = a & ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } void T_MVN_REG(ARM* cpu) { u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); u32 res = ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1844,12 +1846,13 @@ void T_ADD_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); + u32 a = cpu->R[rd]; u32 b = cpu->R[rs]; cpu->AddCycles_C(); - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); if (rd == 15) { cpu->JumpTo((a + b) | 1); @@ -1865,6 +1868,8 @@ void T_CMP_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); + u32 a = cpu->R[rd]; u32 b = cpu->R[rs]; u32 res = a - b; @@ -1873,7 +1878,7 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); - + cpu->AddCycles_C(); if ((cpu->Num == 1) && (rd == 15)) @@ -1886,15 +1891,14 @@ void T_CMP_HIREG(ARM* cpu) cpu->CPSR |= 0x20; // keep it from crashing the emulator at least } } - else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); } void T_MOV_HIREG(ARM* cpu) { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); + + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); cpu->AddCycles_C(); if (rd == 15) @@ -1922,34 +1926,33 @@ void T_MOV_HIREG(ARM* cpu) void T_ADD_PCREL(ARM* cpu) { + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(15); u32 val = cpu->R[15] & ~2; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(15); cpu->AddCycles_C(); } void T_ADD_SPREL(ARM* cpu) { + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(13); u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(13); cpu->AddCycles_C(); } void T_ADD_SP(ARM* cpu) { + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(13); u32 val = cpu->R[13]; if (cpu->CurInstr & (1<<7)) val -= ((cpu->CurInstr & 0x7F) << 2); else val += ((cpu->CurInstr & 0x7F) << 2); cpu->R[13] = val; - - if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(13); cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 2ff63e75..1f271632 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -27,6 +27,7 @@ using Platform::LogLevel; void A_B(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); s32 offset = (s32)(cpu->CurInstr << 8) >> 6; cpu->JumpTo(cpu->R[15] + offset); @@ -34,6 +35,7 @@ void A_B(ARM* cpu) void A_BL(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); s32 offset = (s32)(cpu->CurInstr << 8) >> 6; cpu->R[14] = cpu->R[15] - 4; @@ -42,6 +44,7 @@ void A_BL(ARM* cpu) void A_BLX_IMM(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); s32 offset = (s32)(cpu->CurInstr << 8) >> 6; if (cpu->CurInstr & 0x01000000) offset += 2; @@ -51,14 +54,14 @@ void A_BLX_IMM(ARM* cpu) void A_BX(ARM* cpu) { - if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); cpu->AddCycles_C(); cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); } void A_BLX_REG(ARM* cpu) { - if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); cpu->AddCycles_C(); u32 lr = cpu->R[15] - 4; cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); @@ -69,6 +72,7 @@ void A_BLX_REG(ARM* cpu) void T_BCOND(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF)) { @@ -79,14 +83,14 @@ void T_BCOND(ARM* cpu) void T_BX(ARM* cpu) { - if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); cpu->AddCycles_C(); cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); } void T_BLX_REG(ARM* cpu) { - if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); + if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); cpu->AddCycles_C(); if (cpu->Num==1) { @@ -101,6 +105,7 @@ void T_BLX_REG(ARM* cpu) void T_B(ARM* cpu) { + if (cpu->CheckInterlock) return; cpu->AddCycles_C(); s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20; cpu->JumpTo(cpu->R[15] + offset + 1); @@ -108,6 +113,7 @@ void T_B(ARM* cpu) void T_BL_LONG_1(ARM* cpu) { + if (cpu->CheckInterlock) return; s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 9; cpu->R[14] = cpu->R[15] + offset; cpu->AddCycles_C(); @@ -118,6 +124,8 @@ void T_BL_LONG_2(ARM* cpu) if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction. return T_UNK(cpu); // TODO: Check ARM7 for exceptions + if (cpu->CheckInterlock) return; + cpu->AddCycles_C(); s32 offset = (cpu->CurInstr & 0x7FF) << 1; u32 pc = cpu->R[14] + offset; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index f4a8494f..3148b1bb 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -25,13 +25,14 @@ namespace melonDS::ARMInterpreter { template -inline void ExecuteStage(ARM* cpu, u16 ilmask) +inline bool ExecuteStage(ARM* cpu, u16 ilmask) { if (cpu->Num == 0) { - ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); + if (cpu->CheckInterlock) { ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); return false;} ((ARMv5*)cpu)->AddCycles_C(); } + return true; } @@ -85,7 +86,7 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 cpu->LDRFailedRegs = 0; static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); - ExecuteStage(cpu, (ilmask | (1<(cpu, (ilmask | (1<R[rn]; @@ -182,7 +183,7 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 { static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); - ExecuteStage(cpu, (ilmask | (1<(cpu, (ilmask | (1<R[rn]; @@ -363,7 +364,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ cpu->LDRFailedRegs = 0; \ - ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + if (!ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \ bool dabort = !cpu->DataRead32(offset, r); \ u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(offset+4, r+1); \ ((ARMv5*)cpu)->DelayIfITCM(2); \ @@ -385,7 +386,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ cpu->LDRFailedRegs = 0; \ - ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + if (!ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \ bool dabort = !cpu->DataRead32(addr, r); \ u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(addr+4, r+1); \ ((ARMv5*)cpu)->DelayIfITCM(2); \ @@ -406,7 +407,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + if (!ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \ ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ bool dabort = !cpu->DataWrite32(offset, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ @@ -423,7 +424,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + if (!ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \ ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ bool dabort = !cpu->DataWrite32(addr, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ @@ -494,7 +495,7 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) template inline void SWP(ARM* cpu) { - ExecuteStage(cpu, ((cpu->CurInstr >> 16) & 0xF)); + if (!ExecuteStage(cpu, ((cpu->CurInstr >> 16) & 0xF))) return; cpu->LDRFailedRegs = 0; u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rd = (cpu->CurInstr >> 12) & 0xF; @@ -603,8 +604,10 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) void A_LDM(ARM* cpu) { - cpu->LDRFailedRegs = 0; u32 baseid = (cpu->CurInstr >> 16) & 0xF; + if (!ExecuteStage(cpu, baseid)) return; + + cpu->LDRFailedRegs = 0; u32 base = cpu->R[baseid]; u32 wbbase; u32 oldbase = base; @@ -622,8 +625,6 @@ void A_LDM(ARM* cpu) return; } - ExecuteStage(cpu, baseid); - if (!(cpu->CurInstr & (1<<23))) // decrement { // decrement is actually an increment starting from the end address @@ -750,6 +751,8 @@ void A_LDM(ARM* cpu) void A_STM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; + if (!ExecuteStage(cpu, baseid)) return; + u32 base = cpu->R[baseid]; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); @@ -765,8 +768,6 @@ void A_STM(ARM* cpu) (0 << 4))); // thumb return; } - - ExecuteStage(cpu, baseid); if (!(cpu->CurInstr & (1<<23))) { @@ -860,7 +861,8 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { - ExecuteStage(cpu, 15); + if (!ExecuteStage(cpu, 15)) return; + cpu->LDRFailedRegs = 0; u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); bool dabort = !cpu->DataRead32(addr, (cpu->CurInstr >> 8) & 0x7); @@ -961,7 +963,8 @@ void T_LDR_SPREL(ARM* cpu) void T_PUSH(ARM* cpu) { - ExecuteStage(cpu, 13); + if (!ExecuteStage(cpu, 13)) return; + int nregs = 0; bool first = true; bool dabort = false; @@ -1033,7 +1036,8 @@ void T_PUSH(ARM* cpu) void T_POP(ARM* cpu) { - ExecuteStage(cpu, 13); + if (!ExecuteStage(cpu, 13)) return; + cpu->LDRFailedRegs = 0; u32 base = cpu->R[13]; bool first = true; @@ -1128,7 +1132,8 @@ void T_POP(ARM* cpu) void T_STMIA(ARM* cpu) { - ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7)); + if (!ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7))) return; + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; @@ -1181,7 +1186,8 @@ void T_STMIA(ARM* cpu) void T_LDMIA(ARM* cpu) { - ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7)); + if (!ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7))) return; + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; diff --git a/src/NDS.cpp b/src/NDS.cpp index 7918b5d5..dc180e96 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1810,7 +1810,7 @@ u32 NDS::RunFrame() } } - //printf("A9 LOOP: 9 %lli %lli %08X %08llX %i 7 %lli %lli %08X %08llX %i\n", ARM9Timestamp, ARM9Target, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7Target, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type); + //printf("A9 LOOP: 9 %lli %lli %08X %08llX %i 7 %lli %lli %08X %08llX %i\n", ARM9Timestamp, ARM9Target, ARM9.R[15], ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7Target, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type); RunTimers(0); GPU.GPU3D.Run();