From e2a810147f032bee153e2dc0a6fb73067e06e5ba Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:00:55 -0400 Subject: [PATCH] re-add interlocks breaks gcc debug builds for ??? reason --- src/ARM.cpp | 40 ++++- src/ARM.h | 33 ++-- src/ARMInterpreter.cpp | 4 + src/ARMInterpreter_ALU.cpp | 275 +++++++++++++++++++++++++---- src/ARMInterpreter_Branch.cpp | 4 + src/ARMInterpreter_LoadStore.cpp | 290 ++++++++++++++++++++----------- 6 files changed, 473 insertions(+), 173 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6a51318f..7b8fbf46 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -200,13 +200,11 @@ void ARM::Reset() void ARMv5::Reset() { PU_Map = PU_PrivMap; + Store = false; TimestampActual = 0; - InterlockMem = 16; - InterlockWBCur = 16; - InterlockWBPrev = 16; - Store = false; - InterlockMask = 0; + ILCurrReg = 16; + ILPrevReg = 16; WBWritePointer = 16; WBFillPointer = 0; @@ -1152,7 +1150,7 @@ u32 ARMv5::ReadMem(u32 addr, int size) #endif -inline void ARMv5::CodeFetch() +void ARMv5::CodeFetch() { if (NullFetch) { @@ -1181,6 +1179,36 @@ void ARMv5::AddCycles_MW(s32 numM) if (numM > 0) NDS.ARM9Timestamp += numM; } +template +void ARMv5::HandleInterlocksExecute(u16 ilmask) +{ + if ((bitfield && (ilmask & (1< ILCurrTime) NDS.ARM9Timestamp = ILCurrTime; + ILCurrReg = 16; + ILPrevReg = 16; + return; + } + else if ((bitfield && (ilmask & (1< ILPrevTime) NDS.ARM9Timestamp = ILPrevTime; + } + + ILPrevReg = ILCurrReg; + ILPrevTime = ILCurrTime; + ILCurrReg = 16; +} +template void ARMv5::HandleInterlocksExecute(u16 ilmask); +template void ARMv5::HandleInterlocksExecute(u16 ilmask); + +void ARMv5::HandleInterlocksMemory(u8 reg) +{ + if ((reg != ILPrevReg) || (NDS.ARM9Timestamp <= ILPrevTime)) return; + + NDS.ARM9Timestamp = ILPrevTime; + ILPrevTime = 16; +} + u16 ARMv4::CodeRead16(u32 addr) { if ((addr >> 24) == 0x02) diff --git a/src/ARM.h b/src/ARM.h index 327f72c3..bacdf367 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -263,23 +263,6 @@ public: bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; bool DataWrite32S(u32 addr, u32 val) override; - - template - void ExecuteStage(u8 rn, u8 rm) - { - static_assert((nregs < 2), "too many regs"); - - if constexpr (nregs == 1) - { - InterlockMask = 1 << rn; - } - if constexpr (nregs == 2) - { - InterlockMask = 1 << rn | 1 << rm; - } - - AddCycles_C(); - } void CodeFetch(); @@ -300,6 +283,10 @@ public: AddCycles_MW(DataCycles); DataCycles = 0; } + + template + void HandleInterlocksExecute(u16 ilmask); + void HandleInterlocksMemory(u8 reg); void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -371,14 +358,14 @@ public: u64 ITCMTimestamp; u64 TimestampActual; - u8 InterlockMem; - u8 InterlockWBCur; - u8 InterlockWBPrev; - bool Store; - u16 InterlockMask; - bool NullFetch; u32 PC; + bool NullFetch; + bool Store; + u8 ILCurrReg; + u8 ILPrevReg; + u64 ILCurrTime; + u64 ILPrevTime; u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index b1d856a0..614f3b53 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -153,6 +153,8 @@ void A_MSR_IMM(ARM* cpu) void A_MSR_REG(ARM* cpu) { + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); + u32* psr; if (cpu->CurInstr & (1<<22)) { @@ -275,6 +277,8 @@ void A_MCR(ARM* cpu) u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr>>12)&0xF); + if (cpu->Num==0 && cp==15) { ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, val); diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index e9439d2a..0d2fb5af 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -152,22 +152,26 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) #define A_CALC_OP2_IMM \ - u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); + u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ + u16 ilmask = 0; #define A_CALC_OP2_IMM_S \ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ if ((cpu->CurInstr>>7)&0x1E) \ - cpu->SetC(b & 0x80000000); + cpu->SetC(b & 0x80000000); \ + u16 ilmask = 0; #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 s = (cpu->CurInstr>>7)&0x1F; \ - shiftop(b, s); + shiftop(b, s); \ + u16 ilmask = 1 << (cpu->CurInstr&0xF); #define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ u32 b = cpu->R[cpu->CurInstr&0xF]; \ if ((cpu->CurInstr&0xF)==15) b += 4; \ - shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); + shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); \ + u16 ilmask = 1 << (cpu->CurInstr&0xF); #define A_IMPLEMENT_ALU_OP(x,s) \ @@ -377,6 +381,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) #define A_SUB(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -394,6 +399,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) !res, \ CarrySub(a, b), \ OverflowSub(a, b)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -410,6 +416,7 @@ A_IMPLEMENT_ALU_OP(SUB,) #define A_RSB(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -427,6 +434,7 @@ A_IMPLEMENT_ALU_OP(SUB,) !res, \ CarrySub(b, a), \ OverflowSub(b, a)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -443,6 +451,7 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -460,6 +469,7 @@ A_IMPLEMENT_ALU_OP(RSB,) !res, \ CarryAdd(a, b), \ OverflowAdd(a, b)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -476,6 +486,7 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -495,6 +506,7 @@ A_IMPLEMENT_ALU_OP(ADD,) !res, \ CarryAdd(a, b) | CarryAdd(res_tmp, carry), \ OverflowAdc(a, b, carry)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -511,6 +523,7 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -530,6 +543,7 @@ A_IMPLEMENT_ALU_OP(ADC,) !res, \ CarrySub(a, b) & CarrySub(res_tmp, carry), \ OverflowSbc(a, b, carry)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -546,6 +560,7 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -565,6 +580,7 @@ A_IMPLEMENT_ALU_OP(SBC,) !res, \ CarrySub(b, a) & CarrySub(res_tmp, carry), \ OverflowSbc(b, a, carry)); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -581,6 +597,8 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ @@ -601,8 +619,7 @@ A_IMPLEMENT_ALU_OP(RSC,) { \ cpu->SetNZ(res & 0x80000000, \ !res); \ - } \ - if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); + } A_IMPLEMENT_ALU_TEST(TST,_S) @@ -610,6 +627,8 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ @@ -630,8 +649,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) { \ cpu->SetNZ(res & 0x80000000, \ !res); \ - } \ - if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); + } A_IMPLEMENT_ALU_TEST(TEQ,_S) @@ -639,6 +657,8 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ @@ -663,8 +683,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) !res, \ CarrySub(a, b), \ OverflowSub(a, b)); \ - } \ - if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); + } A_IMPLEMENT_ALU_TEST(CMP,) @@ -672,6 +691,8 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ @@ -696,8 +717,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) !res, \ CarryAdd(a, b), \ OverflowAdd(a, b)); \ - } \ - if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); + } A_IMPLEMENT_ALU_TEST(CMN,) @@ -705,6 +725,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) #define A_ORR(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -720,6 +741,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) u32 res = a | b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -734,6 +756,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S) #define A_MOV(c) \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -747,6 +770,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S) #define A_MOV_S(c) \ cpu->SetNZ(b & 0x80000000, \ !b); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -781,6 +805,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) #define A_BIC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -796,6 +821,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) u32 res = a & ~b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -811,6 +837,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S) #define A_MVN(c) \ b = ~b; \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -825,6 +852,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S) b = ~b; \ cpu->SetNZ(b & 0x80000000, \ !b); \ + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ @@ -859,12 +887,17 @@ void A_MUL(ARM* cpu) if (cpu->Num == 0) { - if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); // S else { cpu->AddCycles_C(); // 1 X + cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } else @@ -899,12 +932,18 @@ void A_MLA(ARM* cpu) if (cpu->Num == 0) { + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF))); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); else { cpu->AddCycles_C(); // 1 X + cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } else @@ -938,18 +977,31 @@ void A_UMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; + { + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); + else + { + cpu->AddCycles_CI(2); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_UMLAL(ARM* cpu) @@ -974,18 +1026,33 @@ void A_UMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; + { + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)) | + (1 << ((cpu->CurInstr >> 16) & 0xF))); + if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); + else + { + cpu->AddCycles_CI(2); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_SMULL(ARM* cpu) @@ -1007,18 +1074,30 @@ void A_SMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; + { + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); + else + { + cpu->AddCycles_CI(2); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + cpu->AddCycles_CI(cycles); } - - cpu->AddCycles_CI(cycles); } void A_SMLAL(ARM* cpu) @@ -1043,18 +1122,32 @@ void A_SMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; + { + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)) | + (1 << ((cpu->CurInstr >> 16) & 0xF))); + if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4); + else + { + cpu->AddCycles_CI(2); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + cpu->AddCycles_CI(cycles); } - - cpu->AddCycles_CI(cycles); } void A_SMLAxy(ARM* cpu) @@ -1078,8 +1171,17 @@ void A_SMLAxy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; + - cpu->AddCycles_C(); // TODO: interlock?? + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_SMLAWy(ARM* cpu) @@ -1101,7 +1203,16 @@ void A_SMLAWy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); // TODO: interlock?? + + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_SMULxy(ARM* cpu) @@ -1120,7 +1231,16 @@ void A_SMULxy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + + + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_SMULWy(ARM* cpu) @@ -1137,7 +1257,16 @@ void A_SMULWy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + + + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_SMLALxy(ARM* cpu) @@ -1162,10 +1291,18 @@ void A_SMLALxy(ARM* cpu) if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - + + + + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | + (1 << ((cpu->CurInstr >> 8) & 0xF)) | + (1 << ((cpu->CurInstr >> 12) & 0xF)) | + (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); // 1 X cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } @@ -1192,6 +1329,8 @@ void A_CLZ(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + + ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); cpu->AddCycles_C(); } @@ -1213,7 +1352,13 @@ void A_QADD(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_QSUB(ARM* cpu) @@ -1233,7 +1378,13 @@ void A_QSUB(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_QDADD(ARM* cpu) @@ -1261,7 +1412,13 @@ void A_QDADD(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } void A_QDSUB(ARM* cpu) @@ -1289,7 +1446,13 @@ void A_QDSUB(ARM* cpu) if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); + cpu->AddCycles_C(); + + cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } @@ -1306,6 +1469,7 @@ void T_LSL_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1317,6 +1481,7 @@ void T_LSR_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1328,6 +1493,7 @@ void T_ASR_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1341,6 +1507,7 @@ void T_ADD_REG_(ARM* cpu) !res, CarryAdd(a, b), OverflowAdd(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); cpu->AddCycles_C(); } @@ -1354,6 +1521,7 @@ void T_SUB_REG_(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7))); cpu->AddCycles_C(); } @@ -1367,6 +1535,7 @@ void T_ADD_IMM_(ARM* cpu) !res, CarryAdd(a, b), OverflowAdd(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1380,6 +1549,7 @@ void T_SUB_IMM_(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1389,6 +1559,7 @@ void T_MOV_IMM(ARM* cpu) cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; cpu->SetNZ(0, !b); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1401,6 +1572,7 @@ void T_CMP_IMM(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1414,6 +1586,7 @@ void T_ADD_IMM(ARM* cpu) !res, CarryAdd(a, b), OverflowAdd(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1427,6 +1600,7 @@ void T_SUB_IMM(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_C(); } @@ -1439,6 +1613,7 @@ void T_AND_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1450,6 +1625,7 @@ void T_EOR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1461,6 +1637,7 @@ void T_LSL_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1472,6 +1649,7 @@ void T_LSR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1483,6 +1661,7 @@ void T_ASR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1498,6 +1677,7 @@ void T_ADC_REG(ARM* cpu) !res, CarryAdd(a, b) | CarryAdd(res_tmp, carry), OverflowAdc(a, b, carry)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1513,6 +1693,7 @@ void T_SBC_REG(ARM* cpu) !res, CarrySub(a, b) & CarrySub(res_tmp, carry), OverflowSbc(a, b, carry)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1524,6 +1705,7 @@ void T_ROR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_CI(1); } @@ -1534,6 +1716,7 @@ void T_TST_REG(ARM* cpu) u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1546,6 +1729,7 @@ void T_NEG_REG(ARM* cpu) !res, CarrySub(0, b), OverflowSub(0, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1558,6 +1742,7 @@ void T_CMP_REG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1570,6 +1755,7 @@ void T_CMN_REG(ARM* cpu) !res, CarryAdd(a, b), OverflowAdd(a, b)); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1581,6 +1767,7 @@ void T_ORR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1597,6 +1784,7 @@ void T_MUL_REG(ARM* cpu) if (cpu->Num == 0) { cycles += 3; + ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); } else { @@ -1606,7 +1794,7 @@ void T_MUL_REG(ARM* cpu) else if (a & 0x0000FF00) cycles += 2; else cycles += 1; } - cpu->AddCycles_CI(cycles); + cpu->AddCycles_CI(cycles); // implemented as S variant, doesn't interlock } void T_BIC_REG(ARM* cpu) @@ -1617,6 +1805,7 @@ void T_BIC_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7))); cpu->AddCycles_C(); } @@ -1627,6 +1816,7 @@ void T_MVN_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0x7); cpu->AddCycles_C(); } @@ -1643,7 +1833,8 @@ void T_ADD_HIREG(ARM* cpu) u32 b = cpu->R[rs]; cpu->AddCycles_C(); - + + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); if (rd == 15) { cpu->JumpTo((a + b) | 1); @@ -1678,6 +1869,7 @@ void T_CMP_HIREG(ARM* cpu) cpu->CPSR |= 0x20; // keep it from crashing the emulator at least } } + else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); cpu->AddCycles_C(); } @@ -1687,6 +1879,7 @@ void T_MOV_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); cpu->AddCycles_C(); if (rd == 15) @@ -1717,6 +1910,8 @@ void T_ADD_PCREL(ARM* cpu) u32 val = cpu->R[15] & ~2; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; + + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(15); cpu->AddCycles_C(); } @@ -1725,6 +1920,8 @@ void T_ADD_SPREL(ARM* cpu) u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; + + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(13); cpu->AddCycles_C(); } @@ -1736,6 +1933,8 @@ void T_ADD_SP(ARM* cpu) else val += ((cpu->CurInstr & 0x7F) << 2); cpu->R[13] = val; + + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(13); cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index a95aa27d..35bbbc52 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -51,12 +51,14 @@ void A_BLX_IMM(ARM* cpu) void A_BX(ARM* cpu) { + if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); cpu->AddCycles_C(); cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); } void A_BLX_REG(ARM* cpu) { + if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr&0xF); cpu->AddCycles_C(); u32 lr = cpu->R[15] - 4; cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); @@ -77,12 +79,14 @@ void T_BCOND(ARM* cpu) void T_BX(ARM* cpu) { + if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); cpu->AddCycles_C(); cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); } void T_BLX_REG(ARM* cpu) { + if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute((cpu->CurInstr >> 3) & 0xF); cpu->AddCycles_C(); if (cpu->Num==1) { diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index bab4f25b..3022b94d 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -18,15 +18,20 @@ #include #include "ARM.h" +#include "NDS.h" namespace melonDS::ARMInterpreter { - -void ExecuteStage(ARM* cpu) +template +inline void ExecuteStage(ARM* cpu, u16 ilmask) { - if (cpu->Num == 0) cpu->AddCycles_C(); + if (cpu->Num == 0) + { + ((ARMv5*)cpu)->HandleInterlocksExecute(ilmask); + ((ARMv5*)cpu)->AddCycles_C(); + } } @@ -56,13 +61,15 @@ void ExecuteStage(ARM* cpu) #define A_WB_CALC_OFFSET_IMM \ u32 offset = (cpu->CurInstr & 0xFFF); \ - if (!(cpu->CurInstr & (1<<23))) offset = -offset; + if (!(cpu->CurInstr & (1<<23))) offset = -offset; \ + u16 ilmask = 0; #define A_WB_CALC_OFFSET_REG(shiftop) \ u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ u32 shift = ((cpu->CurInstr>>7)&0x1F); \ shiftop(offset, shift); \ - if (!(cpu->CurInstr & (1<<23))) offset = -offset; + if (!(cpu->CurInstr & (1<<23))) offset = -offset; \ + u16 ilmask = 1 << (cpu->CurInstr & 0xF); enum class Writeback { @@ -72,11 +79,12 @@ enum class Writeback Trans, }; -template -void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +template +void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask) { - ExecuteStage(cpu); static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + + ExecuteStage(cpu, (ilmask | (1<R[rn]; @@ -117,15 +125,25 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; cpu->JumpTo(val); } - else cpu->R[rd] = val; + else + { + cpu->R[rd] = val; + if (cpu->Num == 0) + { + ((ARMv5*)cpu)->ILCurrReg = rd; + bool extra = ((size < 32) || (signror && (addr&0x3))); + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra; + } + } } -template -void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +template +void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask) { - ExecuteStage(cpu); static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + ExecuteStage(cpu, (ilmask | (1<R[rn]; else addr = cpu->R[rn]; @@ -139,6 +157,8 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; } + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(rd); + bool dabort; if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval); if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval); @@ -162,36 +182,36 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) #define A_STR \ - if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else StoreSingle<32, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_STR_POST \ - if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else StoreSingle<32, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_STRB \ - if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else StoreSingle<8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_STRB_POST \ - if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else StoreSingle<8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDR \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDR_POST \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRB \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRB_POST \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); @@ -266,63 +286,72 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_HD_CALC_OFFSET_IMM \ u32 offset = (cpu->CurInstr & 0xF) | ((cpu->CurInstr >> 4) & 0xF0); \ - if (!(cpu->CurInstr & (1<<23))) offset = -offset; + if (!(cpu->CurInstr & (1<<23))) offset = -offset; \ + u16 ilmask = 0; #define A_HD_CALC_OFFSET_REG \ u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ - if (!(cpu->CurInstr & (1<<23))) offset = -offset; + if (!(cpu->CurInstr & (1<<23))) offset = -offset; \ + u16 ilmask = 1 << (cpu->CurInstr & 0xF); #define A_STRH \ - if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else StoreSingle<16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else StoreSingle<16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_STRH_POST \ - StoreSingle<16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + StoreSingle<16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); // TODO: CHECK LDRD/STRD TIMINGS!! #define A_LDRD \ if (cpu->Num != 0) return; \ - ExecuteStage(cpu); \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \ u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \ + cpu->AddCycles_CDI(); \ if (dabort) { \ - cpu->AddCycles_CDI(); \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else cpu->R[r+1] = val; \ - cpu->AddCycles_CDI(); \ + else { \ + cpu->R[r+1] = val; \ + if (cpu->Num == 0) { \ + ((ARMv5*)cpu)->ILCurrReg = r+1; \ + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ if (cpu->Num != 0) return; \ - ExecuteStage(cpu); \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \ u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \ + cpu->AddCycles_CDI(); \ if (dabort) { \ - cpu->AddCycles_CDI(); \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else cpu->R[r+1] = val; \ - cpu->AddCycles_CDI(); \ + else { \ + cpu->R[r+1] = val; \ + if (cpu->Num == 0) { \ + ((ARMv5*)cpu)->ILCurrReg = r+1; \ + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ if (cpu->Num != 0) return; \ - ExecuteStage(cpu); \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ @@ -334,10 +363,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRD_POST \ if (cpu->Num != 0) return; \ - ExecuteStage(cpu); \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ + ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dabort |= !cpu->DataWrite32S (addr+4, storeval); \ @@ -348,25 +378,25 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRH_POST \ - LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRSB \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRSB_POST \ - LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRSH \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_LDRSH_POST \ - LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); #define A_IMPLEMENT_HD_LDRSTR(x) \ @@ -406,7 +436,7 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) template inline void SWP(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, ((cpu->CurInstr >> 16) & 0xF)); u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF]; if ((cpu->CurInstr & 0xF) == 15) rm += 4; @@ -415,7 +445,7 @@ inline void SWP(ARM* cpu) if ((byte ? cpu->DataRead8 (base, &val) : cpu->DataRead32(base, &val))) [[likely]] { - u32 numD = cpu->DataCycles; + cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme if ((byte ? cpu->DataWrite8 (base, rm) : cpu->DataWrite32(base, rm))) [[likely]] @@ -424,17 +454,27 @@ inline void SWP(ARM* cpu) u32 rd = (cpu->CurInstr >> 12) & 0xF; if constexpr (!byte) val = ROR(val, 8*(base&0x3)); + + cpu->AddCycles_CDI(); - if (rd != 15) cpu->R[rd] = val; + if (rd != 15) + { + cpu->R[rd] = val; + if (cpu->Num == 0) + { + ((ARMv5*)cpu)->ILCurrReg = rd; + bool extra = (byte || (base&0x3)); + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra; + } + } else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? + return; } - else ((ARMv5*)cpu)->DataAbort(); - - cpu->DataCycles += numD; } - else ((ARMv5*)cpu)->DataAbort(); + // data abort handling cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); } void A_SWP(ARM* cpu) @@ -499,7 +539,6 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) void A_LDM(ARM* cpu) { - ExecuteStage(cpu); u32 baseid = (cpu->CurInstr >> 16) & 0xF; u32 base = cpu->R[baseid]; u32 wbbase; @@ -518,6 +557,8 @@ void A_LDM(ARM* cpu) return; } + ExecuteStage(cpu, baseid); + if (!(cpu->CurInstr & (1<<23))) // decrement { // decrement is actually an increment starting from the end address @@ -573,11 +614,12 @@ void A_LDM(ARM* cpu) // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + cpu->AddCycles_CDI(); // handle data aborts if (dabort) [[unlikely]] { - cpu->AddCycles_CDI(); ((ARMv5*)cpu)->DataAbort(); return; } @@ -601,17 +643,20 @@ void A_LDM(ARM* cpu) else cpu->R[baseid] = wbbase; } - + // jump if pc got written if (cpu->CurInstr & (1<<15)) cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); - - cpu->AddCycles_CDI(); + else if (cpu->Num == 0) + { + u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0x7FFF); + ((ARMv5*)cpu)->ILCurrReg = lastreg; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } } void A_STM(ARM* cpu) { - ExecuteStage(cpu); u32 baseid = (cpu->CurInstr >> 16) & 0xF; u32 base = cpu->R[baseid]; u32 oldbase = base; @@ -628,6 +673,8 @@ void A_STM(ARM* cpu) (0 << 4))); // thumb return; } + + ExecuteStage(cpu, baseid); if (!(cpu->CurInstr & (1<<23))) { @@ -655,6 +702,8 @@ void A_STM(ARM* cpu) cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); } + if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(__builtin_ctz(cpu->CurInstr)); + for (u32 i = 0; i < 16; i++) { if (cpu->CurInstr & (1<CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - + + cpu->AddCycles_CD(); + // handle data aborts if (dabort) [[unlikely]] { // restore original value of base cpu->R[baseid] = oldbase; - cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); return; } if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) cpu->R[baseid] = base; - - - cpu->AddCycles_CD(); } @@ -710,106 +757,108 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, 15); u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - if (dabort) [[unlikely]] + if (dabort) [[unlikely]] ((ARMv5*)cpu)->DataAbort(); + else if (cpu->Num == 0) { - ((ARMv5*)cpu)->DataAbort(); + ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 8) & 0x7; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } void T_STR_REG(ARM* cpu) { - StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + StoreSingle<32, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_STRB_REG(ARM* cpu) { - StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + StoreSingle<8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_LDR_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_LDRB_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_STRH_REG(ARM* cpu) { - StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + StoreSingle<16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_LDRSB_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_LDRH_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_LDRSH_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7))); } void T_STR_IMM(ARM* cpu) { - StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); + StoreSingle<32, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0); } void T_LDR_IMM(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0); } void T_STRB_IMM(ARM* cpu) { - StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); + StoreSingle<8, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0); } void T_LDRB_IMM(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0); } void T_STRH_IMM(ARM* cpu) { - StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); + StoreSingle<16, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0); } void T_LDRH_IMM(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0); } void T_STR_SPREL(ARM* cpu) { - StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); + StoreSingle<32, Writeback::None, false>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0); } void T_LDR_SPREL(ARM* cpu) { - LoadSingle(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); + LoadSingle(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0); } void T_PUSH(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, 13); int nregs = 0; bool first = true; bool dabort = false; @@ -833,6 +882,13 @@ void T_PUSH(ARM* cpu) base -= (nregs<<2); u32 wbbase = base; + if (cpu->Num == 0) + { + u8 firstreg = __builtin_ctz(cpu->CurInstr); + if (firstreg == 8) firstreg = 14; + ((ARMv5*)cpu)->HandleInterlocksMemory(firstreg); + } + for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32S(base, cpu->R[14])); } + cpu->AddCycles_CD(); + if (dabort) [[unlikely]] { - cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); return; } cpu->R[13] = wbbase; - - cpu->AddCycles_CD(); } void T_POP(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, 13); u32 base = cpu->R[13]; bool first = true; bool dabort = false; - + if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] { EmptyRListLDMSTM(cpu, 13, 0b00011); @@ -898,10 +953,23 @@ void T_POP(ARM* cpu) : cpu->DataRead32S(base, &pc)); if (dabort) [[unlikely]] goto dataabort; + + cpu->AddCycles_CDI(); if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; cpu->JumpTo(pc); base += 4; } + else + { + cpu->AddCycles_CDI(); + + if (cpu->Num == 0) + { + u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF); + ((ARMv5*)cpu)->ILCurrReg = lastreg; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } + } if (dabort) [[unlikely]] { @@ -912,23 +980,27 @@ void T_POP(ARM* cpu) } cpu->R[13] = base; - - cpu->AddCycles_CDI(); } void T_STMIA(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7)); u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; - + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010); return; } + if (cpu->Num == 0) + { + u8 firstreg = __builtin_ctz(cpu->CurInstr); + ((ARMv5*)cpu)->HandleInterlocksMemory(firstreg); + } + for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<AddCycles_CD(); + if (dabort) [[unlikely]] { - cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); return; } // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - cpu->AddCycles_CD(); } void T_LDMIA(ARM* cpu) { - ExecuteStage(cpu); + ExecuteStage(cpu, ((cpu->CurInstr >> 8) & 0x7)); u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; - + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011); @@ -980,17 +1052,23 @@ void T_LDMIA(ARM* cpu) } } + cpu->AddCycles_CDI(); + if (dabort) [[unlikely]] { - cpu->AddCycles_CDI(); ((ARMv5*)cpu)->DataAbort(); return; } + + if (cpu->Num == 0) + { + u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF); + ((ARMv5*)cpu)->ILCurrReg = lastreg; + ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; + } if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - - cpu->AddCycles_CDI(); }