diff --git a/src/ARM.cpp b/src/ARM.cpp index 040d8bfb..a7f19414 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1398,6 +1398,11 @@ void ARMv5::AddCycles_MW_2() NDS.ARM9Timestamp -= DataCycles; } +void ARMv5::DelayIfITCM_2() +{ + if (DataRegion == Mem9_ITCM) NDS.ARM9Timestamp += ITCMDelay; +} + void ARMv5::SetupInterlock_2() { ILCurrReg = ILQueueReg; diff --git a/src/ARM.h b/src/ARM.h index b78c4130..737b196f 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -352,6 +352,12 @@ public: AddCycles_MW(DataCycles); } + void DelayIfITCM(s8 delay) + { + ITCMDelay = delay; + QueueFunction(&ARMv5::DelayIfITCM_2); + } + inline void SetupInterlock(u8 reg, s8 delay = 0) { ILQueueReg = reg; @@ -712,6 +718,7 @@ public: void StartExec(); void AddExecute(); void AddCycles_MW_2(); + void DelayIfITCM_2(); void JumpTo_2(); void JumpTo_3A(); void JumpTo_3B(); @@ -761,11 +768,11 @@ public: u8 ITCM[ITCMPhysicalSize]; //! Content of the ITCM u8* DTCM; //! Content of the DTCM - u8 ICache[ICACHE_SIZE]; //! Instruction Cache Content organized in @ref ICACHE_LINESPERSET times @ref ICACHE_SETS times @ref ICACHE_LINELENGTH bytes + alignas(u32) u8 ICache[ICACHE_SIZE]; //! Instruction Cache Content organized in @ref ICACHE_LINESPERSET times @ref ICACHE_SETS times @ref ICACHE_LINELENGTH bytes u32 ICacheTags[ICACHE_LINESPERSET*ICACHE_SETS]; //! Instruction Cache Tags organized in @ref ICACHE_LINESPERSET times @ref ICACHE_SETS Tags u8 ICacheCount; //! Global instruction line fill counter. Used for round-robin replacement strategy with the instruction cache - u8 DCache[DCACHE_SIZE]; //! Data Cache Content organized in @ref DCACHE_LINESPERSET times @ref DCACHE_SETS times @ref DCACHE_LINELENGTH bytes + alignas(u32) u8 DCache[DCACHE_SIZE]; //! Data Cache Content organized in @ref DCACHE_LINESPERSET times @ref DCACHE_SETS times @ref DCACHE_LINELENGTH bytes u32 DCacheTags[DCACHE_LINESPERSET*DCACHE_SETS]; //! Data Cache Tags organized in @ref DCACHE_LINESPERSET times @ref DCACHE_SETS Tags u8 DCacheCount; //! Global data line fill counter. Used for round-robin replacement strategy with the instruction cache @@ -803,6 +810,7 @@ public: u32 PC; bool NullFetch; bool Store; + s8 ITCMDelay; u8 ILCurrReg; u8 ILPrevReg; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index ff9d6e5c..658ab4c8 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -366,7 +366,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ bool dabort = !cpu->DataRead32(offset, r); \ u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(offset+4, r+1); \ - /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ + ((ARMv5*)cpu)->DelayIfITCM(2); \ cpu->AddCycles_CDI(); \ if (dabort) { \ cpu->R[r+1] = oldval; \ @@ -388,7 +388,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ bool dabort = !cpu->DataRead32(addr, r); \ u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(addr+4, r+1); \ - /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ + ((ARMv5*)cpu)->DelayIfITCM(2); \ cpu->AddCycles_CDI(); \ if (dabort) { \ cpu->R[r+1] = oldval; \ @@ -411,7 +411,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dabort = !cpu->DataWrite32(offset, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ dabort |= !cpu->DataWrite32S (offset+4, storeval, r+1); \ - /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ + ((ARMv5*)cpu)->DelayIfITCM(2); \ cpu->AddCycles_CD(); \ if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ @@ -428,7 +428,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dabort = !cpu->DataWrite32(addr, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ dabort |= !cpu->DataWrite32S (addr+4, storeval, r+1); \ - /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ + ((ARMv5*)cpu)->DelayIfITCM(2); \ cpu->AddCycles_CD(); \ if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ @@ -508,8 +508,6 @@ inline void SWP(ARM* cpu) if ((byte ? cpu->DataRead8 (base, rd) : cpu->DataRead32(base, rd))) [[likely]] { - //cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme - if ((byte ? cpu->DataWrite8 (base, storeval, rm) : cpu->DataWrite32(base, storeval, rm))) [[likely]] { @@ -679,14 +677,14 @@ void A_LDM(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CDI(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CDI(); } @@ -829,14 +827,14 @@ void A_STM(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CD(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CD(); } @@ -1013,14 +1011,14 @@ void T_PUSH(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CD(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CD(); } @@ -1069,14 +1067,14 @@ void T_POP(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CDI(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CDI(); } @@ -1099,14 +1097,14 @@ void T_POP(ARM* cpu) { if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CDI(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CDI(); } @@ -1160,14 +1158,14 @@ void T_STMIA(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CD(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CD(); } @@ -1210,14 +1208,14 @@ void T_LDMIA(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(1); cpu->AddCycles_CDI(); if (cpu->Num == 0) ((ARMv5*)cpu)->ForceInterlock(); // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + if (cpu->Num == 0) ((ARMv5*)cpu)->DelayIfITCM(2); cpu->AddCycles_CDI(); }