diff --git a/src/ARM.cpp b/src/ARM.cpp index 24858c28..655daa8c 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -164,11 +164,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) // R0=DMA# R1=src R2=size if (addr==0x1FFD9E0) printf("[%03d] FMVdec\n", GPU::VCount); if (R[15]==0x1FFDF40) printf("[%03d] FMVdec FINISHED\n", GPU::VCount); - if (addr==0x202585C) - { - //u32 dorp; NDS::ARM9Read32(0x20630DC, &dorp); - //printf("[%03d] IRQ handler thing. wait=%08X\n", GPU::VCount, dorp); - } u32 oldregion = R[15] >> 24; u32 newregion = addr >> 24; @@ -443,7 +438,7 @@ void ARMv5::DataAbort() R[14] = R[15] + (oldcpsr & 0x20 ? 6 : 4); JumpTo(ExceptionBase + 0x10); } - +extern u64 arm9total, arm7total, arm9timer, arm7timer; s32 ARMv5::Execute() { if (Halted) @@ -456,12 +451,13 @@ s32 ARMv5::Execute() { Halted = 0; if (NDS::IME[0] & 0x1) - TriggerIRQ(); + TriggerIRQ(); //!! potential drift } else { Cycles = CyclesToRun; - //NDS::RunTimingCriticalDevices(0, CyclesToRun >> ClockShift); + arm9total+=(CyclesToRun>>1);//arm9timer+=(CyclesToRun>>1); + //NDS::RunTightTimers(0, CyclesToRun >> ClockShift); return Cycles; } } @@ -506,15 +502,20 @@ s32 ARMv5::Execute() AddCycles_C(); } - //s32 diff = Cycles - lastcycles; - //NDS::RunTimingCriticalDevices(0, diff >> ClockShift); + //s32 diff = Cycles - lastcycles;arm9timer+=(diff>>1); + //NDS::RunTightTimers(0, diff >> ClockShift); //lastcycles = Cycles - (diff & ClockDiffMask); // TODO optimize this shit!!! if (Halted) { - if (Halted == 1) + if (Halted == 1 && Cycles < CyclesToRun) + { + s32 diff = CyclesToRun - Cycles; Cycles = CyclesToRun; + //NDS::RunTightTimers(0, diff >> ClockShift); + //arm9timer += (diff>>1); + } break; } if (NDS::IF[0] & NDS::IE[0]) @@ -527,6 +528,13 @@ s32 ARMv5::Execute() if (Halted == 2) Halted = 0; + if (Cycles > lastcycles) + { + //s32 diff = Cycles - lastcycles;arm9timer+=(diff>>1); + //NDS::RunTightTimers(0, diff >> ClockShift); + } + +arm9total+=(Cycles>>1); return Cycles; } @@ -547,7 +555,8 @@ s32 ARMv4::Execute() else { Cycles = CyclesToRun; - NDS::RunTimingCriticalDevices(1, CyclesToRun); + //NDS::RunTightTimers(1, CyclesToRun); + arm7total+=CyclesToRun; //arm7timer+=CyclesToRun; return Cycles; } } @@ -587,15 +596,20 @@ s32 ARMv4::Execute() AddCycles_C(); } - s32 diff = Cycles - lastcycles; - NDS::RunTimingCriticalDevices(1, diff); - lastcycles = Cycles; + //s32 diff = Cycles - lastcycles;arm7timer+=diff; + //NDS::RunTightTimers(1, diff); + //lastcycles = Cycles; // TODO optimize this shit!!! if (Halted) { - if (Halted == 1) + if (Halted == 1 && Cycles < CyclesToRun) + { + s32 diff = CyclesToRun - Cycles; Cycles = CyclesToRun; + //NDS::RunTightTimers(1, diff); + //arm7timer += diff; + } break; } if (NDS::IF[1] & NDS::IE[1]) @@ -608,5 +622,12 @@ s32 ARMv4::Execute() if (Halted == 2) Halted = 0; + if (Cycles > lastcycles) + { + //s32 diff = Cycles - lastcycles;arm7timer+=(diff); + //NDS::RunTightTimers(1, diff); + } + +arm7total+=Cycles; return Cycles; } diff --git a/src/DMA.cpp b/src/DMA.cpp index 9edbf4b2..2f6674db 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -100,7 +100,7 @@ void DMA::DoSavestate(Savestate* file) file->Var32(&SrcAddrInc); file->Var32(&DstAddrInc); - file->Var32((u32*)&Running); + file->Var32(&Running); file->Var32((u32*)&InProgress); file->Var32((u32*)&IsGXFIFODMA); } @@ -170,36 +170,45 @@ void DMA::Start() if ((Cnt & 0x00600000) == 0x00600000) CurDstAddr = DstAddr; - - //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); +if(CPU==0&&StartMode!=7&&false) + printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); IsGXFIFODMA = (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0); // TODO eventually: not stop if we're running code in ITCM - Running = true; + if (NDS::DMAsRunning(CPU)) + Running = 1; + else + Running = 2; + InProgress = true; NDS::StopCPU(CPU, 1< 112) - burststart = (IterCount == 112); - else - burststart = (IterCount == RemCount); + // note: this seems to only apply when starting DMA 'in the void' + // for example, the aging cart DMA PRIORITY test: + // starts a big DMA immediately, and a small DMA upon HBlank + // each pulling from a timer incrementing once per cycle + // it expects that the values be increasing linearly (2c/unit) + // even as the small DMA starts and ends + bool burststart = (Running == 2); + Running = 1; + + s32 unitcycles; + s32 lastcycles = cycles; if (!(Cnt & 0x04000000)) { - int unitcycles; - if (Num == 0) + if (CPU == 0) { if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) { @@ -213,6 +222,7 @@ s32 DMA::Run(s32 cycles) if (burststart) { + cycles -= 2; cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]); cycles += unitcycles; } @@ -232,6 +242,7 @@ s32 DMA::Run(s32 cycles) if (burststart) { + cycles -= 2; cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]); cycles += unitcycles; } @@ -243,21 +254,25 @@ s32 DMA::Run(s32 cycles) while (IterCount > 0 && !Stall) { + cycles -= unitcycles; + + NDS::RunTightTimers(CPU, lastcycles-cycles); +//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);} + lastcycles = cycles; + writefn(CurDstAddr, readfn(CurSrcAddr)); - cycles -= unitcycles; CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; IterCount--; RemCount--; - if (cycles < 0) break; + if (cycles <= 0) break; } } else { - int unitcycles; - if (Num == 0) + if (CPU == 0) { if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) { @@ -273,6 +288,7 @@ s32 DMA::Run(s32 cycles) if (burststart) { + cycles -= 2; cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]); cycles += unitcycles; } @@ -294,6 +310,7 @@ s32 DMA::Run(s32 cycles) if (burststart) { + cycles -= 2; cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]); cycles += unitcycles; } @@ -305,32 +322,37 @@ s32 DMA::Run(s32 cycles) while (IterCount > 0 && !Stall) { + cycles -= unitcycles; + + NDS::RunTightTimers(CPU, lastcycles-cycles); +//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);} + lastcycles = cycles; + writefn(CurDstAddr, readfn(CurSrcAddr)); - cycles -= unitcycles; CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; IterCount--; RemCount--; - if (cycles < 0) break; + if (cycles <= 0) break; } } Executing = false; Stall = false; - +//if (CPU) printf("ran DMA for %d cycles (asked %d)\n", startc-cycles, startc); if (RemCount) { if (IterCount == 0) { - Running = false; + Running = 0; NDS::ResumeCPU(CPU, 1<Run(cycles); - if (cycles > 0) cycles = DMAs[5]->Run(cycles); - if (cycles > 0) cycles = DMAs[6]->Run(cycles); - if (cycles > 0) cycles = DMAs[7]->Run(cycles); - ARM7Offset = -cycles; - RunTimingCriticalDevices(1, critcycles); - } - else - { - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; - CurCPU = 2; ARM7->Execute(); CurCPU = 0; - ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; - } - - RunSystem(ndscyclestorun); - - ndscyclesran += ndscyclestorun; - if (ndscyclesran >= cycles) break; - CalcIterationCycles(); - } + arm9cycles = GPU3D::CyclesToRunFor(); + arm9cycles = std::min(CurIterationCycles, arm9cycles); + RunTightTimers(0, arm9cycles); arm9total+=arm9cycles;//arm9timer += arm9cycles; + } + else if (CPUStop & 0x0FFF) + { + s32 cycles = CurIterationCycles; + cycles = DMAs[0]->Run(cycles); + if (cycles > 0) cycles = DMAs[1]->Run(cycles); + if (cycles > 0) cycles = DMAs[2]->Run(cycles); + if (cycles > 0) cycles = DMAs[3]->Run(cycles); + //printf("DMAs been running for %d cycles, %d, asked for %d\n", CurIterationCycles-cycles, (u32)(arm9total-kiki), CurIterationCycles); + arm9cycles = CurIterationCycles - cycles; } else { - s32 ndscyclestorun; - - if (CPUStop & 0x0FFF) - { - s32 cycles = CurIterationCycles; - cycles = DMAs[0]->Run(cycles); - if (cycles > 0) cycles = DMAs[1]->Run(cycles); - if (cycles > 0) cycles = DMAs[2]->Run(cycles); - if (cycles > 0) cycles = DMAs[3]->Run(cycles); - ndscyclestorun = CurIterationCycles - cycles; - } - else - { - ARM9->CyclesToRun = CurIterationCycles << 1; - CurCPU = 1; ARM9->Execute(); CurCPU = 0; - ndscyclestorun = ARM9->Cycles >> 1; - } - - RunTimingCriticalDevices(0, ndscyclestorun); - - if (CPUStop & 0x0FFF0000) - { - s32 cycles = ndscyclestorun - ARM7Offset; - s32 critcycles = cycles; - cycles = DMAs[4]->Run(cycles); - if (cycles > 0) cycles = DMAs[5]->Run(cycles); - if (cycles > 0) cycles = DMAs[6]->Run(cycles); - if (cycles > 0) cycles = DMAs[7]->Run(cycles); - ARM7Offset = -cycles; - RunTimingCriticalDevices(1, critcycles); - } - else - { - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; - CurCPU = 2; ARM7->Execute(); CurCPU = 0; - ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; - } - - RunSystem(ndscyclestorun); + ARM9->CyclesToRun = CurIterationCycles << 1; + CurCPU = 1; ARM9->Execute(); CurCPU = 0; + arm9cycles = ARM9->Cycles >> 1; + RunTightTimers(0, arm9cycles); //arm9timer += arm9cycles; } +//arm9total += arm9cycles; + RunLooseTimers(0, arm9cycles); + GPU3D::Run(arm9cycles); + + s32 ndscyclestorun = arm9cycles; +s32 zarp; + // ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9 + if (ARM7Offset > ndscyclestorun) + { + ARM7Offset -= ndscyclestorun; + } + else + if (CPUStop & 0x0FFF0000) + { + s32 cycles = ndscyclestorun - ARM7Offset; zarp=cycles; + cycles = DMAs[4]->Run(cycles); + if (cycles > 0) cycles = DMAs[5]->Run(cycles); + if (cycles > 0) cycles = DMAs[6]->Run(cycles); + if (cycles > 0) cycles = DMAs[7]->Run(cycles); + ARM7Offset = -cycles; + printf("ARM7 DMA: cyclestorun=%d, req=%d, offset=%d\n", ndscyclestorun, zarp, ARM7Offset); + } + else + { + ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; zarp=ARM7->CyclesToRun; + CurCPU = 2; ARM7->Execute(); CurCPU = 0; + ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + RunTightTimers(1, ARM7->Cycles); //arm7timer += ndscyclestorun; + } +//arm7total += zarp + ARM7Offset;//ARM7->Cycles;//ndscyclestorun+ARM7Offset; +systotal += ndscyclestorun; + RunLooseTimers(1, ndscyclestorun);// + ARM7Offset); + RunSystem(ndscyclestorun); } + //printf("cycles: %ld %ld, %ld %ld, %ld\n", arm9total, arm9timer, arm7total, arm7timer, systotal); + printf("drift: [%ld] %ld %ld, %ld %ld\n", systotal, arm9total-systotal, arm9timer-systotal, arm7total-systotal, arm7timer-systotal); + return GPU::TotalScanlines; } @@ -838,12 +811,12 @@ void Reschedule() s32 oldcycles = CurIterationCycles; CalcIterationCycles(); - if (CurIterationCycles > oldcycles) + if (CurIterationCycles >= oldcycles) { CurIterationCycles = oldcycles; return; } - +//printf("Reschedule %d->%d while in %d, %08X\n", oldcycles, CurIterationCycles, CurCPU, CPUStop); if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1; else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset; // this is all. a reschedule shouldn't happen during DMA or GXFIFO stall. @@ -859,14 +832,20 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para SchedEvent* evt = &SchedList[id]; - if (periodic) evt->WaitCycles += delay; - else evt->WaitCycles = delay + (ARM9->Cycles >> 1); + if (periodic) + evt->WaitCycles += delay; + else + { + if (CurCPU == 1) evt->WaitCycles = delay + (ARM9->Cycles >> 1); + else if (CurCPU == 2) evt->WaitCycles = delay + ARM7->Cycles; + else evt->WaitCycles = delay; + } evt->Func = func; evt->Param = param; SchedListMask |= (1<Halt(2); @@ -1069,11 +1048,17 @@ u32 GetPC(u32 cpu) void HandleTimerOverflow(u32 tid) { Timer* timer = &Timers[tid]; + //if ((timer->Cnt & 0x84) != 0x80) return; timer->Counter += timer->Reload << 16; if (timer->Cnt & (1<<6)) SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3)); -//if (tid<4) printf("[%03d] timer%d IRQ\n", GPU::VCount, tid); + + //u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift); + //delay -= (timer->Counter - timer->Reload) >> timer->CycleShift; + //printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter); + //ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid); + if ((tid & 0x3) == 3) return; @@ -1111,19 +1096,24 @@ void RunTimer(u32 tid, s32 cycles) HandleTimerOverflow(tid); } -void RunTimingCriticalDevices(u32 cpu, s32 cycles) +void RunTightTimers(u32 cpu, s32 cycles) { register u32 timermask = TimerCheckMask[cpu]; - +if(cpu)arm7timer+=cycles;else arm9timer+=cycles; if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles); if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles); if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles); if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles); +} - if (cpu == 0) - { - GPU3D::Run(cycles); - } +void RunLooseTimers(u32 cpu, s32 cycles) +{ + register u32 timermask = TimerCheckMask[cpu]; + + if (timermask & 0x10) RunTimer((cpu<<2)+0, cycles); + if (timermask & 0x20) RunTimer((cpu<<2)+1, cycles); + if (timermask & 0x40) RunTimer((cpu<<2)+2, cycles); + if (timermask & 0x80) RunTimer((cpu<<2)+3, cycles); } @@ -1138,6 +1128,16 @@ bool DMAsInMode(u32 cpu, u32 mode) return false; } +bool DMAsRunning(u32 cpu) +{ + cpu <<= 2; + if (DMAs[cpu+0]->IsRunning()) return true; + if (DMAs[cpu+1]->IsRunning()) return true; + if (DMAs[cpu+2]->IsRunning()) return true; + if (DMAs[cpu+3]->IsRunning()) return true; + return false; +} + void CheckDMAs(u32 cpu, u32 mode) { cpu <<= 2; @@ -1180,12 +1180,28 @@ void TimerStart(u32 id, u16 cnt) if ((!curstart) && newstart) { timer->Counter = timer->Reload << 16; + + /*if ((cnt & 0x84) == 0x80) + { + u32 delay = (0x10000 - timer->Reload) << TimerPrescaler[cnt & 0x03]; + printf("timer%d IRQ: start %d, reload=%04X cnt=%08X\n", id, delay, timer->Reload, timer->Counter); + CancelEvent(Event_TimerIRQ_0 + id); + ScheduleEvent(Event_TimerIRQ_0 + id, false, delay, HandleTimerOverflow, id); + }*/ } if ((cnt & 0x84) == 0x80) - TimerCheckMask[id>>2] |= (1<<(id&0x3)); + { + u32 tmask; + if ((cnt & 0x03) == 0) + tmask = 0x01 << (id&0x3); + else + tmask = 0x10 << (id&0x3); + + TimerCheckMask[id>>2] |= tmask; + } else - TimerCheckMask[id>>2] &= ~(1<<(id&0x3)); + TimerCheckMask[id>>2] &= ~(0x11 << (id&0x3)); } diff --git a/src/NDS.h b/src/NDS.h index 2cf43ef4..5dd6c0eb 100644 --- a/src/NDS.h +++ b/src/NDS.h @@ -31,6 +31,15 @@ enum Event_SPU, Event_Wifi, + /*Event_TimerIRQ_0, + Event_TimerIRQ_1, + Event_TimerIRQ_2, + Event_TimerIRQ_3, + Event_TimerIRQ_4, + Event_TimerIRQ_5, + Event_TimerIRQ_6, + Event_TimerIRQ_7,*/ + Event_DisplayFIFO, Event_ROMTransfer, Event_ROMSPITransfer, @@ -160,10 +169,12 @@ void GXFIFOUnstall(); u32 GetPC(u32 cpu); bool DMAsInMode(u32 cpu, u32 mode); +bool DMAsRunning(u32 cpu); void CheckDMAs(u32 cpu, u32 mode); void StopDMAs(u32 cpu, u32 mode); -void RunTimingCriticalDevices(u32 cpu, s32 cycles); +void RunTightTimers(u32 cpu, s32 cycles); +void RunLooseTimers(u32 cpu, s32 cycles); u8 ARM9Read8(u32 addr); u16 ARM9Read16(u32 addr);