temp commit

almost done killing ARM9/ARM7 desync, f
This commit is contained in:
StapleButter 2018-12-11 03:08:46 +01:00
parent aba4610d45
commit 78d384a754
5 changed files with 213 additions and 141 deletions

View File

@ -164,11 +164,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
// R0=DMA# R1=src R2=size
if (addr==0x1FFD9E0) printf("[%03d] FMVdec\n", GPU::VCount);
if (R[15]==0x1FFDF40) printf("[%03d] FMVdec FINISHED\n", GPU::VCount);
if (addr==0x202585C)
{
//u32 dorp; NDS::ARM9Read32(0x20630DC, &dorp);
//printf("[%03d] IRQ handler thing. wait=%08X\n", GPU::VCount, dorp);
}
u32 oldregion = R[15] >> 24;
u32 newregion = addr >> 24;
@ -443,7 +438,7 @@ void ARMv5::DataAbort()
R[14] = R[15] + (oldcpsr & 0x20 ? 6 : 4);
JumpTo(ExceptionBase + 0x10);
}
extern u64 arm9total, arm7total, arm9timer, arm7timer;
s32 ARMv5::Execute()
{
if (Halted)
@ -456,12 +451,13 @@ s32 ARMv5::Execute()
{
Halted = 0;
if (NDS::IME[0] & 0x1)
TriggerIRQ();
TriggerIRQ(); //!! potential drift
}
else
{
Cycles = CyclesToRun;
//NDS::RunTimingCriticalDevices(0, CyclesToRun >> ClockShift);
arm9total+=(CyclesToRun>>1);//arm9timer+=(CyclesToRun>>1);
//NDS::RunTightTimers(0, CyclesToRun >> ClockShift);
return Cycles;
}
}
@ -506,15 +502,20 @@ s32 ARMv5::Execute()
AddCycles_C();
}
//s32 diff = Cycles - lastcycles;
//NDS::RunTimingCriticalDevices(0, diff >> ClockShift);
//s32 diff = Cycles - lastcycles;arm9timer+=(diff>>1);
//NDS::RunTightTimers(0, diff >> ClockShift);
//lastcycles = Cycles - (diff & ClockDiffMask);
// TODO optimize this shit!!!
if (Halted)
{
if (Halted == 1)
if (Halted == 1 && Cycles < CyclesToRun)
{
s32 diff = CyclesToRun - Cycles;
Cycles = CyclesToRun;
//NDS::RunTightTimers(0, diff >> ClockShift);
//arm9timer += (diff>>1);
}
break;
}
if (NDS::IF[0] & NDS::IE[0])
@ -527,6 +528,13 @@ s32 ARMv5::Execute()
if (Halted == 2)
Halted = 0;
if (Cycles > lastcycles)
{
//s32 diff = Cycles - lastcycles;arm9timer+=(diff>>1);
//NDS::RunTightTimers(0, diff >> ClockShift);
}
arm9total+=(Cycles>>1);
return Cycles;
}
@ -547,7 +555,8 @@ s32 ARMv4::Execute()
else
{
Cycles = CyclesToRun;
NDS::RunTimingCriticalDevices(1, CyclesToRun);
//NDS::RunTightTimers(1, CyclesToRun);
arm7total+=CyclesToRun; //arm7timer+=CyclesToRun;
return Cycles;
}
}
@ -587,15 +596,20 @@ s32 ARMv4::Execute()
AddCycles_C();
}
s32 diff = Cycles - lastcycles;
NDS::RunTimingCriticalDevices(1, diff);
lastcycles = Cycles;
//s32 diff = Cycles - lastcycles;arm7timer+=diff;
//NDS::RunTightTimers(1, diff);
//lastcycles = Cycles;
// TODO optimize this shit!!!
if (Halted)
{
if (Halted == 1)
if (Halted == 1 && Cycles < CyclesToRun)
{
s32 diff = CyclesToRun - Cycles;
Cycles = CyclesToRun;
//NDS::RunTightTimers(1, diff);
//arm7timer += diff;
}
break;
}
if (NDS::IF[1] & NDS::IE[1])
@ -608,5 +622,12 @@ s32 ARMv4::Execute()
if (Halted == 2)
Halted = 0;
if (Cycles > lastcycles)
{
//s32 diff = Cycles - lastcycles;arm7timer+=(diff);
//NDS::RunTightTimers(1, diff);
}
arm7total+=Cycles;
return Cycles;
}

View File

@ -100,7 +100,7 @@ void DMA::DoSavestate(Savestate* file)
file->Var32(&SrcAddrInc);
file->Var32(&DstAddrInc);
file->Var32((u32*)&Running);
file->Var32(&Running);
file->Var32((u32*)&InProgress);
file->Var32((u32*)&IsGXFIFODMA);
}
@ -170,36 +170,45 @@ void DMA::Start()
if ((Cnt & 0x00600000) == 0x00600000)
CurDstAddr = DstAddr;
//printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16);
if(CPU==0&&StartMode!=7&&false)
printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16);
IsGXFIFODMA = (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0);
// TODO eventually: not stop if we're running code in ITCM
Running = true;
if (NDS::DMAsRunning(CPU))
Running = 1;
else
Running = 2;
InProgress = true;
NDS::StopCPU(CPU, 1<<Num);
}
extern u64 arm9total, arm7total;
s32 DMA::Run(s32 cycles)
{
if (!Running)
return cycles;
s32 startc = cycles;
Executing = true;
// add NS penalty for first accesses in burst
bool burststart;
if (StartMode == 0x07 && RemCount > 112)
burststart = (IterCount == 112);
else
burststart = (IterCount == RemCount);
// note: this seems to only apply when starting DMA 'in the void'
// for example, the aging cart DMA PRIORITY test:
// starts a big DMA immediately, and a small DMA upon HBlank
// each pulling from a timer incrementing once per cycle
// it expects that the values be increasing linearly (2c/unit)
// even as the small DMA starts and ends
bool burststart = (Running == 2);
Running = 1;
s32 unitcycles;
s32 lastcycles = cycles;
if (!(Cnt & 0x04000000))
{
int unitcycles;
if (Num == 0)
if (CPU == 0)
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
@ -213,6 +222,7 @@ s32 DMA::Run(s32 cycles)
if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]);
cycles += unitcycles;
}
@ -232,6 +242,7 @@ s32 DMA::Run(s32 cycles)
if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]);
cycles += unitcycles;
}
@ -243,21 +254,25 @@ s32 DMA::Run(s32 cycles)
while (IterCount > 0 && !Stall)
{
cycles -= unitcycles;
NDS::RunTightTimers(CPU, lastcycles-cycles);
//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);}
lastcycles = cycles;
writefn(CurDstAddr, readfn(CurSrcAddr));
cycles -= unitcycles;
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
IterCount--;
RemCount--;
if (cycles < 0) break;
if (cycles <= 0) break;
}
}
else
{
int unitcycles;
if (Num == 0)
if (CPU == 0)
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
@ -273,6 +288,7 @@ s32 DMA::Run(s32 cycles)
if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]);
cycles += unitcycles;
}
@ -294,6 +310,7 @@ s32 DMA::Run(s32 cycles)
if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]);
cycles += unitcycles;
}
@ -305,32 +322,37 @@ s32 DMA::Run(s32 cycles)
while (IterCount > 0 && !Stall)
{
cycles -= unitcycles;
NDS::RunTightTimers(CPU, lastcycles-cycles);
//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);}
lastcycles = cycles;
writefn(CurDstAddr, readfn(CurSrcAddr));
cycles -= unitcycles;
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
IterCount--;
RemCount--;
if (cycles < 0) break;
if (cycles <= 0) break;
}
}
Executing = false;
Stall = false;
//if (CPU) printf("ran DMA for %d cycles (asked %d)\n", startc-cycles, startc);
if (RemCount)
{
if (IterCount == 0)
{
Running = false;
Running = 0;
NDS::ResumeCPU(CPU, 1<<Num);
if (StartMode == 0x07)
GPU3D::CheckFIFODMA();
}
if(CPU){arm7total+=(startc-cycles);}else{arm9total+=(startc-cycles);}
return cycles;
}
@ -340,9 +362,9 @@ s32 DMA::Run(s32 cycles)
if (Cnt & 0x40000000)
NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num);
Running = false;
Running = 0;
InProgress = false;
NDS::ResumeCPU(CPU, 1<<Num);
return cycles - 2;
if(CPU){arm7total+=(startc-(cycles));}else{arm9total+=(startc-(cycles));}
return cycles;
}

View File

@ -41,6 +41,8 @@ public:
return ((mode == StartMode) && (Cnt & 0x80000000));
}
// Reports whether this DMA channel is active, i.e. its Running state is
// anything other than zero.
bool IsRunning()
{
    return Running != 0;
}
void StartIfNeeded(u32 mode)
{
if ((mode == StartMode) && (Cnt & 0x80000000))
@ -74,7 +76,7 @@ private:
u32 DstAddrInc;
u32 CountMask;
bool Running;
u32 Running;
bool InProgress;
bool Executing;

View File

@ -31,7 +31,7 @@
#include "Wifi.h"
#include "Platform.h"
u64 arm9total=0, arm7total=0, arm9timer=0, arm7timer=0, systotal=0;
namespace NDS
{
@ -736,100 +736,73 @@ u32 RunFrame()
{
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
CalcIterationCycles();
s32 arm9cycles;
u64 kiki = arm9total;
if (CPUStop & 0x80000000)
{
// GXFIFO stall
// we just run the GPU and the timers.
// the rest of the hardware is driven by the event scheduler.
s32 cycles = GPU3D::CyclesToRunFor();
GPU3D::Run(cycles);
u32 timermask = TimerCheckMask[0];
if (timermask & 0x1) RunTimer(0, cycles);
if (timermask & 0x2) RunTimer(1, cycles);
if (timermask & 0x4) RunTimer(2, cycles);
if (timermask & 0x8) RunTimer(3, cycles);
// run ARM7 and system peripherals, step by step
// as to give the finer-grained ones a chance to reschedule properly
// in case we end up running a large chunk of GXFIFO commands
s32 ndscyclesran = 0;
s32 ndscyclestorun;
for (;;)
{
ndscyclestorun = std::min(CurIterationCycles, cycles);
if (CPUStop & 0x0FFF0000)
{
s32 cycles = ndscyclestorun - ARM7Offset;
s32 critcycles = cycles;
cycles = DMAs[4]->Run(cycles);
if (cycles > 0) cycles = DMAs[5]->Run(cycles);
if (cycles > 0) cycles = DMAs[6]->Run(cycles);
if (cycles > 0) cycles = DMAs[7]->Run(cycles);
ARM7Offset = -cycles;
RunTimingCriticalDevices(1, critcycles);
}
else
{
ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
CurCPU = 2; ARM7->Execute(); CurCPU = 0;
ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
}
RunSystem(ndscyclestorun);
ndscyclesran += ndscyclestorun;
if (ndscyclesran >= cycles) break;
CalcIterationCycles();
}
arm9cycles = GPU3D::CyclesToRunFor();
arm9cycles = std::min(CurIterationCycles, arm9cycles);
RunTightTimers(0, arm9cycles); arm9total+=arm9cycles;//arm9timer += arm9cycles;
}
else if (CPUStop & 0x0FFF)
{
s32 cycles = CurIterationCycles;
cycles = DMAs[0]->Run(cycles);
if (cycles > 0) cycles = DMAs[1]->Run(cycles);
if (cycles > 0) cycles = DMAs[2]->Run(cycles);
if (cycles > 0) cycles = DMAs[3]->Run(cycles);
//printf("DMAs been running for %d cycles, %d, asked for %d\n", CurIterationCycles-cycles, (u32)(arm9total-kiki), CurIterationCycles);
arm9cycles = CurIterationCycles - cycles;
}
else
{
s32 ndscyclestorun;
if (CPUStop & 0x0FFF)
{
s32 cycles = CurIterationCycles;
cycles = DMAs[0]->Run(cycles);
if (cycles > 0) cycles = DMAs[1]->Run(cycles);
if (cycles > 0) cycles = DMAs[2]->Run(cycles);
if (cycles > 0) cycles = DMAs[3]->Run(cycles);
ndscyclestorun = CurIterationCycles - cycles;
}
else
{
ARM9->CyclesToRun = CurIterationCycles << 1;
CurCPU = 1; ARM9->Execute(); CurCPU = 0;
ndscyclestorun = ARM9->Cycles >> 1;
}
RunTimingCriticalDevices(0, ndscyclestorun);
if (CPUStop & 0x0FFF0000)
{
s32 cycles = ndscyclestorun - ARM7Offset;
s32 critcycles = cycles;
cycles = DMAs[4]->Run(cycles);
if (cycles > 0) cycles = DMAs[5]->Run(cycles);
if (cycles > 0) cycles = DMAs[6]->Run(cycles);
if (cycles > 0) cycles = DMAs[7]->Run(cycles);
ARM7Offset = -cycles;
RunTimingCriticalDevices(1, critcycles);
}
else
{
ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
CurCPU = 2; ARM7->Execute(); CurCPU = 0;
ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
}
RunSystem(ndscyclestorun);
ARM9->CyclesToRun = CurIterationCycles << 1;
CurCPU = 1; ARM9->Execute(); CurCPU = 0;
arm9cycles = ARM9->Cycles >> 1;
RunTightTimers(0, arm9cycles); //arm9timer += arm9cycles;
}
//arm9total += arm9cycles;
RunLooseTimers(0, arm9cycles);
GPU3D::Run(arm9cycles);
s32 ndscyclestorun = arm9cycles;
s32 zarp;
// ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9
if (ARM7Offset > ndscyclestorun)
{
ARM7Offset -= ndscyclestorun;
}
else
if (CPUStop & 0x0FFF0000)
{
s32 cycles = ndscyclestorun - ARM7Offset; zarp=cycles;
cycles = DMAs[4]->Run(cycles);
if (cycles > 0) cycles = DMAs[5]->Run(cycles);
if (cycles > 0) cycles = DMAs[6]->Run(cycles);
if (cycles > 0) cycles = DMAs[7]->Run(cycles);
ARM7Offset = -cycles;
printf("ARM7 DMA: cyclestorun=%d, req=%d, offset=%d\n", ndscyclestorun, zarp, ARM7Offset);
}
else
{
ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; zarp=ARM7->CyclesToRun;
CurCPU = 2; ARM7->Execute(); CurCPU = 0;
ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
RunTightTimers(1, ARM7->Cycles); //arm7timer += ndscyclestorun;
}
//arm7total += zarp + ARM7Offset;//ARM7->Cycles;//ndscyclestorun+ARM7Offset;
systotal += ndscyclestorun;
RunLooseTimers(1, ndscyclestorun);// + ARM7Offset);
RunSystem(ndscyclestorun);
}
//printf("cycles: %ld %ld, %ld %ld, %ld\n", arm9total, arm9timer, arm7total, arm7timer, systotal);
printf("drift: [%ld] %ld %ld, %ld %ld\n", systotal, arm9total-systotal, arm9timer-systotal, arm7total-systotal, arm7timer-systotal);
return GPU::TotalScanlines;
}
@ -838,12 +811,12 @@ void Reschedule()
s32 oldcycles = CurIterationCycles;
CalcIterationCycles();
if (CurIterationCycles > oldcycles)
if (CurIterationCycles >= oldcycles)
{
CurIterationCycles = oldcycles;
return;
}
//printf("Reschedule %d->%d while in %d, %08X\n", oldcycles, CurIterationCycles, CurCPU, CPUStop);
if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1;
else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset;
// this is all. a reschedule shouldn't happen during DMA or GXFIFO stall.
@ -859,14 +832,20 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para
SchedEvent* evt = &SchedList[id];
if (periodic) evt->WaitCycles += delay;
else evt->WaitCycles = delay + (ARM9->Cycles >> 1);
if (periodic)
evt->WaitCycles += delay;
else
{
if (CurCPU == 1) evt->WaitCycles = delay + (ARM9->Cycles >> 1);
else if (CurCPU == 2) evt->WaitCycles = delay + ARM7->Cycles;
else evt->WaitCycles = delay;
}
evt->Func = func;
evt->Param = param;
SchedListMask |= (1<<id);
//printf("scheduling event %d for within %d cycles\n", id, delay);
Reschedule();
}
@ -1041,7 +1020,7 @@ void ResumeCPU(u32 cpu, u32 mask)
void GXFIFOStall()
{
if (CPUStop & 0x80000000) return;
printf("GXFIFO STALL\n");
CPUStop |= 0x80000000;
if (CurCPU == 1) ARM9->Halt(2);
@ -1069,11 +1048,17 @@ u32 GetPC(u32 cpu)
void HandleTimerOverflow(u32 tid)
{
Timer* timer = &Timers[tid];
//if ((timer->Cnt & 0x84) != 0x80) return;
timer->Counter += timer->Reload << 16;
if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
//if (tid<4) printf("[%03d] timer%d IRQ\n", GPU::VCount, tid);
//u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift);
//delay -= (timer->Counter - timer->Reload) >> timer->CycleShift;
//printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter);
//ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid);
if ((tid & 0x3) == 3)
return;
@ -1111,19 +1096,24 @@ void RunTimer(u32 tid, s32 cycles)
HandleTimerOverflow(tid);
}
void RunTimingCriticalDevices(u32 cpu, s32 cycles)
// Advances the timers of the given CPU that are flagged in the low nibble
// (bits 0-3) of TimerCheckMask[cpu] by `cycles`.
//
// cpu:    0 or 1; selects TimerCheckMask[cpu] and timers (cpu<<2)+0..3.
// cycles: number of cycles to run each selected timer for.
//
// Also accumulates `cycles` into the debug counters arm9timer / arm7timer,
// which feed the drift printout.
void RunTightTimers(u32 cpu, s32 cycles)
{
    // `register` dropped: the storage-class specifier was removed in C++17.
    const u32 timermask = TimerCheckMask[cpu];
    const u32 base = cpu << 2;

    // debug bookkeeping for ARM9/ARM7 timer drift measurement
    if (cpu)
        arm7timer += cycles;
    else
        arm9timer += cycles;

    if (timermask & 0x1) RunTimer(base+0, cycles);
    if (timermask & 0x2) RunTimer(base+1, cycles);
    if (timermask & 0x4) RunTimer(base+2, cycles);
    if (timermask & 0x8) RunTimer(base+3, cycles);
}
if (cpu == 0)
{
GPU3D::Run(cycles);
}
// Advances the timers of the given CPU that are flagged in the high nibble
// (bits 4-7) of TimerCheckMask[cpu] by `cycles`.
//
// cpu:    0 or 1; selects TimerCheckMask[cpu] and timers (cpu<<2)+0..3.
// cycles: number of cycles to run each selected timer for.
//
// Bit (4+n) of the mask corresponds to timer n of that CPU.
void RunLooseTimers(u32 cpu, s32 cycles)
{
    // `register` dropped: the storage-class specifier was removed in C++17.
    const u32 timermask = TimerCheckMask[cpu];
    const u32 base = cpu << 2;

    if (timermask & 0x10) RunTimer(base+0, cycles);
    if (timermask & 0x20) RunTimer(base+1, cycles);
    if (timermask & 0x40) RunTimer(base+2, cycles);
    if (timermask & 0x80) RunTimer(base+3, cycles);
}
@ -1138,6 +1128,16 @@ bool DMAsInMode(u32 cpu, u32 mode)
return false;
}
// Returns true if at least one of the four DMA channels belonging to the
// given CPU reports IsRunning(). The channels of CPU n are DMAs[4n..4n+3],
// checked in ascending order.
bool DMAsRunning(u32 cpu)
{
    const u32 base = cpu << 2;

    for (u32 chan = 0; chan < 4; chan++)
    {
        if (DMAs[base + chan]->IsRunning())
            return true;
    }

    return false;
}
void CheckDMAs(u32 cpu, u32 mode)
{
cpu <<= 2;
@ -1180,12 +1180,28 @@ void TimerStart(u32 id, u16 cnt)
if ((!curstart) && newstart)
{
timer->Counter = timer->Reload << 16;
/*if ((cnt & 0x84) == 0x80)
{
u32 delay = (0x10000 - timer->Reload) << TimerPrescaler[cnt & 0x03];
printf("timer%d IRQ: start %d, reload=%04X cnt=%08X\n", id, delay, timer->Reload, timer->Counter);
CancelEvent(Event_TimerIRQ_0 + id);
ScheduleEvent(Event_TimerIRQ_0 + id, false, delay, HandleTimerOverflow, id);
}*/
}
if ((cnt & 0x84) == 0x80)
TimerCheckMask[id>>2] |= (1<<(id&0x3));
{
u32 tmask;
if ((cnt & 0x03) == 0)
tmask = 0x01 << (id&0x3);
else
tmask = 0x10 << (id&0x3);
TimerCheckMask[id>>2] |= tmask;
}
else
TimerCheckMask[id>>2] &= ~(1<<(id&0x3));
TimerCheckMask[id>>2] &= ~(0x11 << (id&0x3));
}

View File

@ -31,6 +31,15 @@ enum
Event_SPU,
Event_Wifi,
/*Event_TimerIRQ_0,
Event_TimerIRQ_1,
Event_TimerIRQ_2,
Event_TimerIRQ_3,
Event_TimerIRQ_4,
Event_TimerIRQ_5,
Event_TimerIRQ_6,
Event_TimerIRQ_7,*/
Event_DisplayFIFO,
Event_ROMTransfer,
Event_ROMSPITransfer,
@ -160,10 +169,12 @@ void GXFIFOUnstall();
u32 GetPC(u32 cpu);
bool DMAsInMode(u32 cpu, u32 mode);
bool DMAsRunning(u32 cpu);
void CheckDMAs(u32 cpu, u32 mode);
void StopDMAs(u32 cpu, u32 mode);
void RunTimingCriticalDevices(u32 cpu, s32 cycles);
void RunTightTimers(u32 cpu, s32 cycles);
void RunLooseTimers(u32 cpu, s32 cycles);
u8 ARM9Read8(u32 addr);
u16 ARM9Read16(u32 addr);