redesign main emu loop to use timestamps instead of being a trainwreck

* cleaner code
* faster in some cases
* more accurate (on-demand compensation for timers and GPU)
* less prone to desyncs
* overall betterer
This commit is contained in:
StapleButter 2019-01-05 05:28:58 +01:00
parent 90f2c0834e
commit 669247e8c8
14 changed files with 360 additions and 412 deletions

View File

@ -61,8 +61,6 @@ ARM::ARM(u32 num)
{
// well uh
Num = num;
SetClockShift(0); // safe default
}
ARM::~ARM()
@ -110,7 +108,7 @@ void ARM::DoSavestate(Savestate* file)
file->Section((char*)(Num ? "ARM7" : "ARM9"));
file->Var32((u32*)&Cycles);
file->Var32((u32*)&CyclesToRun);
//file->Var32((u32*)&CyclesToRun);
file->Var32(&Halted);
file->VarArray(R, 16*sizeof(u32));
@ -450,7 +448,7 @@ void ARMv5::DataAbort()
JumpTo(ExceptionBase + 0x10);
}
s32 ARMv5::Execute()
void ARMv5::Execute()
{
if (Halted)
{
@ -466,19 +464,12 @@ s32 ARMv5::Execute()
}
else
{
Cycles = CyclesToRun;
#ifdef DEBUG_CHECK_DESYNC
NDS::dbg_CyclesARM9 += (CyclesToRun >> ClockShift);
#endif // DEBUG_CHECK_DESYNC
//NDS::RunTightTimers(0, CyclesToRun >> ClockShift);
return Cycles;
NDS::ARM9Timestamp = NDS::ARM9Target;
return;
}
}
Cycles = 0;
s32 lastcycles = 0;
while (Cycles < CyclesToRun)
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
if (CPSR & 0x20) // THUMB
{
@ -515,19 +506,12 @@ s32 ARMv5::Execute()
AddCycles_C();
}
//s32 diff = Cycles - lastcycles;
//NDS::RunTightTimers(0, diff >> ClockShift);
//lastcycles = Cycles - (diff & ClockDiffMask);
// TODO optimize this shit!!!
if (Halted)
{
if (Halted == 1 && Cycles < CyclesToRun)
if (Halted == 1 && NDS::ARM9Timestamp < NDS::ARM9Target)
{
//s32 diff = CyclesToRun - Cycles;
Cycles = CyclesToRun;
//NDS::RunTightTimers(0, diff >> ClockShift);
//arm9timer += (diff>>1);
NDS::ARM9Timestamp = NDS::ARM9Target;
}
break;
}
@ -536,24 +520,16 @@ s32 ARMv5::Execute()
if (NDS::IME[0] & 0x1)
TriggerIRQ();
}
NDS::ARM9Timestamp += Cycles;
Cycles = 0;
}
if (Halted == 2)
Halted = 0;
/*if (Cycles > lastcycles)
{
s32 diff = Cycles - lastcycles;
//NDS::RunTightTimers(0, diff >> ClockShift);
}*/
#ifdef DEBUG_CHECK_DESYNC
NDS::dbg_CyclesARM9 += (Cycles >> ClockShift);
#endif // DEBUG_CHECK_DESYNC
return Cycles;
}
s32 ARMv4::Execute()
void ARMv4::Execute()
{
if (Halted)
{
@ -569,19 +545,12 @@ s32 ARMv4::Execute()
}
else
{
Cycles = CyclesToRun;
#ifdef DEBUG_CHECK_DESYNC
NDS::dbg_CyclesARM7 += CyclesToRun;
#endif // DEBUG_CHECK_DESYNC
//NDS::RunTightTimers(1, CyclesToRun);
return Cycles;
NDS::ARM7Timestamp = NDS::ARM7Target;
return;
}
}
Cycles = 0;
s32 lastcycles = 0;
while (Cycles < CyclesToRun)
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
if (CPSR & 0x20) // THUMB
{
@ -613,19 +582,12 @@ s32 ARMv4::Execute()
AddCycles_C();
}
//s32 diff = Cycles - lastcycles;
//NDS::RunTightTimers(1, diff);
//lastcycles = Cycles;
// TODO optimize this shit!!!
if (Halted)
{
if (Halted == 1 && Cycles < CyclesToRun)
if (Halted == 1 && NDS::ARM7Timestamp < NDS::ARM7Target)
{
//s32 diff = CyclesToRun - Cycles;
Cycles = CyclesToRun;
//NDS::RunTightTimers(1, diff);
//arm7timer += diff;
NDS::ARM7Timestamp = NDS::ARM7Target;
}
break;
}
@ -634,20 +596,11 @@ s32 ARMv4::Execute()
if (NDS::IME[1] & 0x1)
TriggerIRQ();
}
NDS::ARM7Timestamp += Cycles;
Cycles = 0;
}
if (Halted == 2)
Halted = 0;
/*if (Cycles > lastcycles)
{
//s32 diff = Cycles - lastcycles;
//NDS::RunTightTimers(1, diff);
}*/
#ifdef DEBUG_CHECK_DESYNC
NDS::dbg_CyclesARM7 += Cycles;
#endif // DEBUG_CHECK_DESYNC
return Cycles;
}

View File

@ -42,12 +42,6 @@ public:
virtual void DoSavestate(Savestate* file);
void SetClockShift(u32 shift)
{
ClockShift = shift;
ClockDiffMask = (1<<shift) - 1;
}
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
void RestoreCPSR();
@ -67,7 +61,7 @@ public:
}
}
virtual s32 Execute() = 0;
virtual void Execute() = 0;
bool CheckCondition(u32 code)
{
@ -122,13 +116,7 @@ public:
u32 Num;
// shift relative to system clock
// 0=33MHz 1=66MHz 2=133MHz
u32 ClockShift;
u32 ClockDiffMask;
s32 Cycles;
s32 CyclesToRun;
u32 Halted;
u32 CodeRegion;
@ -170,7 +158,7 @@ public:
void PrefetchAbort();
void DataAbort();
s32 Execute();
void Execute();
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
@ -287,7 +275,7 @@ public:
void JumpTo(u32 addr, bool restorecpsr = false);
s32 Execute();
void Execute();
u16 CodeRead16(u32 addr)
{

View File

@ -265,7 +265,7 @@ void ARMv5::UpdateRegionTimings(u32 addrstart, u32 addrend)
}
else
{
MemTimings[i][0] = bustimings[2] << ClockShift;
MemTimings[i][0] = bustimings[2] << NDS::ARM9ClockShift;
}
if (pu & 0x10)
@ -276,9 +276,9 @@ void ARMv5::UpdateRegionTimings(u32 addrstart, u32 addrend)
}
else
{
MemTimings[i][1] = bustimings[0] << ClockShift;
MemTimings[i][2] = bustimings[2] << ClockShift;
MemTimings[i][3] = bustimings[3] << ClockShift;
MemTimings[i][1] = bustimings[0] << NDS::ARM9ClockShift;
MemTimings[i][2] = bustimings[2] << NDS::ARM9ClockShift;
MemTimings[i][3] = bustimings[3] << NDS::ARM9ClockShift;
}
}
}
@ -358,7 +358,7 @@ void ARMv5::ICacheLookup(u32 addr)
// ouch :/
//printf("cache miss %08X: %d/%d\n", addr, NDS::ARM9MemTimings[addr >> 14][2], NDS::ARM9MemTimings[addr >> 14][3]);
CodeCycles = (NDS::ARM9MemTimings[addr >> 14][2] + (NDS::ARM9MemTimings[addr >> 14][3] * 7)) << ClockShift;
CodeCycles = (NDS::ARM9MemTimings[addr >> 14][2] + (NDS::ARM9MemTimings[addr >> 14][3] * 7)) << NDS::ARM9ClockShift;
CurICacheLine = ptr;
}

View File

@ -45,6 +45,8 @@
// * applied to all accesses for mainRAM->mainRAM, resulting in timings of 16-18 cycles per unit
//
// TODO: GBA slot
// TODO: re-add initial NS delay
// TODO: timings are nonseq when address is fixed/decrementing
DMA::DMA(u32 cpu, u32 num)
@ -186,33 +188,27 @@ void DMA::Start()
NDS::StopCPU(CPU, 1<<Num);
}
s32 DMA::Run(s32 cycles)
void DMA::Run()
{
if (!Running)
return cycles;
if (!Running) return;
if (CPU == 0) return Run9();
else return Run7();
}
#ifdef DEBUG_CHECK_DESYNC
s32 startc = cycles;
#endif // DEBUG_CHECK_DESYNC
void DMA::Run9()
{
if (NDS::ARM9Timestamp >= NDS::ARM9Target) return;
Executing = true;
// add NS penalty for first accesses in burst
// note: this seems to only apply when starting DMA 'in the void'
// for example, the aging cart DMA PRIORITY test:
// starts a big DMA immediately, and a small DMA upon HBlank
// each pulling from a timer incrementing once per cycle
// it expects that the values be increasing linearly (2c/unit)
// even as the small DMA starts and ends
bool burststart = (Running == 2);
Running = 1;
s32 unitcycles;
s32 lastcycles = cycles;
//s32 lastcycles = cycles;
if (!(Cnt & 0x04000000))
{
if (CPU == 0)
if (!(Cnt & (1<<26)))
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
@ -224,59 +220,29 @@ s32 DMA::Run(s32 cycles)
if ((CurSrcAddr >> 24) == (CurDstAddr >> 24))
unitcycles++;
if (burststart)
/*if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]);
cycles += unitcycles;
}*/
}
}
}
else
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0];
}
else
{
unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][1] + NDS::ARM7MemTimings[CurDstAddr >> 15][1];
if ((CurSrcAddr >> 23) == (CurDstAddr >> 23))
unitcycles++;
if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]);
cycles += unitcycles;
}
}
}
u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
while (IterCount > 0 && !Stall)
{
cycles -= unitcycles;
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
NDS::RunTightTimers(CPU, lastcycles-cycles);
lastcycles = cycles;
writefn(CurDstAddr, readfn(CurSrcAddr));
NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
IterCount--;
RemCount--;
if (cycles <= 0) break;
if (NDS::ARM9Timestamp >= NDS::ARM9Target) break;
}
}
else
{
if (CPU == 0)
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
@ -290,12 +256,102 @@ s32 DMA::Run(s32 cycles)
else if ((CurSrcAddr >> 24) == 0x02)
unitcycles--;
if (burststart)
/*if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]);
cycles += unitcycles;
}*/
}
while (IterCount > 0 && !Stall)
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
IterCount--;
RemCount--;
if (NDS::ARM9Timestamp >= NDS::ARM9Target) break;
}
}
Executing = false;
Stall = false;
if (RemCount)
{
if (IterCount == 0)
{
Running = 0;
NDS::ResumeCPU(0, 1<<Num);
if (StartMode == 0x07)
GPU3D::CheckFIFODMA();
}
return;
}
if (!(Cnt & (1<<25)))
Cnt &= ~(1<<31);
if (Cnt & (1<<30))
NDS::SetIRQ(0, NDS::IRQ_DMA0 + Num);
Running = 0;
InProgress = false;
NDS::ResumeCPU(0, 1<<Num);
}
void DMA::Run7()
{
if (NDS::ARM7Timestamp >= NDS::ARM7Target) return;
Executing = true;
// add NS penalty for first accesses in burst
bool burststart = (Running == 2);
Running = 1;
s32 unitcycles;
//s32 lastcycles = cycles;
if (!(Cnt & (1<<26)))
{
if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)
{
unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0];
}
else
{
unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][1] + NDS::ARM7MemTimings[CurDstAddr >> 15][1];
if ((CurSrcAddr >> 23) == (CurDstAddr >> 23))
unitcycles++;
/*if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]);
cycles += unitcycles;
}*/
}
while (IterCount > 0 && !Stall)
{
NDS::ARM7Timestamp += unitcycles;
NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
IterCount--;
RemCount--;
if (NDS::ARM7Timestamp >= NDS::ARM7Target) break;
}
}
else
@ -312,34 +368,26 @@ s32 DMA::Run(s32 cycles)
else if ((CurSrcAddr >> 24) == 0x02)
unitcycles--;
if (burststart)
/*if (burststart)
{
cycles -= 2;
cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]);
cycles += unitcycles;
}*/
}
}
}
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
while (IterCount > 0 && !Stall)
{
cycles -= unitcycles;
NDS::ARM7Timestamp += unitcycles;
NDS::RunTightTimers(CPU, lastcycles-cycles);
lastcycles = cycles;
writefn(CurDstAddr, readfn(CurSrcAddr));
NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
IterCount--;
RemCount--;
if (cycles <= 0) break;
if (NDS::ARM7Timestamp >= NDS::ARM7Target) break;
}
}
@ -351,34 +399,19 @@ s32 DMA::Run(s32 cycles)
if (IterCount == 0)
{
Running = 0;
NDS::ResumeCPU(CPU, 1<<Num);
if (StartMode == 0x07)
GPU3D::CheckFIFODMA();
NDS::ResumeCPU(1, 1<<Num);
}
#ifdef DEBUG_CHECK_DESYNC
if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles);
else NDS::dbg_CyclesARM9 += (startc-cycles);
#endif // DEBUG_CHECK_DESYNC
return cycles;
return;
}
if (!(Cnt & 0x02000000))
Cnt &= ~0x80000000;
if (!(Cnt & (1<<25)))
Cnt &= ~(1<<31);
if (Cnt & 0x40000000)
NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num);
if (Cnt & (1<<30))
NDS::SetIRQ(1, NDS::IRQ_DMA0 + Num);
Running = 0;
InProgress = false;
NDS::ResumeCPU(CPU, 1<<Num);
#ifdef DEBUG_CHECK_DESYNC
if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles);
else NDS::dbg_CyclesARM9 += (startc-cycles);
#endif // DEBUG_CHECK_DESYNC
return cycles;
NDS::ResumeCPU(1, 1<<Num);
}

View File

@ -34,7 +34,10 @@ public:
void WriteCnt(u32 val);
void Start();
s32 Run(s32 cycles);
void Run();
void Run9();
void Run7();
bool IsInMode(u32 mode)
{

View File

@ -798,7 +798,7 @@ void StartScanline(u32 line)
}
if (RunFIFO)
NDS::ScheduleEvent(NDS::Event_DisplayFIFO, true, 32, DisplayFIFO, 0);
NDS::ScheduleEvent(NDS::Event_DisplayFIFO, false, 32, DisplayFIFO, 0);
}
if (VCount == 262)

View File

@ -91,6 +91,7 @@
// and imposes rules on when further vertex commands can run
// (one every 9-cycle time slot during polygon setup)
// polygon setup time is 27 cycles for a triangle and 36 for a quad
// except: only one time slot is taken if the polygon is rejected by culling/clipping
// * additionally, some commands (BEGIN, LIGHT_VECTOR, BOXTEST) stall the polygon pipeline
@ -182,6 +183,7 @@ u32 GXStat;
u32 ExecParams[32];
u32 ExecParamCount;
u64 Timestamp;
s32 CycleCount;
s32 VertexPipeline;
s32 NormalPipeline;
@ -330,6 +332,7 @@ void Reset()
memset(ExecParams, 0, 32*4);
ExecParamCount = 0;
Timestamp = 0;
CycleCount = 0;
VertexPipeline = 0;
NormalPipeline = 0;
@ -405,6 +408,7 @@ void DoSavestate(Savestate* file)
file->VarArray(ExecParams, 32*4);
file->Var32(&ExecParamCount);
file->Var32((u32*)&CycleCount);
file->Var64(&Timestamp);
file->Var32(&MatrixMode);
@ -2271,16 +2275,18 @@ void FinishWork(s32 cycles)
GXStat &= ~(1<<27);
}
void Run(s32 cycles)
void Run()
{
if (!GeometryEnabled)
return;
if (FlushRequest)
return;
if (CmdPIPE->IsEmpty() && !(GXStat & (1<<27)))
if (!GeometryEnabled || FlushRequest ||
(CmdPIPE->IsEmpty() && !(GXStat & (1<<27))))
{
Timestamp = NDS::ARM9Timestamp >> NDS::ARM9ClockShift;
return;
}
s32 cycles = (NDS::ARM9Timestamp >> NDS::ARM9ClockShift) - Timestamp;
CycleCount -= cycles;
Timestamp = NDS::ARM9Timestamp >> NDS::ARM9ClockShift;
if (CycleCount <= 0)
{
@ -2465,21 +2471,27 @@ u8 Read8(u32 addr)
switch (addr)
{
case 0x04000600:
Run();
return GXStat & 0xFF;
case 0x04000601:
{
Run();
return ((GXStat >> 8) & 0xFF) |
(PosMatrixStackPointer & 0x1F) |
((ProjMatrixStackPointer & 0x1) << 5);
}
case 0x04000602:
{
Run();
u32 fifolevel = CmdFIFO->Level();
return fifolevel & 0xFF;
}
case 0x04000603:
{
Run();
u32 fifolevel = CmdFIFO->Level();
return ((GXStat >> 24) & 0xFF) |
@ -2505,12 +2517,16 @@ u16 Read16(u32 addr)
case 0x04000600:
{
Run();
return (GXStat & 0xFFFF) |
((PosMatrixStackPointer & 0x1F) << 8) |
((ProjMatrixStackPointer & 0x1) << 13);
}
case 0x04000602:
{
Run();
u32 fifolevel = CmdFIFO->Level();
return (GXStat >> 16) |
@ -2545,6 +2561,8 @@ u32 Read32(u32 addr)
case 0x04000600:
{
Run();
u32 fifolevel = CmdFIFO->Level();
return GXStat |

View File

@ -84,6 +84,8 @@ extern u32 RenderClearAttr1, RenderClearAttr2;
extern std::array<Polygon*,2048> RenderPolygonRAM;
extern u32 RenderNumPolygons;
extern u64 Timestamp;
bool Init();
void DeInit();
void Reset();
@ -95,7 +97,7 @@ void SetEnabled(bool geometry, bool rendering);
void ExecuteCommand();
s32 CyclesToRunFor();
void Run(s32 cycles);
void Run();
void CheckFIFOIRQ();
void CheckFIFODMA();

View File

@ -36,14 +36,6 @@
namespace NDS
{
#ifdef DEBUG_CHECK_DESYNC
u64 dbg_CyclesSys;
u64 dbg_CyclesARM9;
u64 dbg_CyclesTimer9;
u64 dbg_CyclesARM7;
u64 dbg_CyclesTimer7;
#endif
// timing notes
//
// * this implementation is technically wrong for VRAM
@ -60,6 +52,7 @@ u64 dbg_CyclesTimer7;
// * 3 / ARM9 internal: cache/TCM
//
// ARM9 always gets 3c nonseq penalty when using the bus (except for mainRAM where the penalty is 7c)
// /!\ 3c penalty doesn't apply to DMA!
//
// ARM7 only gets nonseq penalty when accessing mainRAM (7c as for ARM9)
//
@ -72,14 +65,20 @@ ARMv5* ARM9;
ARMv4* ARM7;
u32 NumFrames;
u64 SysClockCycles;
u64 LastSysClockCycles;
u32 FrameSysClockCycles;
u64 FrameStartTimestamp;
s32 CurIterationCycles;
s32 ARM7Offset;
int CurCPU;
const s32 kMaxIterationCycles = 16;
u32 ARM9ClockShift;
// no need to worry about those overflowing, they can keep going for at least 4350 years
u64 ARM9Timestamp, ARM9Target;
u64 ARM7Timestamp, ARM7Target;
u64 SysTimestamp;
SchedEvent SchedList[Event_MAX];
u32 SchedListMask;
@ -119,6 +118,7 @@ u16 ARM7BIOSProt;
Timer Timers[8];
u8 TimerCheckMask[2];
u64 TimerTimestamp[2];
DMA* DMAs[8];
u32 DMA9Fill[4];
@ -270,6 +270,8 @@ void InitTimings()
// (especially wrt VRAM mirroring and overlapping and whatnot).
// ARM9
// TODO: +3c nonseq waitstate doesn't apply to DMA!
// but of course mainRAM always gets 8c nonseq waitstate
SetARM9RegionTimings(0x00000000, 0xFFFFFFFF, 32, 1 + 3, 1); // void
@ -384,15 +386,6 @@ void Reset()
FILE* f;
u32 i;
#ifdef DEBUG_CHECK_DESYNC
dbg_CyclesSys = 0;
dbg_CyclesARM9 = 0;
dbg_CyclesTimer9 = 0;
dbg_CyclesARM7 = 0;
dbg_CyclesTimer7 = 0;
#endif // DEBUG_CHECK_DESYNC
SysClockCycles = 0;
LastSysClockCycles = 0;
f = melon_fopen_local("bios9.bin", "rb");
@ -429,8 +422,12 @@ void Reset()
fclose(f);
}
ARM9->SetClockShift(1);
ARM7->SetClockShift(0);
// TODO for later: configure this when emulating a DSi
ARM9ClockShift = 1;
ARM9Timestamp = 0; ARM9Target = 0;
ARM7Timestamp = 0; ARM7Target = 0;
SysTimestamp = 0;
InitTimings();
@ -481,6 +478,8 @@ void Reset()
memset(Timers, 0, 8*sizeof(Timer));
TimerCheckMask[0] = 0;
TimerCheckMask[1] = 0;
TimerTimestamp[0] = 0;
TimerTimestamp[1] = 0;
for (i = 0; i < 8; i++) DMAs[i]->Reset();
memset(DMA9Fill, 0, 4*4);
@ -488,9 +487,6 @@ void Reset()
memset(SchedList, 0, sizeof(SchedList));
SchedListMask = 0;
CurIterationCycles = 0;
ARM7Offset = 0;
KeyInput = 0x007F03FF;
KeyCnt = 0;
RCnt = 0;
@ -566,7 +562,7 @@ bool DoSavestate_Scheduler(Savestate* file)
}
file->Var32(&funcid);
file->Var32((u32*)&evt->WaitCycles);
file->Var64(&evt->Timestamp);
file->Var32(&evt->Param);
}
}
@ -596,7 +592,7 @@ bool DoSavestate_Scheduler(Savestate* file)
else
evt->Func = NULL;
file->Var32((u32*)&evt->WaitCycles);
file->Var64(&evt->Timestamp);
file->Var32(&evt->Param);
}
}
@ -651,13 +647,20 @@ bool DoSavestate(Savestate* file)
file->Var32(&timer->CycleShift);
}
file->VarArray(TimerCheckMask, 2*sizeof(u8));
file->VarArray(TimerTimestamp, 2*sizeof(u64));
file->VarArray(DMA9Fill, 4*sizeof(u32));
if (!DoSavestate_Scheduler(file)) return false;
file->Var32(&SchedListMask);
file->Var32((u32*)&CurIterationCycles);
file->Var32((u32*)&ARM7Offset);
file->Var64(&ARM9Timestamp);
file->Var64(&ARM9Target);
file->Var64(&ARM7Timestamp);
file->Var64(&ARM7Target);
file->Var64(&SysTimestamp);
file->Var64(&LastSysClockCycles);
file->Var64(&FrameStartTimestamp);
file->Var32(&NumFrames);
// TODO: save KeyInput????
file->Var16(&KeyCnt);
@ -731,40 +734,51 @@ void RelocateSave(const char* path, bool write)
}
void CalcIterationCycles()
{
CurIterationCycles = 16;
u64 NextTarget()
{
u64 ret = SysTimestamp + kMaxIterationCycles;
u32 mask = SchedListMask;
for (int i = 0; i < Event_MAX; i++)
{
if (!(SchedListMask & (1<<i)))
continue;
if (SchedList[i].WaitCycles < CurIterationCycles)
CurIterationCycles = SchedList[i].WaitCycles;
if (!mask) break;
if (mask & 0x1)
{
if (SchedList[i].Timestamp < ret)
ret = SchedList[i].Timestamp;
}
mask >>= 1;
}
return ret;
}
void RunSystem(s32 cycles)
void RunSystem(u64 timestamp)
{
SysTimestamp = timestamp;
u32 mask = SchedListMask;
for (int i = 0; i < Event_MAX; i++)
{
if (!(SchedListMask & (1<<i)))
continue;
SchedList[i].WaitCycles -= cycles;
if (SchedList[i].WaitCycles < 1)
if (!mask) break;
if (mask & 0x1)
{
if (SchedList[i].Timestamp <= SysTimestamp)
{
SchedListMask &= ~(1<<i);
SchedList[i].Func(SchedList[i].Param);
}
}
mask >>= 1;
}
}
u32 RunFrame()
{
FrameSysClockCycles = 0;
FrameStartTimestamp = SysTimestamp;
if (!Running) return 263; // dorp
if (CPUStop & 0x40000000) return 263;
@ -774,88 +788,55 @@ u32 RunFrame()
while (Running && GPU::TotalScanlines==0)
{
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
CalcIterationCycles();
s32 arm9cycles;
u64 target = NextTarget();
ARM9Target = target << ARM9ClockShift;
CurCPU = 0;
if (CPUStop & 0x80000000)
{
// GXFIFO stall
// we just run the GPU and the timers.
// the rest of the hardware is driven by the event scheduler.
s32 cycles = GPU3D::CyclesToRunFor();
arm9cycles = GPU3D::CyclesToRunFor();
arm9cycles = std::min(CurIterationCycles, arm9cycles);
RunTightTimers(0, arm9cycles);
#ifdef DEBUG_CHECK_DESYNC
dbg_CyclesARM9 += arm9cycles;
#endif // DEBUG_CHECK_DESYNC
ARM9Timestamp = std::min(ARM9Target, ARM9Timestamp+(cycles<<ARM9ClockShift));
}
else if (CPUStop & 0x0FFF)
{
s32 cycles = CurIterationCycles;
cycles = DMAs[0]->Run(cycles);
if (cycles > 0 && !(CPUStop & 0x80000000))
cycles = DMAs[1]->Run(cycles);
if (cycles > 0 && !(CPUStop & 0x80000000))
cycles = DMAs[2]->Run(cycles);
if (cycles > 0 && !(CPUStop & 0x80000000))
cycles = DMAs[3]->Run(cycles);
arm9cycles = CurIterationCycles - cycles;
DMAs[0]->Run();
if (!(CPUStop & 0x80000000)) DMAs[1]->Run();
if (!(CPUStop & 0x80000000)) DMAs[2]->Run();
if (!(CPUStop & 0x80000000)) DMAs[3]->Run();
}
else
{
ARM9->CyclesToRun = CurIterationCycles << 1;
CurCPU = 1; ARM9->Execute(); CurCPU = 0;
arm9cycles = ARM9->Cycles >> 1;
RunTightTimers(0, arm9cycles);
ARM9->Execute();
}
RunLooseTimers(0, arm9cycles);
GPU3D::Run(arm9cycles);
RunTimers(0);
GPU3D::Run();
s32 ndscyclestorun = arm9cycles;
target = ARM9Timestamp >> ARM9ClockShift;
CurCPU = 1;
// ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9
if (ARM7Offset > ndscyclestorun)
while (ARM7Timestamp < target)
{
ARM7Offset -= ndscyclestorun;
}
else
ARM7Target = target; // might be changed by a reschedule
if (CPUStop & 0x0FFF0000)
{
s32 cycles = ndscyclestorun - ARM7Offset;
cycles = DMAs[4]->Run(cycles);
if (cycles > 0)
cycles = DMAs[5]->Run(cycles);
if (cycles > 0)
cycles = DMAs[6]->Run(cycles);
if (cycles > 0)
cycles = DMAs[7]->Run(cycles);
ARM7Offset = -cycles;
DMAs[4]->Run();
DMAs[5]->Run();
DMAs[6]->Run();
DMAs[7]->Run();
}
else
{
ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
CurCPU = 2; ARM7->Execute(); CurCPU = 0;
ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
RunTightTimers(1, ARM7->Cycles);
ARM7->Execute();
}
#ifdef DEBUG_CHECK_DESYNC
dbg_CyclesSys += ndscyclestorun;
#endif // DEBUG_CHECK_DESYNC
RunTimers(1);
}
RunLooseTimers(1, ndscyclestorun);
RunSystem(ndscyclestorun);
SysClockCycles += ndscyclestorun;
LastSysClockCycles += ndscyclestorun;
FrameSysClockCycles += ndscyclestorun;
RunSystem(target);
if (CPUStop & 0x40000000)
{
@ -867,12 +848,11 @@ u32 RunFrame()
}
#ifdef DEBUG_CHECK_DESYNC
printf("[%08X%08X] ARM9=%ld timer9=%ld, ARM7=%ld timer7=%ld\n",
(u32)(dbg_CyclesSys>>32), (u32)dbg_CyclesSys,
dbg_CyclesARM9-dbg_CyclesSys,
dbg_CyclesTimer9-dbg_CyclesSys,
dbg_CyclesARM7-dbg_CyclesSys,
dbg_CyclesTimer7-dbg_CyclesSys);
printf("[%08X%08X] ARM9=%ld, ARM7=%ld, GPU=%ld\n",
(u32)(SysTimestamp>>32), (u32)SysTimestamp,
(ARM9Timestamp>>1)-SysTimestamp,
ARM7Timestamp-SysTimestamp,
GPU3D::Timestamp-SysTimestamp);
#endif
NumFrames++;
@ -880,26 +860,18 @@ u32 RunFrame()
return GPU::TotalScanlines;
}
void Reschedule()
void Reschedule(u64 target)
{
s32 oldcycles = CurIterationCycles;
CalcIterationCycles();
if (CurIterationCycles >= oldcycles)
{
CurIterationCycles = oldcycles;
return;
}
if (CurCPU == 0)
{
CurIterationCycles = oldcycles;
return;
if (target < (ARM9Target >> ARM9ClockShift))
ARM9Target = (target << ARM9ClockShift);
}
else
{
if (target < ARM7Target)
ARM7Target = target;
}
if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1;
else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset;
// this is all. a reschedule shouldn't happen during DMA or GXFIFO stall.
}
void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param)
@ -913,12 +885,13 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para
SchedEvent* evt = &SchedList[id];
if (periodic)
evt->WaitCycles += delay;
evt->Timestamp += delay;
else
{
if (CurCPU == 1) evt->WaitCycles = delay + (ARM9->Cycles >> 1);
else if (CurCPU == 2) evt->WaitCycles = delay + ARM7->Cycles;
else evt->WaitCycles = delay;
if (CurCPU == 0)
evt->Timestamp = (ARM9Timestamp >> ARM9ClockShift) + delay;
else
evt->Timestamp = ARM7Timestamp + delay;
}
evt->Func = func;
@ -926,7 +899,7 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para
SchedListMask |= (1<<id);
Reschedule();
Reschedule(evt->Timestamp);
}
void CancelEvent(u32 id)
@ -1156,27 +1129,22 @@ u64 GetSysClockCycles(int num)
if (num == 0 || num == 2)
{
if (num == 0) ret = SysClockCycles;
else if (num == 2) ret = FrameSysClockCycles;
if (CurCPU == 0)
ret = ARM9Timestamp >> ARM9ClockShift;
else
ret = ARM7Timestamp;
if (CurCPU == 1) ret += (ARM9->Cycles >> 1);
else if (CurCPU == 2) ret += ARM7->Cycles;
if (num == 2) ret -= FrameStartTimestamp;
}
else if (num == 1)
{
ret = LastSysClockCycles;
LastSysClockCycles = 0;
if (CurCPU == 1)
{
ret += (ARM9->Cycles >> 1);
LastSysClockCycles = -(ARM9->Cycles >> 1);
}
else if (CurCPU == 2)
{
ret += ARM7->Cycles;
LastSysClockCycles = -ARM7->Cycles;
}
if (CurCPU == 0)
LastSysClockCycles = ARM9Timestamp >> ARM9ClockShift;
else
LastSysClockCycles = ARM7Timestamp;
}
return ret;
@ -1271,17 +1239,11 @@ void NocashPrint(u32 ncpu, u32 addr)
void HandleTimerOverflow(u32 tid)
{
Timer* timer = &Timers[tid];
//if ((timer->Cnt & 0x84) != 0x80) return;
timer->Counter += timer->Reload << 16;
if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
//u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift);
//delay -= (timer->Counter - timer->Reload) >> timer->CycleShift;
//printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter);
//ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid);
if ((tid & 0x3) == 3)
return;
@ -1310,8 +1272,6 @@ void HandleTimerOverflow(u32 tid)
void RunTimer(u32 tid, s32 cycles)
{
Timer* timer = &Timers[tid];
//if ((timer->Cnt & 0x84) != 0x80)
// return;
u32 oldcount = timer->Counter;
timer->Counter += (cycles << timer->CycleShift);
@ -1319,29 +1279,22 @@ void RunTimer(u32 tid, s32 cycles)
HandleTimerOverflow(tid);
}
void RunTightTimers(u32 cpu, s32 cycles)
void RunTimers(u32 cpu)
{
register u32 timermask = TimerCheckMask[cpu];
s32 cycles;
if (cpu == 0)
cycles = (ARM9Timestamp >> ARM9ClockShift) - TimerTimestamp[0];
else
cycles = ARM7Timestamp - TimerTimestamp[1];
if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles);
if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles);
if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles);
if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles);
#ifdef DEBUG_CHECK_DESYNC
if (cpu) dbg_CyclesTimer7 += cycles;
else dbg_CyclesTimer9 += cycles;
#endif // DEBUG_CHECK_DESYNC
}
void RunLooseTimers(u32 cpu, s32 cycles)
{
register u32 timermask = TimerCheckMask[cpu];
if (timermask & 0x10) RunTimer((cpu<<2)+0, cycles);
if (timermask & 0x20) RunTimer((cpu<<2)+1, cycles);
if (timermask & 0x40) RunTimer((cpu<<2)+2, cycles);
if (timermask & 0x80) RunTimer((cpu<<2)+3, cycles);
TimerTimestamp[cpu] += cycles;
}
@ -1391,6 +1344,7 @@ const s32 TimerPrescaler[4] = {0, 6, 8, 10};
u16 TimerGetCounter(u32 timer)
{
RunTimers(timer>>2);
u32 ret = Timers[timer].Counter;
return ret >> 16;
@ -1421,10 +1375,10 @@ void TimerStart(u32 id, u16 cnt)
if ((cnt & 0x84) == 0x80)
{
u32 tmask;
if ((cnt & 0x03) == 0)
//if ((cnt & 0x03) == 0)
tmask = 0x01 << (id&0x3);
else
tmask = 0x10 << (id&0x3);
//else
// tmask = 0x10 << (id&0x3);
TimerCheckMask[id>>2] |= tmask;
}
@ -1579,7 +1533,7 @@ void debug(u32 param)
// printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]);
/*FILE*
shit = fopen("debug/justbeep.bin", "wb");
shit = fopen("debug/colourfuck.bin", "wb");
for (u32 i = 0x02000000; i < 0x02400000; i+=4)
{
u32 val = ARM7Read32(i);

View File

@ -29,14 +29,6 @@
namespace NDS
{
#ifdef DEBUG_CHECK_DESYNC
extern u64 dbg_CyclesSys;
extern u64 dbg_CyclesARM9;
extern u64 dbg_CyclesTimer9;
extern u64 dbg_CyclesARM7;
extern u64 dbg_CyclesTimer7;
#endif
enum
{
Event_LCD = 0,
@ -56,7 +48,7 @@ enum
typedef struct
{
void (*Func)(u32 param);
s32 WaitCycles;
u64 Timestamp;
u32 Param;
} SchedEvent;
@ -109,6 +101,10 @@ typedef struct
extern u8 ARM9MemTimings[0x40000][4];
extern u8 ARM7MemTimings[0x20000][4];
extern u64 ARM9Timestamp, ARM9Target;
extern u64 ARM7Timestamp, ARM7Target;
extern u32 ARM9ClockShift;
// hax
extern u32 IME[2];
extern u32 IE[2];
@ -182,8 +178,7 @@ bool DMAsRunning(u32 cpu);
void CheckDMAs(u32 cpu, u32 mode);
void StopDMAs(u32 cpu, u32 mode);
void RunTightTimers(u32 cpu, s32 cycles);
void RunLooseTimers(u32 cpu, s32 cycles);
void RunTimers(u32 cpu);
u8 ARM9Read8(u32 addr);
u16 ARM9Read16(u32 addr);

View File

@ -1266,7 +1266,7 @@ void WriteROMCnt(u32 val)
if (datasize == 0)
NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*cmddelay, ROMEndTransfer, 0);
else
NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*(cmddelay+4), ROMPrepareData, 0);
NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*(cmddelay+4), ROMPrepareData, 0);
}
u32 ReadROMData()
@ -1281,7 +1281,7 @@ u32 ReadROMData()
u32 delay = 4;
if (!(DataOutPos & 0x1FF)) delay += ((ROMCnt >> 16) & 0x3F);
NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*delay, ROMPrepareData, 0);
NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*delay, ROMPrepareData, 0);
}
else
ROMEndTransfer(0);

View File

@ -22,7 +22,7 @@
#include <stdio.h>
#include "types.h"
#define SAVESTATE_MAJOR 3
#define SAVESTATE_MAJOR 4
#define SAVESTATE_MINOR 0
class Savestate

View File

@ -1350,7 +1350,7 @@ void Write(u32 addr, u16 val)
if ((IOPORT(W_PowerUS) & 0x0001) && !(val & 0x0001))
{
printf("WIFI ON\n");
NDS::ScheduleEvent(NDS::Event_Wifi, true, 33, USTimer, 0);
NDS::ScheduleEvent(NDS::Event_Wifi, false, 33, USTimer, 0);
if (!MPInited)
{
Platform::MP_Init();

View File

@ -16,6 +16,8 @@
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
// TODO: all this should ideally go in Platform.cpp
#include <stdio.h>
#include <string.h>
#include <stdlib.h>