add proper support for GXFIFO stalls.

bad games that blast the GXFIFO and overflow it:
* Super Mario 64 DS
* Rayman RR2

the latter seems to get its music streaming broken.
This commit is contained in:
StapleButter 2018-11-23 22:21:41 +01:00
parent 27e1ca4103
commit a9e7f8bc5b
8 changed files with 140 additions and 34 deletions

View File

@ -217,6 +217,7 @@ void ARM::JumpTo(u32 addr, bool restorecpsr)
// aging cart debug crap
//if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);
//if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]);
// R0=DMA# R1=src R2=size
u32 oldregion = R[15] >> 23;
u32 newregion = addr >> 23;

View File

@ -242,12 +242,14 @@ s32 DMA::Run(s32 cycles)
if (!Running)
return cycles;
Executing = true;
if (!(Cnt & 0x04000000))
{
u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
while (IterCount > 0 && cycles > 0)
while (IterCount > 0 && cycles > 0 && !Stall)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
@ -264,7 +266,8 @@ s32 DMA::Run(s32 cycles)
else
{
// optimized path for typical GXFIFO DMA
if (IsGXFIFODMA)
// likely not worth it tbh
/*if (IsGXFIFODMA)
{
while (IterCount > 0 && cycles > 0)
{
@ -278,12 +281,12 @@ s32 DMA::Run(s32 cycles)
IterCount--;
RemCount--;
}
}
}*/
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
while (IterCount > 0 && cycles > 0)
while (IterCount > 0 && cycles > 0 && !Stall)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
@ -298,6 +301,9 @@ s32 DMA::Run(s32 cycles)
}
}
Executing = false;
Stall = false;
if (RemCount)
{
if (IterCount == 0)

View File

@ -53,6 +53,11 @@ public:
Cnt &= ~0x80000000;
}
// Flags this DMA channel as stalled, but only while it is inside its
// transfer loop (Executing is set). The transfer loops in Run() test
// Stall on every iteration and stop copying once it is raised.
void StallIfRunning()
{
    // channel is not currently transferring: nothing to interrupt
    if (!Executing) return;

    Stall = true;
}
u32 SrcAddr;
u32 DstAddr;
u32 Cnt;
@ -74,6 +79,9 @@ private:
bool Running;
bool InProgress;
bool Executing;
bool Stall;
bool IsGXFIFODMA;
};

View File

@ -73,6 +73,13 @@
// TODO: check how DISP_1DOT_DEPTH works and whether it's latched
// command execution notes
//
// timings given by GBAtek are for individual commands
// real-life timings are different depending on how commands are combined
// the engine is able to do parallel execution to some extent
namespace GPU3D
{
@ -116,38 +123,38 @@ const u32 CmdNumParams[256] =
const s32 CmdNumCycles[256] =
{
// 0x00
0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x10
1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22,
0, 0, 0,
1, 1, 1,
// 0x20
1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1,
0, 0, 0, 0,
1, 9, 1, 9, 9, 9, 9, 9, 9, 1, 1, 1,
1, 1, 1, 1,
// 0x30
4, 4, 6, 1, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x40
1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x50
392,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x60
1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x70
103, 9, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x80+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
typedef union
@ -164,6 +171,8 @@ typedef union
FIFO<CmdFIFOEntry>* CmdFIFO;
FIFO<CmdFIFOEntry>* CmdPIPE;
FIFO<CmdFIFOEntry>* CmdStallQueue;
u32 NumCommands, CurCommand, ParamCount, TotalParams;
u32 DispCnt;
@ -276,6 +285,8 @@ bool Init()
CmdFIFO = new FIFO<CmdFIFOEntry>(256);
CmdPIPE = new FIFO<CmdFIFOEntry>(4);
CmdStallQueue = new FIFO<CmdFIFOEntry>(64);
if (!SoftRenderer::Init()) return false;
return true;
@ -287,6 +298,8 @@ void DeInit()
delete CmdFIFO;
delete CmdPIPE;
delete CmdStallQueue;
}
void Reset()
@ -294,6 +307,8 @@ void Reset()
CmdFIFO->Clear();
CmdPIPE->Clear();
CmdStallQueue->Clear();
NumCommands = 0;
CurCommand = 0;
ParamCount = 0;
@ -514,6 +529,20 @@ void DoSavestate(Savestate* file)
// probably not worth storing the vblank-latched Renderxxxxxx variables
if (file->Saving ||
file->VersionMajor > 2 ||
(file->VersionMajor == 2 && file->VersionMinor >= 1))
{
// command stall queue, only in version 2.1 and up
CmdStallQueue->DoSavestate(file);
}
else
{
// for version 2.0, just clear it. not having it doesn't matter
// if this comes from older melonDS revisions.
CmdStallQueue->Clear();
}
if (!file->Saving)
{
ClipMatrixDirty = true;
@ -1387,17 +1416,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)
{
if (CmdFIFO->IsFull())
{
//printf("!!! GX FIFO FULL\n");
//return;
// store it to the stall queue. stall the system.
// worst case is if a STMxx opcode causes this, which is why our stall queue
// has 64 entries. this is less complicated than trying to make STMxx stall-able.
// temp. hack
// SM64DS seems to overflow the FIFO occasionally
// either leftover bugs in our implementation, or the game accidentally doing that
// TODO: investigate.
// TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore)
while (CmdFIFO->IsFull())
ExecuteCommand();
CmdStallQueue->Write(entry);
NDS::GXFIFOStall();
return;
}
CmdFIFO->Write(entry);
@ -1426,6 +1451,21 @@ CmdFIFOEntry CmdFIFORead()
if (!CmdFIFO->IsEmpty())
CmdPIPE->Write(CmdFIFO->Read());
// empty stall queue if needed
// CmdFIFO should not be full at this point.
if (!CmdStallQueue->IsEmpty())
{
while (!CmdStallQueue->IsEmpty())
{
if (CmdFIFO->IsFull()) break;
CmdFIFOEntry entry = CmdStallQueue->Read();
CmdFIFOWrite(entry);
}
if (CmdStallQueue->IsEmpty())
NDS::GXFIFOUnstall();
}
CheckFIFODMA();
CheckFIFOIRQ();
}
@ -1450,6 +1490,7 @@ void ExecuteCommand()
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/
CycleCount += CmdNumCycles[entry.Command];
ExecParamCount = 0;
if (CycleCount > 0)
@ -1852,6 +1893,8 @@ void ExecuteCommand()
break;
case 0x40: // begin polygons
// TODO: check if there was a polygon being defined but incomplete
// such cases seem to freeze the GPU
PolygonMode = ExecParams[0] & 0x3;
VertexNum = 0;
VertexNumInPoly = 0;
@ -1902,6 +1945,12 @@ void ExecuteCommand()
}
}
// Returns the geometry engine's pending cycle count, clamped so that the
// result is never negative: a negative CycleCount yields 0, anything
// else is returned unchanged.
s32 CyclesToRunFor()
{
    return (CycleCount < 0) ? 0 : CycleCount;
}
void Run(s32 cycles)
{
if (FlushRequest)
@ -1924,6 +1973,8 @@ void Run(s32 cycles)
if (CycleCount <= 0 && CmdPIPE->IsEmpty())
{
// todo: advance remaining pipeline shit here
CycleCount = 0;
GXStat &= ~(1<<27);

View File

@ -90,6 +90,7 @@ void DoSavestate(Savestate* file);
void ExecuteCommand();
s32 CyclesToRunFor();
void Run(s32 cycles);
void CheckFIFOIRQ();
void CheckFIFODMA();

View File

@ -108,6 +108,7 @@ bool Running;
void DivDone(u32 param);
void SqrtDone(u32 param);
void RunTimer(u32 tid, s32 cycles);
bool Init()
@ -608,12 +609,27 @@ u32 RunFrame()
s32 ndscyclestorun;
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
// TODO: we need to directly change CurIterationCycles when rescheduling shit
CalcIterationCycles();
if (CPUStop & 0x80000000)
{
// GXFIFO stall
// we just run the GPU and the timers.
// the rest of the hardware is driven by the event scheduler.
s32 cycles = GPU3D::CyclesToRunFor();
GPU3D::Run(cycles);
u32 timermask = TimerCheckMask[0];
if (timermask & 0x1) RunTimer(0, cycles);
if (timermask & 0x2) RunTimer(1, cycles);
if (timermask & 0x4) RunTimer(2, cycles);
if (timermask & 0x8) RunTimer(3, cycles);
timermask = TimerCheckMask[1];
if (timermask & 0x1) RunTimer(4, cycles);
if (timermask & 0x2) RunTimer(5, cycles);
if (timermask & 0x4) RunTimer(6, cycles);
if (timermask & 0x8) RunTimer(7, cycles);
}
else
{
@ -818,6 +834,27 @@ void ResumeCPU(u32 cpu, u32 mask)
CPUStop &= ~mask;
}
// Puts the system into the GXFIFO-stalled state by setting bit 31 of
// CPUStop. Calling it again while that bit is already set is a no-op.
//
// After raising the flag, whatever is currently driving the bus gets
// interrupted: either the ARM9 is halted, or all four DMA channels
// DMAs[0..3] are asked to stall if they are in the middle of a transfer.
void GXFIFOStall()
{
    // already stalled: don't halt/stall anything a second time
    if (CPUStop & 0x80000000) return;

    CPUStop |= 0x80000000;

    if (CurCPU != 1)
    {
        // NOTE(review): presumably this path means a DMA (not the CPU)
        // issued the write that overflowed the FIFO -- confirm against
        // how CurCPU is set in RunFrame.
        for (int chan = 0; chan < 4; chan++)
            DMAs[chan]->StallIfRunning();

        return;
    }

    // NOTE(review): CurCPU == 1 is assumed to mean "ARM9 is executing";
    // the meaning of halt mode 2 is defined in ARM::Halt -- confirm there.
    ARM9->Halt(2);
}
// Leaves the GXFIFO-stalled state: clears bit 31 of CPUStop, the same
// bit GXFIFOStall() sets. All other CPUStop bits are left untouched.
void GXFIFOUnstall()
{
    CPUStop = CPUStop & ~0x80000000;
}
u32 GetPC(u32 cpu)
{
return cpu ? ARM7->R[15] : ARM9->R[15];

View File

@ -148,6 +148,8 @@ void ClearIRQ(u32 cpu, u32 irq);
bool HaltInterrupted(u32 cpu);
void StopCPU(u32 cpu, u32 mask);
void ResumeCPU(u32 cpu, u32 mask);
void GXFIFOStall();
void GXFIFOUnstall();
u32 GetPC(u32 cpu);

View File

@ -23,7 +23,7 @@
#include "types.h"
#define SAVESTATE_MAJOR 2
#define SAVESTATE_MINOR 0
#define SAVESTATE_MINOR 1
class Savestate
{