* GXFIFO DMA (note: Mario Slam Basketball keeps overflowing the FIFO for whatever reason, probably lack of proper DMA timings)

* start implementing matrix shit
This commit is contained in:
StapleButter 2017-02-08 21:30:10 +01:00
parent 796dc58f71
commit 49fd4ccb45
6 changed files with 436 additions and 16 deletions

View File

@ -376,6 +376,8 @@ s32 ARM::Execute()
//if (R[15]==0x037F9364) printf("R8=%08X R9=%08X\n", R[8], R[9]);
// gross hack
// TODO, though: move timer code here too?
// quick testing shows that moving this to the NDS loop doesn't really slow things down
if (Num==0)
{
s32 diff = Cycles - lastcycles;

22
DMA.cpp
View File

@ -20,6 +20,7 @@
#include "NDS.h"
#include "DMA.h"
#include "NDSCart.h"
#include "GPU3D.h"
// NOTES ON DMA SHIT
@ -87,9 +88,11 @@ void DMA::WriteCnt(u32 val)
if ((StartMode & 0x7) == 0)
Start();
else if (StartMode == 0x07)
GPU3D::CheckFIFODMA();
//else
// printf("SPECIAL ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode);
if ((StartMode&7)!=0x00 && (StartMode&7)!=0x1 && StartMode!=2 && StartMode!=0x05 && StartMode!=0x12)
if ((StartMode&7)!=0x00 && (StartMode&7)!=0x1 && StartMode!=2 && StartMode!=0x05 && StartMode!=0x12 && StartMode!=0x07)
printf("UNIMPLEMENTED ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode);
//if (StartMode==2)printf("HBLANK DMA %08X -> %08X\n", SrcAddr, DstAddr);
}
@ -123,6 +126,10 @@ void DMA::Start()
NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num);
return;
}
if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr);
u32 num = RemCount;
if (StartMode == 0x07 && num > 112)
num = 112;
// TODO: NOT MAKE THE DMA INSTANT!!
if (!(Cnt & 0x04000000))
@ -130,12 +137,13 @@ void DMA::Start()
u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
while (RemCount > 0)
while (num > 0)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
num--;
RemCount--;
}
}
@ -144,16 +152,24 @@ void DMA::Start()
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
while (RemCount > 0)
while (num > 0)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
num--;
RemCount--;
}
}
if (RemCount)
{
Cnt &= ~countmask;
Cnt |= RemCount;
return;
}
if (!(Cnt & 0x02000000))
Cnt &= ~0x80000000;

413
GPU3D.cpp
View File

@ -119,6 +119,142 @@ u32 ExecParamCount;
s32 CycleCount;
u32 MatrixMode;
s32 ProjMatrix[16];
s32 PosMatrix[16];
s32 VecMatrix[16];
s32 TexMatrix[16];
s32 ProjMatrixStack[16];
s32 PosMatrixStack[31][16];
s32 ProjMatrixStackPointer;
s32 PosMatrixStackPointer;
// Overwrites the 16-entry matrix at m with the identity matrix.
// Entries use 12 fractional bits, so 0x1000 is the fixed-point value 1.0.
void MatrixLoadIdentity(s32* m)
{
    for (int idx = 0; idx < 16; idx++)
    {
        // the diagonal of a 4x4 matrix lives at indices 0, 5, 10 and 15
        m[idx] = (idx % 5 == 0) ? 0x1000 : 0;
    }
}
// Copies all 16 entries of the 4x4 matrix s into m.
// m and s are expected to be distinct, non-overlapping buffers.
void MatrixLoad4x4(s32* m, s32* s)
{
    for (int idx = 0; idx < 16; idx++)
        m[idx] = s[idx];
}
// Loads a 4x3 matrix (12 entries, 3 per row) from s into the 4x4 matrix m.
// The missing fourth column is filled in as (0, 0, 0, 0x1000), where
// 0x1000 is 1.0 in the 12-fractional-bit format used by these matrices.
void MatrixLoad4x3(s32* m, s32* s)
{
    for (int row = 0; row < 4; row++)
    {
        s32* dst = &m[row * 4];
        s32* src = &s[row * 3];

        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = (row == 3) ? 0x1000 : 0;
    }
}
// Multiplies the 4x4 matrix m in place by the 4x4 matrix s: m = s*m.
//
// Both matrices hold 16 entries in row-major order, with 12 fractional bits
// (0x1000 == 1.0). Each result entry is the sum of four products, shifted
// right by 12 to get back to that format.
//
// The products are accumulated in 64 bits: multiplying two s32 fixed-point
// values directly overflows as soon as their real-valued product reaches
// 128.0, which is undefined behaviour and yields garbage in practice.
void MatrixMult4x4(s32* m, s32* s)
{
    // snapshot of the old m, since every output entry reads a whole column of it
    s32 tmp[16];
    memcpy(tmp, m, sizeof(tmp));

    for (int row = 0; row < 4; row++)
    {
        for (int col = 0; col < 4; col++)
        {
            long long acc = 0;
            for (int k = 0; k < 4; k++)
                acc += (long long)s[row*4 + k] * tmp[k*4 + col];

            m[row*4 + col] = (s32)(acc >> 12);
        }
    }
}
// Multiplies the 4x4 matrix m in place by the 4x3 matrix s: m = s*m.
//
// s holds 12 entries, 3 per row; its missing fourth column is treated as
// (0, 0, 0, 0x1000), so only the last row of the result picks up a
// contribution from the old last row of m. Entries have 12 fractional bits
// (0x1000 == 1.0).
//
// The products are accumulated in 64 bits. In 32 bits the 0x1000*tmp[12..15]
// term alone overflows once a last-row entry of m reaches 128.0.
void MatrixMult4x3(s32* m, s32* s)
{
    // snapshot of the old m, since every output entry reads a whole column of it
    s32 tmp[16];
    memcpy(tmp, m, sizeof(tmp));

    for (int row = 0; row < 4; row++)
    {
        for (int col = 0; col < 4; col++)
        {
            long long acc = 0;
            for (int k = 0; k < 3; k++)
                acc += (long long)s[row*3 + k] * tmp[k*4 + col];

            // implicit s[row][3]: 1.0 for the last row, 0 for the others
            if (row == 3)
                acc += (long long)0x1000 * tmp[12 + col];

            m[row*4 + col] = (s32)(acc >> 12);
        }
    }
}
// Multiplies the 4x4 matrix m in place by the 3x3 matrix s: m = s*m.
//
// s holds 9 entries, 3 per row. Only the first three rows of m (entries
// 0..11) are rewritten; the last row (entries 12..15) is left untouched.
// Entries have 12 fractional bits (0x1000 == 1.0).
//
// The products are accumulated in 64 bits, because multiplying two s32
// fixed-point values directly overflows for moderately large operands.
void MatrixMult3x3(s32* m, s32* s)
{
    // snapshot of the rows of m that feed the result
    s32 tmp[12];
    memcpy(tmp, m, sizeof(tmp));

    for (int row = 0; row < 3; row++)
    {
        for (int col = 0; col < 4; col++)
        {
            long long acc = 0;
            for (int k = 0; k < 3; k++)
                acc += (long long)s[row*3 + k] * tmp[k*4 + col];

            m[row*4 + col] = (s32)(acc >> 12);
        }
    }
}
// Scales the first three rows of the 4x4 matrix m in place: row 0 by s[0],
// row 1 by s[1], row 2 by s[2]. The last row (entries 12..15) is unchanged.
// Entries and scale factors have 12 fractional bits (0x1000 == 1.0).
//
// Each product is computed in 64 bits, because multiplying two s32
// fixed-point values directly overflows for moderately large operands.
void MatrixScale(s32* m, s32* s)
{
    for (int row = 0; row < 3; row++)
    {
        const long long factor = s[row];

        for (int col = 0; col < 4; col++)
        {
            s32* entry = &m[row*4 + col];
            *entry = (s32)((factor * *entry) >> 12);
        }
    }
}
// Applies a translation by (s[0], s[1], s[2]) to the 4x4 matrix m in place.
//
// This is m = T*m, where T is the identity with its last row replaced by
// (s[0], s[1], s[2], 1.0): the last row of m gains s[0]*row0 + s[1]*row1 +
// s[2]*row2, and the other rows are unchanged. Entries have 12 fractional
// bits (0x1000 == 1.0).
//
// All four columns of the last row are updated, m[15] included. Leaving it
// out is only correct when m[3], m[7] and m[11] are zero, and would disagree
// with MatrixMult4x3 given the equivalent translation matrix.
//
// The products are accumulated in 64 bits, because multiplying two s32
// fixed-point values directly overflows for moderately large operands.
void MatrixTranslate(s32* m, s32* s)
{
    for (int col = 0; col < 4; col++)
    {
        long long acc = (long long)s[0] * m[col]
                      + (long long)s[1] * m[4 + col]
                      + (long long)s[2] * m[8 + col];

        m[12 + col] += (s32)(acc >> 12);
    }
}
bool Init()
{
CmdFIFO = new FIFO<CmdFIFOEntry>(256);
@ -148,6 +284,19 @@ void Reset()
memset(ExecParams, 0, 32*4);
ExecParamCount = 0;
CycleCount = 0;
MatrixMode = 0;
MatrixLoadIdentity(ProjMatrix);
MatrixLoadIdentity(PosMatrix);
MatrixLoadIdentity(VecMatrix);
MatrixLoadIdentity(TexMatrix);
memset(ProjMatrixStack, 0, 16*4);
memset(PosMatrixStack, 0, 31 * 16*4);
ProjMatrixStackPointer = 0;
PosMatrixStackPointer = 0;
}
@ -179,9 +328,11 @@ CmdFIFOEntry CmdFIFORead()
CmdPIPE->Write(CmdFIFO->Read());
if (!CmdFIFO->IsEmpty())
CmdPIPE->Write(CmdFIFO->Read());
CheckFIFODMA();
CheckFIFOIRQ();
}
CheckFIFOIRQ();
return ret;
}
@ -200,15 +351,242 @@ void ExecuteCommand()
CycleCount += CmdNumCycles[entry.Command];
ExecParamCount = 0;
// TODO: actually execute the command, maybe
GXStat &= ~(1<<14);
//printf("3D CMD %02X\n", entry.Command);
switch (entry.Command)
{
case 0x18:
case 0x19:
case 0x1A:
// TODO: more cycles if MTX_MODE=2
case 0x10: // matrix mode
MatrixMode = ExecParams[0] & 0x3;
break;
case 0x11: // push matrix
if (MatrixMode == 0)
{
if (ProjMatrixStackPointer > 0)
{
printf("!! PROJ MATRIX STACK OVERFLOW\n");
GXStat |= (1<<15);
break;
}
memcpy(ProjMatrixStack, ProjMatrix, 16*4);
ProjMatrixStackPointer++;
GXStat |= (1<<14);
}
else if (MatrixMode == 3)
{
printf("!! CAN'T PUSH TEXTURE MATRIX\n");
GXStat |= (1<<15); // CHECKME
}
else
{
if (PosMatrixStackPointer > 30)
{
printf("!! POS MATRIX STACK OVERFLOW\n");
GXStat |= (1<<15);
break;
}
memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4);
PosMatrixStackPointer++;
GXStat |= (1<<14);
}
break;
case 0x12: // pop matrix
if (MatrixMode == 0)
{
if (ProjMatrixStackPointer <= 0)
{
printf("!! PROJ MATRIX STACK UNDERFLOW\n");
GXStat |= (1<<15);
break;
}
ProjMatrixStackPointer--;
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
GXStat |= (1<<14);
}
else if (MatrixMode == 3)
{
printf("!! CAN'T POP TEXTURE MATRIX\n");
GXStat |= (1<<15); // CHECKME
}
else
{
s32 offset = (s32)(ExecParams[0] << 26) >> 26;
PosMatrixStackPointer -= offset;
if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30)
{
printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer);
PosMatrixStackPointer += offset;
GXStat |= (1<<15);
break;
}
memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4);
GXStat |= (1<<14);
}
break;
case 0x13: // store matrix
if (MatrixMode == 0)
{
memcpy(ProjMatrixStack, ProjMatrix, 16*4);
}
else if (MatrixMode == 3)
{
printf("!! CAN'T STORE TEXTURE MATRIX\n");
GXStat |= (1<<15); // CHECKME
}
else
{
u32 addr = ExecParams[0] & 0x1F;
if (addr > 30)
{
printf("!! POS MATRIX STORE ADDR 31\n");
GXStat |= (1<<15);
break;
}
memcpy(PosMatrixStack[addr], PosMatrix, 16*4);
}
break;
case 0x14: // restore matrix
if (MatrixMode == 0)
{
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
}
else if (MatrixMode == 3)
{
printf("!! CAN'T RESTORE TEXTURE MATRIX\n");
GXStat |= (1<<15); // CHECKME
}
else
{
u32 addr = ExecParams[0] & 0x1F;
if (addr > 30)
{
printf("!! POS MATRIX STORE ADDR 31\n");
GXStat |= (1<<15);
break;
}
memcpy(PosMatrix, PosMatrixStack[addr], 16*4);
}
break;
case 0x15: // identity
if (MatrixMode == 0)
MatrixLoadIdentity(ProjMatrix);
else if (MatrixMode == 3)
MatrixLoadIdentity(TexMatrix);
else
{
MatrixLoadIdentity(PosMatrix);
if (MatrixMode == 2)
MatrixLoadIdentity(VecMatrix);
}
break;
case 0x16: // load 4x4
if (MatrixMode == 0)
MatrixLoad4x4(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixLoad4x4(TexMatrix, (s32*)ExecParams);
else
{
MatrixLoad4x4(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
MatrixLoad4x4(VecMatrix, (s32*)ExecParams);
}
break;
case 0x17: // load 4x3
if (MatrixMode == 0)
MatrixLoad4x3(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixLoad4x3(TexMatrix, (s32*)ExecParams);
else
{
MatrixLoad4x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
MatrixLoad4x3(VecMatrix, (s32*)ExecParams);
}
break;
case 0x18: // mult 4x4
if (MatrixMode == 0)
MatrixMult4x4(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixMult4x4(TexMatrix, (s32*)ExecParams);
else
{
MatrixMult4x4(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult4x4(VecMatrix, (s32*)ExecParams);
CycleCount += 30;
}
}
break;
case 0x19: // mult 4x3
if (MatrixMode == 0)
MatrixMult4x3(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixMult4x3(TexMatrix, (s32*)ExecParams);
else
{
MatrixMult4x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult4x3(VecMatrix, (s32*)ExecParams);
CycleCount += 30;
}
}
break;
case 0x1A: // mult 3x3
if (MatrixMode == 0)
MatrixMult3x3(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixMult3x3(TexMatrix, (s32*)ExecParams);
else
{
MatrixMult3x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult3x3(VecMatrix, (s32*)ExecParams);
CycleCount += 30;
}
}
break;
case 0x1B: // scale
if (MatrixMode == 0)
MatrixScale(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixScale(TexMatrix, (s32*)ExecParams);
else
MatrixScale(PosMatrix, (s32*)ExecParams);
break;
case 0x1C: // translate
if (MatrixMode == 0)
MatrixTranslate(ProjMatrix, (s32*)ExecParams);
else if (MatrixMode == 3)
MatrixTranslate(TexMatrix, (s32*)ExecParams);
else
{
MatrixTranslate(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
MatrixTranslate(VecMatrix, (s32*)ExecParams);
}
break;
case 0x21:
@ -249,6 +627,12 @@ void CheckFIFOIRQ()
if (irq) NDS::TriggerIRQ(0, NDS::IRQ_GXFIFO);
}
// Asks the DMA controller to check for transfers using start mode 0x07
// (the mode the DMA code reports as "GXFIFO DMA"), but only while the
// command FIFO holds fewer than 128 entries.
// NOTE(review): 128 is presumably the half-full mark of the FIFO - confirm.
void CheckFIFODMA()
{
    // enough commands queued already: don't request more data
    if (CmdFIFO->Level() >= 128)
        return;

    NDS::CheckDMAs(0, 0x07);
}
u8 Read8(u32 addr)
{
@ -272,13 +656,26 @@ u32 Read32(u32 addr)
u32 fifolevel = CmdFIFO->Level();
return GXStat |
// matrix stack levels, TODO
((PosMatrixStackPointer & 0x1F) << 8) |
((ProjMatrixStackPointer & 0x1) << 13) |
(fifolevel << 16) |
(fifolevel < 128 ? (1<<25) : 0) |
(fifolevel == 0 ? (1<<26) : 0) |
(CycleCount > 0 ? (1<<27) : 0);
}
}
if (addr >= 0x04000640 && addr < 0x04000680)
{
printf("!! CLIPMTX READ\n");
return 0;
}
if (addr >= 0x04000680 && addr < 0x040006A4)
{
printf("!! VECMTX READ\n");
return 0;
}
return 0;
}
@ -330,6 +727,8 @@ void Write32(u32 addr, u32 val)
ParamCount = 0;
TotalParams = CmdNumParams[CurCommand & 0xFF];
}
return;
}
if (addr >= 0x04000440 && addr < 0x040005CC)

View File

@ -28,6 +28,7 @@ void Reset();
void Run(s32 cycles);
void CheckFIFOIRQ();
void CheckFIFODMA();
u8 Read8(u32 addr);
u16 Read16(u32 addr);

View File

@ -307,7 +307,7 @@ void Reset()
// test
//LoadROM();
//LoadFirmware();
if (NDSCart::LoadROM("rom/nsmb.nds"))
if (NDSCart::LoadROM("rom/Simple_Tri.nds"))
Running = true; // hax
}
@ -377,6 +377,7 @@ void RunFrame()
ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
RunSystem(ndscyclestorun);
//GPU3D::Run(ndscyclestorun);
/*while (ndscycles < ndscyclestorun)
{

View File

@ -10,7 +10,7 @@
1481161027 c:\documents\sources\melonds\types.h
1486511394 source:c:\documents\sources\melonds\nds.cpp
1486515172 source:c:\documents\sources\melonds\nds.cpp
<stdio.h>
<string.h>
"NDS.h"
@ -24,7 +24,7 @@
"RTC.h"
"Wifi.h"
1486511108 source:c:\documents\sources\melonds\arm.cpp
1486512922 source:c:\documents\sources\melonds\arm.cpp
<stdio.h>
"NDS.h"
"ARM.h"
@ -109,11 +109,12 @@
1486511075 c:\documents\sources\melonds\fifo.h
"types.h"
1486309616 source:c:\documents\sources\melonds\dma.cpp
1486514961 source:c:\documents\sources\melonds\dma.cpp
<stdio.h>
"NDS.h"
"DMA.h"
"NDSCart.h"
"GPU3D.h"
1484698068 c:\documents\sources\melonds\dma.h
"types.h"
@ -145,9 +146,9 @@
"NDS.h"
"NDSCart.h"
1486510737 c:\documents\sources\melonds\gpu3d.h
1486514429 c:\documents\sources\melonds\gpu3d.h
1486511257 source:c:\documents\sources\melonds\gpu3d.cpp
1486585700 source:c:\documents\sources\melonds\gpu3d.cpp
<stdio.h>
<string.h>
"NDS.h"