From 2bd67aef64e6932019fa90db982a1755ce100c0f Mon Sep 17 00:00:00 2001 From: StapleButter Date: Thu, 9 Feb 2017 02:39:52 +0100 Subject: [PATCH 01/16] calculate vertices. it doesn't do much, but hey, it's a start. --- DMA.cpp | 2 +- GPU3D.cpp | 240 +++++++++++++++++++++++++++++++++++++++++-------- melonDS.depend | 6 +- 3 files changed, 207 insertions(+), 41 deletions(-) diff --git a/DMA.cpp b/DMA.cpp index 836a5805..19711ae3 100644 --- a/DMA.cpp +++ b/DMA.cpp @@ -126,7 +126,7 @@ void DMA::Start() NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num); return; } -if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); + //if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); u32 num = RemCount; if (StartMode == 0x07 && num > 112) num = 112; diff --git a/GPU3D.cpp b/GPU3D.cpp index 6cf636a7..1f268d6e 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -23,6 +23,19 @@ #include "FIFO.h" +// 3D engine notes +// +// vertex/polygon RAM is filled when a complete polygon is defined, after it's been culled and clipped +// 04000604 reads from bank used by renderer +// bank used by renderer is emptied at scanline ~192 +// banks are swapped at scanline ~194 +// TODO: needs more investigation. it's weird. +// +// clipping rules: +// * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons +// strip is resumed at the first eligible polygon + + namespace GPU3D { @@ -126,11 +139,77 @@ s32 PosMatrix[16]; s32 VecMatrix[16]; s32 TexMatrix[16]; +s32 ClipMatrix[16]; +bool ClipMatrixDirty; + +s32 Viewport[4]; + s32 ProjMatrixStack[16]; s32 PosMatrixStack[31][16]; s32 ProjMatrixStackPointer; s32 PosMatrixStackPointer; +void MatrixLoadIdentity(s32* m); +void UpdateClipMatrix(); + + +u32 PolygonMode; +s16 CurVertex[3]; + +s32 TempVertexBuffer[4][4]; + + + +bool Init() +{ + CmdFIFO = new FIFO(256); + CmdPIPE = new FIFO(4); + + return true; +} + +void DeInit() +{ + delete CmdFIFO; + delete CmdPIPE; +} + +void Reset() +{ + CmdFIFO->Clear(); + CmdPIPE->Clear(); + + NumCommands = 0; + CurCommand = 0; + ParamCount = 0; + TotalParams = 0; + + GXStat = 0; + + memset(ExecParams, 0, 32*4); + ExecParamCount = 0; + CycleCount = 0; + + + MatrixMode = 0; + + MatrixLoadIdentity(ProjMatrix); + MatrixLoadIdentity(PosMatrix); + MatrixLoadIdentity(VecMatrix); + MatrixLoadIdentity(TexMatrix); + + ClipMatrixDirty = true; + UpdateClipMatrix(); + + memset(Viewport, 0, sizeof(Viewport)); + + memset(ProjMatrixStack, 0, 16*4); + memset(PosMatrixStack, 0, 31 * 16*4); + ProjMatrixStackPointer = 0; + PosMatrixStackPointer = 0; +} + + void MatrixLoadIdentity(s32* m) { @@ -254,52 +333,52 @@ void MatrixTranslate(s32* m, s32* s) m[14] += (s[0]*m[2] + s[1]*m[6] + s[2]*m[10]) >> 12; } - -bool Init() +void UpdateClipMatrix() { - CmdFIFO = new FIFO(256); - CmdPIPE = new FIFO(4); + if (!ClipMatrixDirty) return; + ClipMatrixDirty = false; - return true; + memcpy(ClipMatrix, ProjMatrix, 16*4); + MatrixMult4x4(ClipMatrix, PosMatrix); } -void DeInit() + + +void SubmitPolygon() { - delete CmdFIFO; - delete CmdPIPE; + // } -void Reset() +void SubmitVertex() { - CmdFIFO->Clear(); - CmdPIPE->Clear(); + s32 vertex[4] = {(s32)CurVertex[0], (s32)CurVertex[1], (s32)CurVertex[2], 0x1000}; + s32 vertextrans[4]; - NumCommands = 0; - CurCommand = 0; - ParamCount = 0; - TotalParams = 0; + //printf("vertex: %f %f %f\n", vertex[0]/4096.0f, vertex[1]/4096.0f, vertex[2]/4096.0f); - GXStat = 0; + UpdateClipMatrix(); + vertextrans[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; + vertextrans[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; + vertextrans[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; + vertextrans[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; - memset(ExecParams, 0, 32*4); - ExecParamCount = 0; - CycleCount = 0; + //printf("vertex trans: %f %f %f\n", vertextrans[0]/4096.0f, vertextrans[1]/4096.0f, vertextrans[2]/4096.0f); + if (vertextrans[3] == 0) + { + //printf("!!!! VERTEX W IS ZERO\n"); + //return; + vertextrans[3] = 0x1000; // checkme + } - MatrixMode = 0; + s32 screenX = (((vertextrans[0]+vertextrans[3]) * Viewport[2]) / (vertextrans[3]<<1)) + Viewport[0]; + s32 screenY = (((vertextrans[1]+vertextrans[3]) * Viewport[3]) / (vertextrans[3]<<1)) + Viewport[1]; - MatrixLoadIdentity(ProjMatrix); - MatrixLoadIdentity(PosMatrix); - MatrixLoadIdentity(VecMatrix); - MatrixLoadIdentity(TexMatrix); - - memset(ProjMatrixStack, 0, 16*4); - memset(PosMatrixStack, 0, 31 * 16*4); - ProjMatrixStackPointer = 0; - PosMatrixStackPointer = 0; + //printf("screen: %d, %d\n", screenX, screenY); } + void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) @@ -338,7 +417,6 @@ CmdFIFOEntry CmdFIFORead() - void ExecuteCommand() { CmdFIFOEntry entry = CmdFIFORead(); @@ -408,6 +486,7 @@ void ExecuteCommand() ProjMatrixStackPointer--; memcpy(ProjMatrix, ProjMatrixStack, 16*4); GXStat |= (1<<14); + ClipMatrixDirty = true; } else if (MatrixMode == 3) { @@ -429,6 +508,7 @@ void ExecuteCommand() memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); GXStat |= (1<<14); + ClipMatrixDirty = true; } break; @@ -460,6 +540,7 @@ void ExecuteCommand() if (MatrixMode == 0) { memcpy(ProjMatrix, ProjMatrixStack, 16*4); + ClipMatrixDirty = true; } else if (MatrixMode == 3) { @@ -477,12 +558,16 @@ void ExecuteCommand() } memcpy(PosMatrix, PosMatrixStack[addr], 16*4); + ClipMatrixDirty = true; } break; case 0x15: // identity if (MatrixMode == 0) + { MatrixLoadIdentity(ProjMatrix); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoadIdentity(TexMatrix); else @@ -490,12 +575,16 @@ void ExecuteCommand() MatrixLoadIdentity(PosMatrix); if (MatrixMode == 2) MatrixLoadIdentity(VecMatrix); + ClipMatrixDirty = true; } break; case 0x16: // load 4x4 if (MatrixMode == 0) + { MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoad4x4(TexMatrix, (s32*)ExecParams); else @@ -503,12 +592,16 @@ void ExecuteCommand() MatrixLoad4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x4(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; } break; case 0x17: // load 4x3 if (MatrixMode == 0) + { MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoad4x3(TexMatrix, (s32*)ExecParams); else @@ -516,12 +609,16 @@ void ExecuteCommand() MatrixLoad4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x3(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; } break; case 0x18: // mult 4x4 if (MatrixMode == 0) + { MatrixMult4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult4x4(TexMatrix, (s32*)ExecParams); else @@ -532,12 +629,16 @@ void ExecuteCommand() MatrixMult4x4(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x19: // mult 4x3 if (MatrixMode == 0) + { MatrixMult4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult4x3(TexMatrix, (s32*)ExecParams); else @@ -548,12 +649,16 @@ void ExecuteCommand() MatrixMult4x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x1A: // mult 3x3 if (MatrixMode == 0) + { MatrixMult3x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult3x3(TexMatrix, (s32*)ExecParams); else @@ -564,21 +669,31 @@ void ExecuteCommand() MatrixMult3x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x1B: // scale if (MatrixMode == 0) + { MatrixScale(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixScale(TexMatrix, (s32*)ExecParams); else + { MatrixScale(PosMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } break; case 0x1C: // translate if (MatrixMode == 0) + { MatrixTranslate(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixTranslate(TexMatrix, (s32*)ExecParams); else @@ -586,6 +701,7 @@ void ExecuteCommand() MatrixTranslate(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixTranslate(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; } break; @@ -593,9 +709,59 @@ void ExecuteCommand() // TODO: more cycles if lights are enabled break; + case 0x23: // full vertex + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + SubmitVertex(); + break; + + case 0x24: // 10-bit vertex + CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6; + CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4; + CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14; + SubmitVertex(); + break; + + case 0x25: // vertex XY + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x26: // vertex XZ + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x27: // vertex YZ + CurVertex[1] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x28: // 10-bit delta vertex + CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; + CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + SubmitVertex(); + break; + + case 0x40: + PolygonMode = ExecParams[0] & 0x3; + break; + case 0x50: // TODO: make it happen upon VBlank, not right now break; + + case 0x60: // viewport x1,y1,x2,y2 + Viewport[0] = ExecParams[0] & 0xFF; + Viewport[1] = (ExecParams[0] >> 8) & 0xFF; + Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1; + Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1; + break; } } } @@ -606,12 +772,12 @@ void Run(s32 cycles) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) ExecuteCommand(); - - if (CmdPIPE->IsEmpty()) - CycleCount = 0; } - else - CycleCount -= cycles; + + CycleCount -= cycles; + + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + CycleCount = 0; } @@ -667,8 +833,8 @@ u32 Read32(u32 addr) if (addr >= 0x04000640 && addr < 0x04000680) { - printf("!! CLIPMTX READ\n"); - return 0; + UpdateClipMatrix(); + return ClipMatrix[(addr & 0x3C) >> 2]; } if (addr >= 0x04000680 && addr < 0x040006A4) { diff --git a/melonDS.depend b/melonDS.depend index f49c92a2..92a73bdb 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486515172 source:c:\documents\sources\melonds\nds.cpp +1486603989 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -109,7 +109,7 @@ 1486511075 c:\documents\sources\melonds\fifo.h "types.h" -1486514961 source:c:\documents\sources\melonds\dma.cpp +1486589927 source:c:\documents\sources\melonds\dma.cpp "NDS.h" "DMA.h" @@ -148,7 +148,7 @@ 1486514429 c:\documents\sources\melonds\gpu3d.h -1486585700 source:c:\documents\sources\melonds\gpu3d.cpp +1486603980 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" From 896bb1b7813e9197b0532841b681fec1cefb6146 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 9 Feb 2017 08:18:05 -0500 Subject: [PATCH 02/16] ARM: Fix out of bounds array indexing in RestoreCPSR --- ARM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ARM.cpp b/ARM.cpp index f582deb2..10b87956 100644 --- a/ARM.cpp +++ b/ARM.cpp @@ -197,7 +197,7 @@ void ARM::RestoreCPSR() switch (CPSR & 0x1F) { case 0x11: - CPSR = R_FIQ[8]; + CPSR = R_FIQ[7]; break; case 0x12: From c95f7578bbf3c9cec96b341474b3ca73159c791d Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 10 Feb 2017 15:24:46 +0100 Subject: [PATCH 03/16] store vertices and polygons. strips not handled yet. --- GPU.cpp | 4 + GPU2D.cpp | 5 + GPU2D.h | 1 + GPU3D.cpp | 383 +++++++++++++++++++++++++++++++++++++++++++++++-- GPU3D.h | 16 +++ melonDS.depend | 12 +- 6 files changed, 405 insertions(+), 16 deletions(-) diff --git a/GPU.cpp b/GPU.cpp index e153c4d2..b5db5cbe 100644 --- a/GPU.cpp +++ b/GPU.cpp @@ -837,6 +837,10 @@ void StartScanline(u32 line) if (DispStat[0] & (1<<3)) NDS::TriggerIRQ(0, NDS::IRQ_VBlank); if (DispStat[1] & (1<<3)) NDS::TriggerIRQ(1, NDS::IRQ_VBlank); + + GPU2D_A->VBlank(); + GPU2D_B->VBlank(); + GPU3D::VBlank(); } //NDS::ScheduleEvent(LINE_CYCLES, StartScanline, line+1); diff --git a/GPU2D.cpp b/GPU2D.cpp index 6b4594ec..906be662 100644 --- a/GPU2D.cpp +++ b/GPU2D.cpp @@ -243,6 +243,11 @@ void GPU2D::DrawScanline(u32 line) } } +void GPU2D::VBlank() +{ + // +} + template void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) diff --git a/GPU2D.h b/GPU2D.h index d7da3cb3..3f767ea4 100644 --- a/GPU2D.h +++ b/GPU2D.h @@ -37,6 +37,7 @@ public: void Write32(u32 addr, u32 val); void DrawScanline(u32 line); + void VBlank(); private: u32 Num; diff --git a/GPU3D.cpp b/GPU3D.cpp index 1f268d6e..bf323c1a 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -39,6 +39,8 @@ namespace GPU3D { +#define COPYVERTEX(a, b) { *(u64*)&a[0] = *(u64*)&b[0]; *(u64*)&a[2] = *(u64*)&b[2]; } + const u32 CmdNumParams[256] = { // 0x00 @@ -156,7 +158,17 @@ void UpdateClipMatrix(); u32 PolygonMode; s16 CurVertex[3]; -s32 TempVertexBuffer[4][4]; +Vertex TempVertexBuffer[4]; +u32 VertexNum; +u32 VertexNumInPoly; + +Vertex VertexRAM[6144 * 2]; +Polygon PolygonRAM[2048 * 2]; + +Vertex* CurVertexRAM; +Polygon* CurPolygonRAM; +u32 NumVertices, NumPolygons; +u32 CurRAMBank; @@ -207,6 +219,15 @@ void Reset() memset(PosMatrixStack, 0, 31 * 16*4); ProjMatrixStackPointer = 0; PosMatrixStackPointer = 0; + + VertexNum = 0; + VertexNumInPoly = 0; + + CurRAMBank = 0; + CurVertexRAM = &VertexRAM[0]; + CurPolygonRAM = &PolygonRAM[0]; + NumVertices = 0; + NumPolygons = 0; } @@ -346,25 +367,335 @@ void UpdateClipMatrix() void SubmitPolygon() { - // + // clip. + // for each vertex: + // if it's outside, check if the previous and next vertices are inside, if so, fixor + + Vertex clippedvertices[20]; + u32 numclipped; + + int nverts = PolygonMode & 0x1 ? 4:3; + int nvisible = 0; + + /*for (int i = 0; i < nverts; i++) + { + s32* v = TempVertexBuffer[i].Position; + + if ((u32)(v[0]+0x1000) <= 0x2000 && + (u32)(v[1]+0x1000) <= 0x2000 && + (u32)(v[2]+0x1000) <= 0x2000) + { + nvisible++; + } + } + + if (!nvisible) return;*/ + + int prev, next; + int c; + + // X clipping + + prev = nverts-1; next = 1; c = 0; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = TempVertexBuffer[i]; + if (vtx.Position[0] > 0x1000) + { + Vertex* vprev = &TempVertexBuffer[prev]; + if (vprev->Position[0] <= 0x1000) + { + s32 factor = ((0x1000 - vprev->Position[0]) << 12) / (vtx.Position[0] - vprev->Position[0]); + + Vertex mid; + mid.Position[0] = 0x1000; + mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); + mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &TempVertexBuffer[next]; + if (vnext->Position[0] <= 0x1000) + { + s32 factor = ((0x1000 - vnext->Position[0]) << 12) / (vtx.Position[0] - vnext->Position[0]); + + Vertex mid; + mid.Position[0] = 0x1000; + mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); + mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else if (vtx.Position[0] < -0x1000) + { + Vertex* vprev = &TempVertexBuffer[prev]; + if (vprev->Position[0] >= -0x1000) + { + s32 factor = ((-0x1000 - vprev->Position[0]) << 12) / (vtx.Position[0] - vprev->Position[0]); + + Vertex mid; + mid.Position[0] = -0x1000; + mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); + mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &TempVertexBuffer[next]; + if (vnext->Position[0] >= -0x1000) + { + s32 factor = ((-0x1000 - vnext->Position[0]) << 12) / (vtx.Position[0] - vnext->Position[0]); + + Vertex mid; + mid.Position[0] = -0x1000; + mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); + mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else + clippedvertices[c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + // Y clipping + + nverts = c; + prev = nverts-1; next = 1; c = 10; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = clippedvertices[i]; + if (vtx.Position[1] > 0x1000) + { + Vertex* vprev = &clippedvertices[prev]; + if (vprev->Position[1] <= 0x1000) + { + s32 factor = ((0x1000 - vprev->Position[1]) << 12) / (vtx.Position[1] - vprev->Position[1]); + + Vertex mid; + mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); + mid.Position[1] = 0x1000; + mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &clippedvertices[next]; + if (vnext->Position[1] <= 0x1000) + { + s32 factor = ((0x1000 - vnext->Position[1]) << 12) / (vtx.Position[1] - vnext->Position[1]); + + Vertex mid; + mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); + mid.Position[1] = 0x1000; + mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else if (vtx.Position[1] < -0x1000) + { + Vertex* vprev = &clippedvertices[prev]; + if (vprev->Position[1] >= -0x1000) + { + s32 factor = ((-0x1000 - vprev->Position[1]) << 12) / (vtx.Position[1] - vprev->Position[1]); + + Vertex mid; + mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); + mid.Position[1] = -0x1000; + mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &clippedvertices[next]; + if (vnext->Position[1] >= -0x1000) + { + s32 factor = ((-0x1000 - vnext->Position[1]) << 12) / (vtx.Position[1] - vnext->Position[1]); + + Vertex mid; + mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); + mid.Position[1] = -0x1000; + mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else + clippedvertices[c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + // Z clipping + + nverts = c-10; + prev = nverts-1; next = 1; c = 0; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = clippedvertices[10+i]; + if (vtx.Position[2] > 0x1000) + { + Vertex* vprev = &clippedvertices[10+prev]; + if (vprev->Position[2] <= 0x1000) + { + s32 factor = ((0x1000 - vprev->Position[2]) << 12) / (vtx.Position[2] - vprev->Position[2]); + + Vertex mid; + mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); + mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); + mid.Position[2] = 0x1000; + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &clippedvertices[10+next]; + if (vnext->Position[2] <= 0x1000) + { + s32 factor = ((0x1000 - vnext->Position[2]) << 12) / (vtx.Position[2] - vnext->Position[2]); + + Vertex mid; + mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); + mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); + mid.Position[2] = 0x1000; + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else if (vtx.Position[2] < -0x1000) + { + Vertex* vprev = &clippedvertices[10+prev]; + if (vprev->Position[2] >= -0x1000) + { + s32 factor = ((-0x1000 - vprev->Position[2]) << 12) / (vtx.Position[2] - vprev->Position[2]); + + Vertex mid; + mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); + mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); + mid.Position[2] = -0x1000; + mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); + mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); + mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + + Vertex* vnext = &clippedvertices[10+next]; + if (vnext->Position[2] >= -0x1000) + { + s32 factor = ((-0x1000 - vnext->Position[2]) << 12) / (vtx.Position[2] - vnext->Position[2]); + + Vertex mid; + mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); + mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); + mid.Position[2] = -0x1000; + mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); + mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); + mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); + + clippedvertices[c++] = mid; + } + } + else + clippedvertices[c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + if (c == 0) return; + + // build the actual polygon + // TODO: tri/quad strips + + if (NumPolygons >= 2048) return; + if (NumVertices+c > 6144) return; + + Polygon* poly = &CurPolygonRAM[NumPolygons++]; + poly->NumVertices = 0; + + for (int i = 0; i < c; i++) + { + CurVertexRAM[NumVertices] = clippedvertices[i]; + poly->Vertices[i] = &CurVertexRAM[NumVertices]; + + NumVertices++; + poly->NumVertices++; + } } void SubmitVertex() { s32 vertex[4] = {(s32)CurVertex[0], (s32)CurVertex[1], (s32)CurVertex[2], 0x1000}; - s32 vertextrans[4]; + //s32 vertextrans[4]; + Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; //printf("vertex: %f %f %f\n", vertex[0]/4096.0f, vertex[1]/4096.0f, vertex[2]/4096.0f); UpdateClipMatrix(); - vertextrans[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; - vertextrans[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; - vertextrans[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; - vertextrans[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; + vertextrans->Position[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; + vertextrans->Position[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; + vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; + vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; - //printf("vertex trans: %f %f %f\n", vertextrans[0]/4096.0f, vertextrans[1]/4096.0f, vertextrans[2]/4096.0f); + s32 w_inv; + if (vertextrans->Position[3] == 0) + w_inv = 0x1000; // checkme + else + w_inv = 0x1000000 / vertextrans->Position[3]; - if (vertextrans[3] == 0) + vertextrans->Position[0] = (vertextrans->Position[0] * w_inv) >> 12; + vertextrans->Position[1] = (vertextrans->Position[1] * w_inv) >> 12; + vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12; + + // boo + vertextrans->Color[0] = 63; + vertextrans->Color[1] = 0; + vertextrans->Color[2] = 63; + + /*printf("vertex trans: %f %f %f %f\n", + vertextrans->Position[0]/4096.0f, + vertextrans->Position[1]/4096.0f, + vertextrans->Position[2]/4096.0f, + vertextrans->Position[3]/4096.0f);*/ + + /*if (vertextrans[3] == 0) { //printf("!!!! VERTEX W IS ZERO\n"); //return; @@ -374,7 +705,24 @@ void SubmitVertex() s32 screenX = (((vertextrans[0]+vertextrans[3]) * Viewport[2]) / (vertextrans[3]<<1)) + Viewport[0]; s32 screenY = (((vertextrans[1]+vertextrans[3]) * Viewport[3]) / (vertextrans[3]<<1)) + Viewport[1]; - //printf("screen: %d, %d\n", screenX, screenY); + printf("screen: %d, %d\n", screenX, screenY); + + s32* finalvertex = TempVertexBuffer[VertexNumInPoly]; + finalvertex[0] = screenX; + finalvertex[1] = screenY; + finalvertex[2] = vertextrans[2]; + finalvertex[3] = vertextrans[3];*/ + + // triangle strip: 0,1,2 1,2,3 2,3,4 3,4,5 ... + // quad strip: 0,1,3,2 2,3,5,4 4,5,7,6 6,7,9,8 ... + + VertexNum++; + VertexNumInPoly++; + if (VertexNumInPoly >= (PolygonMode & 0x1 ? 4:3)) + { + VertexNumInPoly = (PolygonMode & 0x2 ? 2:0); + SubmitPolygon(); + } } @@ -750,6 +1098,8 @@ void ExecuteCommand() case 0x40: PolygonMode = ExecParams[0] & 0x3; + VertexNum = 0; + VertexNumInPoly = 0; break; case 0x50: @@ -800,6 +1150,19 @@ void CheckFIFODMA() } +void VBlank() +{ + // TODO: render + + CurRAMBank = CurRAMBank?0:1; + CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; + CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; + + NumVertices = 0; + NumPolygons = 0; +} + + u8 Read8(u32 addr) { return 0; diff --git a/GPU3D.h b/GPU3D.h index d606e25d..19356d8e 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -22,6 +22,20 @@ namespace GPU3D { +typedef struct +{ + s32 Position[4]; + u8 Color[3]; + +} Vertex; + +typedef struct +{ + Vertex* Vertices[10]; + u32 NumVertices; + +} Polygon; + bool Init(); void DeInit(); void Reset(); @@ -30,6 +44,8 @@ void Run(s32 cycles); void CheckFIFOIRQ(); void CheckFIFODMA(); +void VBlank(); + u8 Read8(u32 addr); u16 Read16(u32 addr); u32 Read32(u32 addr); diff --git a/melonDS.depend b/melonDS.depend index 92a73bdb..c13ca037 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486603989 source:c:\documents\sources\melonds\nds.cpp +1486730943 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -87,13 +87,13 @@ "NDS.h" "SPI.h" -1486489354 source:c:\documents\sources\melonds\gpu2d.cpp +1486736522 source:c:\documents\sources\melonds\gpu2d.cpp "NDS.h" "GPU.h" -1485991372 c:\documents\sources\melonds\gpu2d.h +1486736507 c:\documents\sources\melonds\gpu2d.h 1481040524 c:\documents\sources\melonds\wifi.h @@ -119,7 +119,7 @@ 1484698068 c:\documents\sources\melonds\dma.h "types.h" -1486502073 source:c:\documents\sources\melonds\gpu.cpp +1486736549 source:c:\documents\sources\melonds\gpu.cpp "NDS.h" @@ -146,9 +146,9 @@ "NDS.h" "NDSCart.h" -1486514429 c:\documents\sources\melonds\gpu3d.h +1486736374 c:\documents\sources\melonds\gpu3d.h -1486603980 source:c:\documents\sources\melonds\gpu3d.cpp +1486736648 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" From 78f49d061a1c5d1c3056244db42f2f00bed29c6b Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 10 Feb 2017 16:50:26 +0100 Subject: [PATCH 04/16] base for software renderer --- GPU3D.cpp | 9 ++++++++- GPU3D.h | 11 ++++++++++ GPU3D_Soft.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ melonDS.cbp | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 GPU3D_Soft.cpp diff --git a/GPU3D.cpp b/GPU3D.cpp index bf323c1a..1ee350e8 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -177,11 +177,15 @@ bool Init() CmdFIFO = new FIFO(256); CmdPIPE = new FIFO(4); + if (!SoftRenderer::Init()) return false; + return true; } void DeInit() { + SoftRenderer::DeInit(); + delete CmdFIFO; delete CmdPIPE; } @@ -228,6 +232,8 @@ void Reset() CurPolygonRAM = &PolygonRAM[0]; NumVertices = 0; NumPolygons = 0; + + SoftRenderer::Reset(); } @@ -1152,7 +1158,8 @@ void CheckFIFODMA() void VBlank() { - // TODO: render + // TODO: only do this if a SwapBuffers command was issued + SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); CurRAMBank = CurRAMBank?0:1; CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; diff --git a/GPU3D.h b/GPU3D.h index 19356d8e..d65fa9a7 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -53,6 +53,17 @@ void Write8(u32 addr, u8 val); void Write16(u32 addr, u16 val); void Write32(u32 addr, u32 val); +namespace SoftRenderer +{ + +bool Init(); +void DeInit(); +void Reset(); + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys); + +} + } #endif diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp new file mode 100644 index 00000000..38d3539c --- /dev/null +++ b/GPU3D_Soft.cpp @@ -0,0 +1,55 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include +#include +#include "NDS.h" +#include "GPU3D.h" + + +namespace GPU3D +{ +namespace SoftRenderer +{ + +u8 ColorBuffer[256*192 * 4]; + + +bool Init() +{ + // +} + +void DeInit() +{ + // +} + +void Reset() +{ + memset(ColorBuffer, 0, 256*192 * 4); +} + + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) +{ + // +} + +} +} diff --git a/melonDS.cbp b/melonDS.cbp index fb32d8b7..90764d90 100644 --- a/melonDS.cbp +++ b/melonDS.cbp @@ -58,6 +58,7 @@ + From 050ba5dfbe823672bbb1fdd45afa3b3b5f16f7b3 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Sat, 11 Feb 2017 03:54:08 +0100 Subject: [PATCH 05/16] some debuts of 3D drawing. it can draw non-strip polygons, filled with a fixed color. oh and it doesn't work if they're facing back. --- GPU2D.cpp | 21 +++++++- GPU2D.h | 1 + GPU3D.cpp | 5 ++ GPU3D.h | 4 ++ GPU3D_Soft.cpp | 143 ++++++++++++++++++++++++++++++++++++++++++++++++- melonDS.depend | 18 ++++--- 6 files changed, 183 insertions(+), 9 deletions(-) diff --git a/GPU2D.cpp b/GPU2D.cpp index 906be662..5168402b 100644 --- a/GPU2D.cpp +++ b/GPU2D.cpp @@ -290,7 +290,7 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) if (DispCnt & 0x0100) { if ((!Num) && (DispCnt & 0x8)) - {} // TODO + DrawBG_3D(line, dst); else DrawBG_Text(line, dst, 0); } @@ -333,6 +333,25 @@ void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) } +void GPU2D::DrawBG_3D(u32 line, u16* dst) +{ + // TODO: scroll, etc + + u8* src = GPU3D::GetLine(line); + for (int i = 0; i < 256; i++) + { + // TODO: color buffer should be 18bit!! + + u8 r = *src++; + u8 g = *src++; + u8 b = *src++; + u8 a = *src++; + if (a == 0) continue; + + dst[i] = (r >> 1) | ((g >> 1) << 5) | ((b >> 1) << 10); + } +} + void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; diff --git a/GPU2D.h b/GPU2D.h index 3f767ea4..ced88796 100644 --- a/GPU2D.h +++ b/GPU2D.h @@ -59,6 +59,7 @@ private: template void DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst); void DrawScanline_Mode1(u32 line, u16* dst); + void DrawBG_3D(u32 line, u16* dst); void DrawBG_Text(u32 line, u16* dst, u32 num); void DrawBG_Extended(u32 line, u16* dst, u32 bgnum); diff --git a/GPU3D.cpp b/GPU3D.cpp index 1ee350e8..a40e685a 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -1169,6 +1169,11 @@ void VBlank() NumPolygons = 0; } +u8* GetLine(int line) +{ + return SoftRenderer::GetLine(line); +} + u8 Read8(u32 addr) { diff --git a/GPU3D.h b/GPU3D.h index d65fa9a7..d9e7e1a1 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -36,6 +36,8 @@ typedef struct } Polygon; +extern s32 Viewport[4]; + bool Init(); void DeInit(); void Reset(); @@ -45,6 +47,7 @@ void CheckFIFOIRQ(); void CheckFIFODMA(); void VBlank(); +u8* GetLine(int line); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -61,6 +64,7 @@ void DeInit(); void Reset(); void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys); +u8* GetLine(int line); } diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 38d3539c..9c057e9e 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -32,7 +32,7 @@ u8 ColorBuffer[256*192 * 4]; bool Init() { - // + return true; } void DeInit() @@ -46,9 +46,148 @@ void Reset() } +void RenderPolygon(Polygon* polygon) +{ + int nverts = polygon->NumVertices; + + int vtop, vbot; + s32 ytop = 191, ybot = 0; + s32 scrcoords[10][3]; + + // find the topmost and bottommost vertices of the polygon + + for (int i = 0; i < nverts; i++) + { + Vertex* vtx = polygon->Vertices[i]; + + s32 scrX = (((vtx->Position[0] + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; + s32 scrY = (((vtx->Position[1] + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + if (scrX > 255) scrX = 255; + if (scrY > 191) scrY = 191; + + scrcoords[i][0] = scrX; + scrcoords[i][1] = 191 - scrY; + scrcoords[i][2] = 0; // TODO: Z + + if (scrcoords[i][1] < ytop) + { + ytop = scrcoords[i][1]; + vtop = i; + } + if (scrcoords[i][1] > ybot) + { + ybot = scrcoords[i][1]; + vbot = i; + } + } + + // draw, line per line + + int lcur = vtop, rcur = vtop; + int lnext, rnext; + s32 lstep, rstep; + //s32 xmin, xmax; + + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + + /*if ((scrcoords[lnext][1] - scrcoords[lcur][1]) == 0) lstep = 0; else + lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); + if ((scrcoords[rnext][1] - scrcoords[rcur][1]) == 0) rstep = 0; else + rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]);*/ + + //xmin = scrcoords[lcur][0] << 12; + //xmax = scrcoords[rcur][0] << 12; + + for (s32 y = ytop; y <= ybot; y++) + { + if (y == scrcoords[lnext][1] && y < ybot) + { + lcur++; + if (lcur >= nverts) lcur = 0; + + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + + //lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); + //xmin = scrcoords[lcur][0] << 12; + } + + if (y == scrcoords[rnext][1] && y < ybot) + { + rcur--; + if (rcur < 0) rcur = nverts - 1; + + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + + //rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); + //xmax = scrcoords[rcur][0] << 12; + } + + s32 lfactor, rfactor; + + if (scrcoords[lnext][1] == scrcoords[lcur][1]) + lfactor = 0; + else + lfactor = ((y - scrcoords[lcur][1]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); + + if (scrcoords[rnext][1] == scrcoords[rcur][1]) + rfactor = 0; + else + rfactor = ((y - scrcoords[rcur][1]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); + + s32 xmin = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); + s32 xmax = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); + + for (s32 x = xmin; x <= xmax; x++) + { + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; + pixel[0] = 0; + pixel[1] = 63; + pixel[2] = 0; + pixel[3] = 31; + } + } + + // test + /*for (int i = 0; i < nverts; i++) + { + Vertex* vtx = polygon->Vertices[i]; + + s32 scrX = (((vtx->Position[0] + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; + s32 scrY = (((vtx->Position[1] + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + if (scrX > 255) scrX = 255; + if (scrY > 191) scrY = 191; + + u8* pixel = &ColorBuffer[((256*scrY) + scrX) * 4]; + pixel[0] = 0; + pixel[1] = 63; + pixel[2] = 0; + pixel[3] = 31; + }*/ +} + void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) { - // + // TODO: render translucent polygons last + + for (int i = 0; i < 256*192; i++) + { + ((u32*)ColorBuffer)[i] = 0x1F000000; + } + + for (int i = 0; i < npolys; i++) + { + RenderPolygon(&polygons[i]); + } +} + +u8* GetLine(int line) +{ + return &ColorBuffer[line * 256 * 4]; } } diff --git a/melonDS.depend b/melonDS.depend index c13ca037..7c0308e1 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486730943 source:c:\documents\sources\melonds\nds.cpp +1486778178 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -24,7 +24,7 @@ "RTC.h" "Wifi.h" -1486512922 source:c:\documents\sources\melonds\arm.cpp +1486736660 source:c:\documents\sources\melonds\arm.cpp "NDS.h" "ARM.h" @@ -87,13 +87,13 @@ "NDS.h" "SPI.h" -1486736522 source:c:\documents\sources\melonds\gpu2d.cpp +1486778220 source:c:\documents\sources\melonds\gpu2d.cpp "NDS.h" "GPU.h" -1486736507 c:\documents\sources\melonds\gpu2d.h +1486777351 c:\documents\sources\melonds\gpu2d.h 1481040524 c:\documents\sources\melonds\wifi.h @@ -146,12 +146,18 @@ "NDS.h" "NDSCart.h" -1486736374 c:\documents\sources\melonds\gpu3d.h +1486777933 c:\documents\sources\melonds\gpu3d.h -1486736648 source:c:\documents\sources\melonds\gpu3d.cpp +1486777278 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" +1486781263 source:c:\documents\sources\melonds\gpu3d_soft.cpp + + + "NDS.h" + "GPU3D.h" + From 15c8d59e2b0f1aada8804b363515dcd146c702d7 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Sat, 11 Feb 2017 04:10:59 +0100 Subject: [PATCH 06/16] vertex colors, with Gouraud shading and all has yet to be checked for whether it's accurate, optimized, etc... --- GPU3D.cpp | 20 ++++++++++++++++---- GPU3D_Soft.cpp | 50 +++++++++++++++++++++++++++----------------------- melonDS.depend | 2 +- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/GPU3D.cpp b/GPU3D.cpp index a40e685a..edc9f127 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -157,6 +157,7 @@ void UpdateClipMatrix(); u32 PolygonMode; s16 CurVertex[3]; +u8 VertexColor[3]; Vertex TempVertexBuffer[4]; u32 VertexNum; @@ -690,10 +691,9 @@ void SubmitVertex() vertextrans->Position[1] = (vertextrans->Position[1] * w_inv) >> 12; vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12; - // boo - vertextrans->Color[0] = 63; - vertextrans->Color[1] = 0; - vertextrans->Color[2] = 63; + vertextrans->Color[0] = VertexColor[0]; + vertextrans->Color[1] = VertexColor[1]; + vertextrans->Color[2] = VertexColor[2]; /*printf("vertex trans: %f %f %f %f\n", vertextrans->Position[0]/4096.0f, @@ -1059,6 +1059,18 @@ void ExecuteCommand() } break; + case 0x20: // vertex color + { + u32 c = ExecParams[0]; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + VertexColor[0] = r ? (r<<1)+1 : 0; + VertexColor[1] = g ? (g<<1)+1 : 0; + VertexColor[2] = b ? (b<<1)+1 : 0; + } + break; + case 0x21: // TODO: more cycles if lights are enabled break; diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 9c057e9e..4155751b 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -127,6 +127,11 @@ void RenderPolygon(Polygon* polygon) //xmax = scrcoords[rcur][0] << 12; } + Vertex* vlcur = polygon->Vertices[lcur]; + Vertex* vlnext = polygon->Vertices[lnext]; + Vertex* vrcur = polygon->Vertices[rcur]; + Vertex* vrnext = polygon->Vertices[rnext]; + s32 lfactor, rfactor; if (scrcoords[lnext][1] == scrcoords[lcur][1]) @@ -139,35 +144,34 @@ void RenderPolygon(Polygon* polygon) else rfactor = ((y - scrcoords[rcur][1]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); - s32 xmin = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); - s32 xmax = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); + s32 xl = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); + s32 xr = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); - for (s32 x = xmin; x <= xmax; x++) + u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); + u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); + u8 bl = vlcur->Color[2] + (((vlnext->Color[2] - vlcur->Color[2]) * lfactor) >> 12); + + u8 rr = vrcur->Color[0] + (((vrnext->Color[0] - vrcur->Color[0]) * rfactor) >> 12); + u8 gr = vrcur->Color[1] + (((vrnext->Color[1] - vrcur->Color[1]) * rfactor) >> 12); + u8 br = vrcur->Color[2] + (((vrnext->Color[2] - vrcur->Color[2]) * rfactor) >> 12); + + s32 xdiv; + if (xr == xl) + xdiv = 0; + else + xdiv = 0x1000 / (xr - xl); + + for (s32 x = xl; x <= xr; x++) { + s32 xfactor = (x - xl) * xdiv; + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; - pixel[0] = 0; - pixel[1] = 63; - pixel[2] = 0; + pixel[0] = rl + (((rr - rl) * xfactor) >> 12); + pixel[1] = gl + (((gr - gl) * xfactor) >> 12); + pixel[2] = bl + (((br - bl) * xfactor) >> 12); pixel[3] = 31; } } - - // test - /*for (int i = 0; i < nverts; i++) - { - Vertex* vtx = polygon->Vertices[i]; - - s32 scrX = (((vtx->Position[0] + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; - s32 scrY = (((vtx->Position[1] + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; - if (scrX > 255) scrX = 255; - if (scrY > 191) scrY = 191; - - u8* pixel = &ColorBuffer[((256*scrY) + scrX) * 4]; - pixel[0] = 0; - pixel[1] = 63; - pixel[2] = 0; - pixel[3] = 31; - }*/ } void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) diff --git a/melonDS.depend b/melonDS.depend index 7c0308e1..8fffc74e 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -148,7 +148,7 @@ 1486777933 c:\documents\sources\melonds\gpu3d.h -1486777278 source:c:\documents\sources\melonds\gpu3d.cpp +1486782030 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" From 361ddd7595671a907ebdecfaf6b0fcba499c9e07 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Mon, 13 Feb 2017 02:07:54 +0100 Subject: [PATCH 07/16] * fix GXFIFO decoding * proper SwapBuffers implementation * fix polygon clipping * misc fixes --- DMA.cpp | 2 +- GPU3D.cpp | 466 +++++++++++++++++++++++++------------------------ GPU3D_Soft.cpp | 109 +++++++++--- README.md | 19 +- main.cpp | 4 + melonDS.depend | 12 +- 6 files changed, 352 insertions(+), 260 deletions(-) diff --git a/DMA.cpp b/DMA.cpp index 19711ae3..dbedc010 100644 --- a/DMA.cpp +++ b/DMA.cpp @@ -113,7 +113,7 @@ void DMA::Start() if ((Cnt & 0x00600000) == 0x00600000) CurDstAddr = DstAddr; - //printf("ARM%d DMA%d %08X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); + //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); // special path for cart DMA. this is a gross hack. // emulating it properly requires emulating cart transfer delays, so uh... TODO diff --git a/GPU3D.cpp b/GPU3D.cpp index edc9f127..01fdfb82 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -171,6 +171,8 @@ Polygon* CurPolygonRAM; u32 NumVertices, NumPolygons; u32 CurRAMBank; +u32 FlushRequest; + bool Init() @@ -234,6 +236,8 @@ void Reset() NumVertices = 0; NumPolygons = 0; + FlushRequest = 0; + SoftRenderer::Reset(); } @@ -372,32 +376,39 @@ void UpdateClipMatrix() +template +void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) +{ + s32 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / + ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); + + Vertex mid; +#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor) >> 12); + + INTERPOLATE(Position[0]); + INTERPOLATE(Position[1]); + INTERPOLATE(Position[2]); + INTERPOLATE(Position[3]); + + INTERPOLATE(Color[0]); + INTERPOLATE(Color[1]); + INTERPOLATE(Color[2]); + +#undef INTERPOLATE + outbuf[num] = mid; +} + void SubmitPolygon() { // clip. // for each vertex: // if it's outside, check if the previous and next vertices are inside, if so, fixor - Vertex clippedvertices[20]; + Vertex clippedvertices[2][10]; u32 numclipped; int nverts = PolygonMode & 0x1 ? 4:3; int nvisible = 0; - - /*for (int i = 0; i < nverts; i++) - { - s32* v = TempVertexBuffer[i].Position; - - if ((u32)(v[0]+0x1000) <= 0x2000 && - (u32)(v[1]+0x1000) <= 0x2000 && - (u32)(v[2]+0x1000) <= 0x2000) - { - nvisible++; - } - } - - if (!nvisible) return;*/ - int prev, next; int c; @@ -407,76 +418,51 @@ void SubmitPolygon() for (int i = 0; i < nverts; i++) { Vertex vtx = TempVertexBuffer[i]; - if (vtx.Position[0] > 0x1000) + if (vtx.Position[0] > vtx.Position[3]) { Vertex* vprev = &TempVertexBuffer[prev]; - if (vprev->Position[0] <= 0x1000) + if (vprev->Position[0] <= vprev->Position[3]) { - s32 factor = ((0x1000 - vprev->Position[0]) << 12) / (vtx.Position[0] - vprev->Position[0]); - - Vertex mid; - mid.Position[0] = 0x1000; - mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); - mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vprev); + c++; } Vertex* vnext = &TempVertexBuffer[next]; - if (vnext->Position[0] <= 0x1000) + if (vnext->Position[0] <= vnext->Position[3]) { - s32 factor = ((0x1000 - vnext->Position[0]) << 12) / (vtx.Position[0] - vnext->Position[0]); - - Vertex mid; - mid.Position[0] = 0x1000; - mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); - mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - } - else if (vtx.Position[0] < -0x1000) - { - Vertex* vprev = &TempVertexBuffer[prev]; - if (vprev->Position[0] >= -0x1000) - { - s32 factor = ((-0x1000 - vprev->Position[0]) << 12) / (vtx.Position[0] - vprev->Position[0]); - - Vertex mid; - mid.Position[0] = -0x1000; - mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); - mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - - Vertex* vnext = &TempVertexBuffer[next]; - if (vnext->Position[0] >= -0x1000) - { - s32 factor = ((-0x1000 - vnext->Position[0]) << 12) / (vtx.Position[0] - vnext->Position[0]); - - Vertex mid; - mid.Position[0] = -0x1000; - mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); - mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vnext); + c++; } } else - clippedvertices[c++] = vtx; + clippedvertices[0][c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + nverts = c; prev = nverts-1; next = 1; c = 0; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[0] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[0] >= -vprev->Position[3]) + { + ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[0] >= -vnext->Position[3]) + { + ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; @@ -484,81 +470,55 @@ void SubmitPolygon() // Y clipping - nverts = c; - prev = nverts-1; next = 1; c = 10; + nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { - Vertex vtx = clippedvertices[i]; - if (vtx.Position[1] > 0x1000) + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[1] > vtx.Position[3]) { - Vertex* vprev = &clippedvertices[prev]; - if (vprev->Position[1] <= 0x1000) + Vertex* vprev = &clippedvertices[1][prev]; + if (vprev->Position[1] <= vprev->Position[3]) { - s32 factor = ((0x1000 - vprev->Position[1]) << 12) / (vtx.Position[1] - vprev->Position[1]); - - Vertex mid; - mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); - mid.Position[1] = 0x1000; - mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vprev); + c++; } - Vertex* vnext = &clippedvertices[next]; - if (vnext->Position[1] <= 0x1000) + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[1] <= vnext->Position[3]) { - s32 factor = ((0x1000 - vnext->Position[1]) << 12) / (vtx.Position[1] - vnext->Position[1]); - - Vertex mid; - mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); - mid.Position[1] = 0x1000; - mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - } - else if (vtx.Position[1] < -0x1000) - { - Vertex* vprev = &clippedvertices[prev]; - if (vprev->Position[1] >= -0x1000) - { - s32 factor = ((-0x1000 - vprev->Position[1]) << 12) / (vtx.Position[1] - vprev->Position[1]); - - Vertex mid; - mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); - mid.Position[1] = -0x1000; - mid.Position[2] = vprev->Position[2] + (((vtx.Position[2] - vprev->Position[2]) * factor) >> 12); - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - - Vertex* vnext = &clippedvertices[next]; - if (vnext->Position[1] >= -0x1000) - { - s32 factor = ((-0x1000 - vnext->Position[1]) << 12) / (vtx.Position[1] - vnext->Position[1]); - - Vertex mid; - mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); - mid.Position[1] = -0x1000; - mid.Position[2] = vnext->Position[2] + (((vtx.Position[2] - vnext->Position[2]) * factor) >> 12); - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vnext); + c++; } } else - clippedvertices[c++] = vtx; + clippedvertices[0][c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + nverts = c; prev = nverts-1; next = 1; c = 0; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[1] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[1] >= -vprev->Position[3]) + { + ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[1] >= -vnext->Position[3]) + { + ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; @@ -566,81 +526,55 @@ void SubmitPolygon() // Z clipping - nverts = c-10; - prev = nverts-1; next = 1; c = 0; + nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { - Vertex vtx = clippedvertices[10+i]; - if (vtx.Position[2] > 0x1000) + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[2] > vtx.Position[3]) { - Vertex* vprev = &clippedvertices[10+prev]; - if (vprev->Position[2] <= 0x1000) + Vertex* vprev = &clippedvertices[1][prev]; + if (vprev->Position[2] <= vprev->Position[3]) { - s32 factor = ((0x1000 - vprev->Position[2]) << 12) / (vtx.Position[2] - vprev->Position[2]); - - Vertex mid; - mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); - mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); - mid.Position[2] = 0x1000; - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vprev); + c++; } - Vertex* vnext = &clippedvertices[10+next]; - if (vnext->Position[2] <= 0x1000) + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[2] <= vnext->Position[3]) { - s32 factor = ((0x1000 - vnext->Position[2]) << 12) / (vtx.Position[2] - vnext->Position[2]); - - Vertex mid; - mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); - mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); - mid.Position[2] = 0x1000; - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - } - else if (vtx.Position[2] < -0x1000) - { - Vertex* vprev = &clippedvertices[10+prev]; - if (vprev->Position[2] >= -0x1000) - { - s32 factor = ((-0x1000 - vprev->Position[2]) << 12) / (vtx.Position[2] - vprev->Position[2]); - - Vertex mid; - mid.Position[0] = vprev->Position[0] + (((vtx.Position[0] - vprev->Position[0]) * factor) >> 12); - mid.Position[1] = vprev->Position[1] + (((vtx.Position[1] - vprev->Position[1]) * factor) >> 12); - mid.Position[2] = -0x1000; - mid.Color[0] = vprev->Color[0] + (((vtx.Color[0] - vprev->Color[0]) * factor) >> 12); - mid.Color[1] = vprev->Color[1] + (((vtx.Color[1] - vprev->Color[1]) * factor) >> 12); - mid.Color[2] = vprev->Color[2] + (((vtx.Color[2] - vprev->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; - } - - Vertex* vnext = &clippedvertices[10+next]; - if (vnext->Position[2] >= -0x1000) - { - s32 factor = ((-0x1000 - vnext->Position[2]) << 12) / (vtx.Position[2] - vnext->Position[2]); - - Vertex mid; - mid.Position[0] = vnext->Position[0] + (((vtx.Position[0] - vnext->Position[0]) * factor) >> 12); - mid.Position[1] = vnext->Position[1] + (((vtx.Position[1] - vnext->Position[1]) * factor) >> 12); - mid.Position[2] = -0x1000; - mid.Color[0] = vnext->Color[0] + (((vtx.Color[0] - vnext->Color[0]) * factor) >> 12); - mid.Color[1] = vnext->Color[1] + (((vtx.Color[1] - vnext->Color[1]) * factor) >> 12); - mid.Color[2] = vnext->Color[2] + (((vtx.Color[2] - vnext->Color[2]) * factor) >> 12); - - clippedvertices[c++] = mid; + ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vnext); + c++; } } else - clippedvertices[c++] = vtx; + clippedvertices[0][c++] = vtx; + + prev++; if (prev >= nverts) prev = 0; + next++; if (next >= nverts) next = 0; + } + + nverts = c; prev = nverts-1; next = 1; c = 0; + for (int i = 0; i < nverts; i++) + { + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[2] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[2] >= -vprev->Position[3]) + { + ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[2] >= -vnext->Position[3]) + { + ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; @@ -659,7 +593,7 @@ void SubmitPolygon() for (int i = 0; i < c; i++) { - CurVertexRAM[NumVertices] = clippedvertices[i]; + CurVertexRAM[NumVertices] = clippedvertices[1][i]; poly->Vertices[i] = &CurVertexRAM[NumVertices]; NumVertices++; @@ -673,6 +607,8 @@ void SubmitVertex() //s32 vertextrans[4]; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; + if (PolygonMode & 0x2) return; + //printf("vertex: %f %f %f\n", vertex[0]/4096.0f, vertex[1]/4096.0f, vertex[2]/4096.0f); UpdateClipMatrix(); @@ -681,15 +617,23 @@ void SubmitVertex() vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; - s32 w_inv; + /*printf("vertex fart: %f %f %f %f\n", + vertextrans->Position[0]/4096.0f, + vertextrans->Position[1]/4096.0f, + vertextrans->Position[2]/4096.0f, + vertextrans->Position[3]/4096.0f);*/ + + /*s32 w_inv; if (vertextrans->Position[3] == 0) w_inv = 0x1000; // checkme + else if(vertextrans->Position[3] < 0) + w_inv = 0x1000000 / -vertextrans->Position[3]; else w_inv = 0x1000000 / vertextrans->Position[3]; vertextrans->Position[0] = (vertextrans->Position[0] * w_inv) >> 12; vertextrans->Position[1] = (vertextrans->Position[1] * w_inv) >> 12; - vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12; + vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12;*/ vertextrans->Color[0] = VertexColor[0]; vertextrans->Color[1] = VertexColor[1]; @@ -699,7 +643,12 @@ void SubmitVertex() vertextrans->Position[0]/4096.0f, vertextrans->Position[1]/4096.0f, vertextrans->Position[2]/4096.0f, - vertextrans->Position[3]/4096.0f);*/ + vertextrans->Position[3]/4096.0f); + printf("clip: %f %f %f %f\n", + ClipMatrix[3]/4096.0f, + ClipMatrix[7]/4096.0f, + ClipMatrix[11]/4096.0f, + ClipMatrix[15]/4096.0f);*/ /*if (vertextrans[3] == 0) { @@ -724,10 +673,48 @@ void SubmitVertex() VertexNum++; VertexNumInPoly++; - if (VertexNumInPoly >= (PolygonMode & 0x1 ? 4:3)) + + switch (PolygonMode) { - VertexNumInPoly = (PolygonMode & 0x2 ? 2:0); - SubmitPolygon(); + case 0: // triangle + if (VertexNumInPoly == 3) + { + VertexNumInPoly = 0; + SubmitPolygon(); + } + break; + + case 1: // quad + if (VertexNumInPoly == 4) + { + VertexNumInPoly = 0; + SubmitPolygon(); + } + break; + + /*case 2: // triangle strip + if (VertexNum > 3) + { + if (VertexNumInPoly == 1) + { + VertexNumInPoly = 0; + // reorder + } + else + VertexNumInPoly = 0; + + SubmitPolygon(); + } + else if (VertexNum == 3) + { + VertexNumInPoly = 2; + SubmitPolygon(); + + TempVertexBuffer[0] = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break;*/ + default: VertexNumInPoly = 0; break; } } @@ -738,12 +725,14 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) { CmdPIPE->Write(entry); + GXStat |= (1<<27); } else { if (CmdFIFO->IsFull()) { printf("!!! GX FIFO FULL\n"); + //NDS::debug(0); return; } @@ -775,6 +764,8 @@ void ExecuteCommand() { CmdFIFOEntry entry = CmdFIFORead(); + //printf("FIFO: %02X %08X\n", entry.Command, entry.Param); + ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; @@ -784,6 +775,8 @@ void ExecuteCommand() ExecParamCount = 0; GXStat &= ~(1<<14); + //if (CycleCount > 0) + // GXStat |= (1<<27); //printf("3D CMD %02X\n", entry.Command); @@ -1121,7 +1114,7 @@ void ExecuteCommand() break; case 0x50: - // TODO: make it happen upon VBlank, not right now + FlushRequest = 1;//0x80000000 | (ExecParams[0] & 0x3); break; case 0x60: // viewport x1,y1,x2,y2 @@ -1145,7 +1138,11 @@ void Run(s32 cycles) CycleCount -= cycles; if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + { CycleCount = 0; + if (!FlushRequest) + GXStat &= ~(1<<27); + } } @@ -1170,15 +1167,20 @@ void CheckFIFODMA() void VBlank() { - // TODO: only do this if a SwapBuffers command was issued - SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); + if (FlushRequest) + { + SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); - CurRAMBank = CurRAMBank?0:1; - CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; - CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; + CurRAMBank = CurRAMBank?0:1; + CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; + CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; - NumVertices = 0; - NumPolygons = 0; + NumVertices = 0; + NumPolygons = 0; + + FlushRequest = 0; + GXStat &= ~(1<<27); + } } u8* GetLine(int line) @@ -1213,8 +1215,7 @@ u32 Read32(u32 addr) ((ProjMatrixStackPointer & 0x1) << 13) | (fifolevel << 16) | (fifolevel < 128 ? (1<<25) : 0) | - (fifolevel == 0 ? (1<<26) : 0) | - (CycleCount > 0 ? (1<<27) : 0); + (fifolevel == 0 ? (1<<26) : 0); } } @@ -1262,23 +1263,30 @@ void Write32(u32 addr, u32 val) CurCommand = val; ParamCount = 0; TotalParams = CmdNumParams[CurCommand & 0xFF]; + + if (TotalParams > 0) return; } else ParamCount++; - while (ParamCount == TotalParams) + for (;;) { CmdFIFOEntry entry; entry.Command = CurCommand & 0xFF; entry.Param = val; CmdFIFOWrite(entry); - CurCommand >>= 8; - NumCommands--; - if (NumCommands == 0) break; + if (ParamCount >= TotalParams) + { + CurCommand >>= 8; + NumCommands--; + if (NumCommands == 0) break; - ParamCount = 0; - TotalParams = CmdNumParams[CurCommand & 0xFF]; + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 0xFF]; + } + if (ParamCount < TotalParams) + break; } return; diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 4155751b..d47975df 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -50,9 +50,9 @@ void RenderPolygon(Polygon* polygon) { int nverts = polygon->NumVertices; - int vtop, vbot; + int vtop = 0, vbot = 0; s32 ytop = 191, ybot = 0; - s32 scrcoords[10][3]; + s32 scrcoords[10][4]; // find the topmost and bottommost vertices of the polygon @@ -60,14 +60,36 @@ void RenderPolygon(Polygon* polygon) { Vertex* vtx = polygon->Vertices[i]; - s32 scrX = (((vtx->Position[0] + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; - s32 scrY = (((vtx->Position[1] + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + s32 w_inv; + if (vtx->Position[3] == 0) + { + w_inv = 0x1000; // checkme + printf("!! W=0\n"); + } + else + w_inv = 0x1000000 / vtx->Position[3]; + + if (vtx->Position[3] < 0) printf("!!! W=%d\n", vtx->Position[3]); + + s32 posX = (vtx->Position[0] * w_inv) >> 12; + s32 posY = (vtx->Position[1] * w_inv) >> 12; + s32 posZ = (vtx->Position[2] * w_inv) >> 12; + //s32 posX = vtx->Position[0]; + //s32 posY = vtx->Position[1]; + + s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; + s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + s32 scrZ = (vtx->Position[2] + 0x1000) >> 1; if (scrX > 255) scrX = 255; if (scrY > 191) scrY = 191; + if (scrZ > 0xFFF) scrZ = 0xFFF; + if (scrX < 0) { printf("!! bad X %d\n", scrX); scrX = 0;} + if (scrY < 0) { printf("!! bad Y %d\n", scrY); scrY = 0;} scrcoords[i][0] = scrX; scrcoords[i][1] = 191 - scrY; - scrcoords[i][2] = 0; // TODO: Z + scrcoords[i][2] = scrZ; + scrcoords[i][3] = vtx->Position[3]; if (scrcoords[i][1] < ytop) { @@ -79,6 +101,8 @@ void RenderPolygon(Polygon* polygon) ybot = scrcoords[i][1]; vbot = i; } + //if (vtx->Color[0]==63 && vtx->Color[1]==0 && vtx->Color[2]==0) + //printf("v%d: %d,%d W=%d\n", i, scrX, 191-scrY, vtx->Position[3]); } // draw, line per line @@ -103,28 +127,33 @@ void RenderPolygon(Polygon* polygon) for (s32 y = ytop; y <= ybot; y++) { - if (y == scrcoords[lnext][1] && y < ybot) + if (y < ybot) { - lcur++; - if (lcur >= nverts) lcur = 0; + while (y == scrcoords[lnext][1]) + { + lcur++; + if (lcur >= nverts) lcur = 0; - lnext = lcur + 1; - if (lnext >= nverts) lnext = 0; + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; - //lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); - //xmin = scrcoords[lcur][0] << 12; - } + //lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); + //xmin = scrcoords[lcur][0] << 12; + if (lcur == vbot) break; + } - if (y == scrcoords[rnext][1] && y < ybot) - { - rcur--; - if (rcur < 0) rcur = nverts - 1; + while (y == scrcoords[rnext][1]) + { + rcur--; + if (rcur < 0) rcur = nverts - 1; - rnext = rcur - 1; - if (rnext < 0) rnext = nverts - 1; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; - //rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); - //xmax = scrcoords[rcur][0] << 12; + //rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); + //xmax = scrcoords[rcur][0] << 12; + if (rcur == vbot) break; + } } Vertex* vlcur = polygon->Vertices[lcur]; @@ -147,6 +176,12 @@ void RenderPolygon(Polygon* polygon) s32 xl = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); s32 xr = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); + //if (vlcur->Color[0]==0 && vlcur->Color[1]==63 && vlcur->Color[2]==0) + // printf("y:%d xleft:%d xright:%d %d,%d %d,%d\n", y, xl, xr, lcur, rcur, vtop, vbot); + + //s32 zl = scrcoords[lcur][3] + (((scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); + //s32 zr = scrcoords[rcur][3] + (((scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); + u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); u8 bl = vlcur->Color[2] + (((vlnext->Color[2] - vlcur->Color[2]) * lfactor) >> 12); @@ -165,11 +200,31 @@ void RenderPolygon(Polygon* polygon) { s32 xfactor = (x - xl) * xdiv; + //s32 z = (zl << 12) + ((zr - zl) * xfactor); + //z = zl + (((zr - zl) * xfactor) >> 12); + + //s32 z_inv = ((z>>12)==0) ? 0x1000 : 0x1000000 / (z >> 12); + //xfactor = (xfactor * z_inv) >> 12; + //xfactor = (xfactor << 12) / z; + + // TODO: get rid of this shit + if (x<0 || x>255 || y<0 || y>191) + { + //printf("BAD COORDS!! %d %d\n", x, y); + x = 0; y = 0; + } + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; pixel[0] = rl + (((rr - rl) * xfactor) >> 12); pixel[1] = gl + (((gr - gl) * xfactor) >> 12); pixel[2] = bl + (((br - bl) * xfactor) >> 12); - pixel[3] = 31; + pixel[3] = 31; // TODO: alpha + + // Z debug + /*u8 zerp = (z * 63) / 0xFFFFFF; + pixel[0] = zerp; + pixel[1] = zerp; + pixel[2] = zerp;*/ } } } @@ -180,11 +235,19 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) for (int i = 0; i < 256*192; i++) { - ((u32*)ColorBuffer)[i] = 0x1F000000; + ((u32*)ColorBuffer)[i] = 0x00000000; } for (int i = 0; i < npolys; i++) { + /*printf("polygon %d: %d %d %d\n", i, polygons[i].Vertices[0]->Color[0], polygons[i].Vertices[0]->Color[1], polygons[i].Vertices[0]->Color[2]); + for (int j = 0; j < polygons[i].NumVertices; j++) + printf(" %d: %f %f %f\n", + j, + polygons[i].Vertices[j]->Position[0]/4096.0f, + polygons[i].Vertices[j]->Position[1]/4096.0f, + polygons[i].Vertices[j]->Position[2]/4096.0f); +*/ RenderPolygon(&polygons[i]); } } diff --git a/README.md b/README.md index f2545aab..9b2103e8 100644 --- a/README.md +++ b/README.md @@ -35,4 +35,21 @@ TODO LIST * 3D engine * sound * wifi - * other non-core shit (debugger, graphics viewers, cheat crapo, etc) \ No newline at end of file + * other non-core shit (debugger, graphics viewers, cheat crapo, etc) + + +IMMEDIATE TODO LIST (prior release 1.0) + + * UI + * 3D engine that atleast supports texturing + * emulate DMA timings + * make timers suck less + + +TODO LIST FOR LATER + + * sound + * more 3D engine features + * hardware renderer for 3D + * wifi + * maybe emulate flashcarts or other fancy hardware diff --git a/main.cpp b/main.cpp index 600cdbfe..7600102f 100644 --- a/main.cpp +++ b/main.cpp @@ -85,6 +85,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam) case VK_RIGHT: NDS::PressKey(4); break; case 'A': NDS::PressKey(0); break; case 'B': NDS::PressKey(1); break; + case 'L': NDS::PressKey(9); break; + case 'R': NDS::PressKey(8); break; case 'D': NDS::debug(0); break; } return 0; @@ -100,6 +102,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam) case VK_RIGHT: NDS::ReleaseKey(4); break; case 'A': NDS::ReleaseKey(0); break; case 'B': NDS::ReleaseKey(1); break; + case 'L': NDS::ReleaseKey(9); break; + case 'R': NDS::ReleaseKey(8); break; } return 0; diff --git a/melonDS.depend b/melonDS.depend index 8fffc74e..08e52b74 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,16 +1,16 @@ # depslib dependency file v1.0 -1486502416 source:c:\documents\sources\melonds\main.cpp +1486824787 source:c:\documents\sources\melonds\main.cpp "NDS.h" "GPU.h" -1486502049 c:\documents\sources\melonds\nds.h +1486822548 c:\documents\sources\melonds\nds.h "types.h" 1481161027 c:\documents\sources\melonds\types.h -1486778178 source:c:\documents\sources\melonds\nds.cpp +1486947856 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -109,7 +109,7 @@ 1486511075 c:\documents\sources\melonds\fifo.h "types.h" -1486589927 source:c:\documents\sources\melonds\dma.cpp +1486823366 source:c:\documents\sources\melonds\dma.cpp "NDS.h" "DMA.h" @@ -148,14 +148,14 @@ 1486777933 c:\documents\sources\melonds\gpu3d.h -1486782030 source:c:\documents\sources\melonds\gpu3d.cpp +1486947978 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" -1486781263 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1486947027 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From fb53fd5195f52365dd802e54412b4af5e049b677 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Mon, 13 Feb 2017 14:59:51 +0100 Subject: [PATCH 08/16] * fix overflows during fixed-point multiply * small fix to SwapBuffers --- GPU3D.cpp | 150 ++++++++++++++++++++++++++++--------------------- GPU3D_Soft.cpp | 22 +++++--- melonDS.depend | 8 +-- 3 files changed, 104 insertions(+), 76 deletions(-) diff --git a/GPU3D.cpp b/GPU3D.cpp index 01fdfb82..e77af690 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -270,25 +270,25 @@ void MatrixMult4x4(s32* m, s32* s) memcpy(tmp, m, 16*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8] + s[3]*tmp[12]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9] + s[3]*tmp[13]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10] + s[3]*tmp[14]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11] + s[3]*tmp[15]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8] + (s64)s[3]*tmp[12]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9] + (s64)s[3]*tmp[13]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10] + (s64)s[3]*tmp[14]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11] + (s64)s[3]*tmp[15]) >> 12; - m[4] = (s[4]*tmp[0] + s[5]*tmp[4] + s[6]*tmp[8] + s[7]*tmp[12]) >> 12; - m[5] = (s[4]*tmp[1] + s[5]*tmp[5] + s[6]*tmp[9] + s[7]*tmp[13]) >> 12; - m[6] = (s[4]*tmp[2] + s[5]*tmp[6] + s[6]*tmp[10] + s[7]*tmp[14]) >> 12; - m[7] = (s[4]*tmp[3] + s[5]*tmp[7] + s[6]*tmp[11] + s[7]*tmp[15]) >> 12; + m[4] = ((s64)s[4]*tmp[0] + (s64)s[5]*tmp[4] + (s64)s[6]*tmp[8] + (s64)s[7]*tmp[12]) >> 12; + m[5] = ((s64)s[4]*tmp[1] + (s64)s[5]*tmp[5] + (s64)s[6]*tmp[9] + (s64)s[7]*tmp[13]) >> 12; + m[6] = ((s64)s[4]*tmp[2] + (s64)s[5]*tmp[6] + (s64)s[6]*tmp[10] + (s64)s[7]*tmp[14]) >> 12; + m[7] = ((s64)s[4]*tmp[3] + (s64)s[5]*tmp[7] + (s64)s[6]*tmp[11] + (s64)s[7]*tmp[15]) >> 12; - m[8] = (s[8]*tmp[0] + s[9]*tmp[4] + s[10]*tmp[8] + s[11]*tmp[12]) >> 12; - m[9] = (s[8]*tmp[1] + s[9]*tmp[5] + s[10]*tmp[9] + s[11]*tmp[13]) >> 12; - m[10] = (s[8]*tmp[2] + s[9]*tmp[6] + s[10]*tmp[10] + s[11]*tmp[14]) >> 12; - m[11] = (s[8]*tmp[3] + s[9]*tmp[7] + s[10]*tmp[11] + s[11]*tmp[15]) >> 12; + m[8] = ((s64)s[8]*tmp[0] + (s64)s[9]*tmp[4] + (s64)s[10]*tmp[8] + (s64)s[11]*tmp[12]) >> 12; + m[9] = ((s64)s[8]*tmp[1] + (s64)s[9]*tmp[5] + (s64)s[10]*tmp[9] + (s64)s[11]*tmp[13]) >> 12; + m[10] = ((s64)s[8]*tmp[2] + (s64)s[9]*tmp[6] + (s64)s[10]*tmp[10] + (s64)s[11]*tmp[14]) >> 12; + m[11] = ((s64)s[8]*tmp[3] + (s64)s[9]*tmp[7] + (s64)s[10]*tmp[11] + (s64)s[11]*tmp[15]) >> 12; - m[12] = (s[12]*tmp[0] + s[13]*tmp[4] + s[14]*tmp[8] + s[15]*tmp[12]) >> 12; - m[13] = (s[12]*tmp[1] + s[13]*tmp[5] + s[14]*tmp[9] + s[15]*tmp[13]) >> 12; - m[14] = (s[12]*tmp[2] + s[13]*tmp[6] + s[14]*tmp[10] + s[15]*tmp[14]) >> 12; - m[15] = (s[12]*tmp[3] + s[13]*tmp[7] + s[14]*tmp[11] + s[15]*tmp[15]) >> 12; + m[12] = ((s64)s[12]*tmp[0] + (s64)s[13]*tmp[4] + (s64)s[14]*tmp[8] + (s64)s[15]*tmp[12]) >> 12; + m[13] = ((s64)s[12]*tmp[1] + (s64)s[13]*tmp[5] + (s64)s[14]*tmp[9] + (s64)s[15]*tmp[13]) >> 12; + m[14] = ((s64)s[12]*tmp[2] + (s64)s[13]*tmp[6] + (s64)s[14]*tmp[10] + (s64)s[15]*tmp[14]) >> 12; + m[15] = ((s64)s[12]*tmp[3] + (s64)s[13]*tmp[7] + (s64)s[14]*tmp[11] + (s64)s[15]*tmp[15]) >> 12; } void MatrixMult4x3(s32* m, s32* s) @@ -296,26 +296,34 @@ void MatrixMult4x3(s32* m, s32* s) s32 tmp[16]; memcpy(tmp, m, 16*4); + /*printf("4x3 matrix\n"); + for (int j = 0; j < 12; j += 3) + { + for (int i = 0; i < 3; i++) + printf("%f ", s[i]/4096.0f); + printf("\n"); + }*/ + // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; - m[12] = (s[9]*tmp[0] + s[10]*tmp[4] + s[11]*tmp[8] + 0x1000*tmp[12]) >> 12; - m[13] = (s[9]*tmp[1] + s[10]*tmp[5] + s[11]*tmp[9] + 0x1000*tmp[13]) >> 12; - m[14] = (s[9]*tmp[2] + s[10]*tmp[6] + s[11]*tmp[10] + 0x1000*tmp[14]) >> 12; - m[15] = (s[9]*tmp[3] + s[10]*tmp[7] + s[11]*tmp[11] + 0x1000*tmp[15]) >> 12; + m[12] = ((s64)s[9]*tmp[0] + (s64)s[10]*tmp[4] + (s64)s[11]*tmp[8] + (s64)0x1000*tmp[12]) >> 12; + m[13] = ((s64)s[9]*tmp[1] + (s64)s[10]*tmp[5] + (s64)s[11]*tmp[9] + (s64)0x1000*tmp[13]) >> 12; + m[14] = ((s64)s[9]*tmp[2] + (s64)s[10]*tmp[6] + (s64)s[11]*tmp[10] + (s64)0x1000*tmp[14]) >> 12; + m[15] = ((s64)s[9]*tmp[3] + (s64)s[10]*tmp[7] + (s64)s[11]*tmp[11] + (s64)0x1000*tmp[15]) >> 12; } void MatrixMult3x3(s32* m, s32* s) @@ -324,45 +332,45 @@ void MatrixMult3x3(s32* m, s32* s) memcpy(tmp, m, 12*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; } void MatrixScale(s32* m, s32* s) { - m[0] = (s[0]*m[0]) >> 12; - m[1] = (s[0]*m[1]) >> 12; - m[2] = (s[0]*m[2]) >> 12; - m[3] = (s[0]*m[3]) >> 12; + m[0] = ((s64)s[0]*m[0]) >> 12; + m[1] = ((s64)s[0]*m[1]) >> 12; + m[2] = ((s64)s[0]*m[2]) >> 12; + m[3] = ((s64)s[0]*m[3]) >> 12; - m[4] = (s[1]*m[4]) >> 12; - m[5] = (s[1]*m[5]) >> 12; - m[6] = (s[1]*m[6]) >> 12; - m[7] = (s[1]*m[7]) >> 12; + m[4] = ((s64)s[1]*m[4]) >> 12; + m[5] = ((s64)s[1]*m[5]) >> 12; + m[6] = ((s64)s[1]*m[6]) >> 12; + m[7] = ((s64)s[1]*m[7]) >> 12; - m[8] = (s[2]*m[8]) >> 12; - m[9] = (s[2]*m[9]) >> 12; - m[10] = (s[2]*m[10]) >> 12; - m[11] = (s[2]*m[11]) >> 12; + m[8] = ((s64)s[2]*m[8]) >> 12; + m[9] = ((s64)s[2]*m[9]) >> 12; + m[10] = ((s64)s[2]*m[10]) >> 12; + m[11] = ((s64)s[2]*m[11]) >> 12; } void MatrixTranslate(s32* m, s32* s) { - m[12] += (s[0]*m[0] + s[1]*m[4] + s[2]*m[8]) >> 12; - m[13] += (s[0]*m[1] + s[1]*m[5] + s[2]*m[9]) >> 12; - m[14] += (s[0]*m[2] + s[1]*m[6] + s[2]*m[10]) >> 12; + m[12] += ((s64)s[0]*m[0] + (s64)s[1]*m[4] + (s64)s[2]*m[8]) >> 12; + m[13] += ((s64)s[0]*m[1] + (s64)s[1]*m[5] + (s64)s[2]*m[9]) >> 12; + m[14] += ((s64)s[0]*m[2] + (s64)s[1]*m[6] + (s64)s[2]*m[10]) >> 12; } void UpdateClipMatrix() @@ -379,7 +387,7 @@ void UpdateClipMatrix() template void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) { - s32 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / + s64 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); Vertex mid; @@ -412,6 +420,15 @@ void SubmitPolygon() int prev, next; int c; + /*if (NumPolygons == 91) + for (int i = 0; i < nverts; i++) + { + Vertex vtx = TempVertexBuffer[i]; + printf("pre-clip v%d: %f %f %f %f\n", i, + vtx.Position[0]/4096.0f, vtx.Position[1]/4096.0f, + vtx.Position[2]/4096.0f, vtx.Position[3]/4096.0f); + }*/ + // X clipping prev = nverts-1; next = 1; c = 0; @@ -603,7 +620,7 @@ void SubmitPolygon() void SubmitVertex() { - s32 vertex[4] = {(s32)CurVertex[0], (s32)CurVertex[1], (s32)CurVertex[2], 0x1000}; + s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; //s32 vertextrans[4]; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; @@ -769,9 +786,12 @@ void ExecuteCommand() ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; + //if ((entry.Command&0xF0)==0x10) + // printf("MATRIX CMD %02X %08X\n", entry.Command, entry.Param); + if (ExecParamCount >= CmdNumParams[entry.Command]) { - CycleCount += CmdNumCycles[entry.Command]; + //CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; GXStat &= ~(1<<14); @@ -1129,6 +1149,9 @@ void ExecuteCommand() void Run(s32 cycles) { + if (FlushRequest) + return; + if (CycleCount <= 0) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) @@ -1140,8 +1163,7 @@ void Run(s32 cycles) if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { CycleCount = 0; - if (!FlushRequest) - GXStat &= ~(1<<27); + GXStat &= ~(1<<27); } } diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index d47975df..6ac4e81d 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -102,7 +102,7 @@ void RenderPolygon(Polygon* polygon) vbot = i; } //if (vtx->Color[0]==63 && vtx->Color[1]==0 && vtx->Color[2]==0) - //printf("v%d: %d,%d W=%d\n", i, scrX, 191-scrY, vtx->Position[3]); + //printf("v%d: %d,%d Z=%f W=%f\n", i, scrX, 191-scrY, vtx->Position[2]/4096.0f, vtx->Position[3]/4096.0f); } // draw, line per line @@ -176,11 +176,16 @@ void RenderPolygon(Polygon* polygon) s32 xl = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); s32 xr = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); - //if (vlcur->Color[0]==0 && vlcur->Color[1]==63 && vlcur->Color[2]==0) - // printf("y:%d xleft:%d xright:%d %d,%d %d,%d\n", y, xl, xr, lcur, rcur, vtop, vbot); + if (xl<0 || xr>255) continue; // hax - //s32 zl = scrcoords[lcur][3] + (((scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); - //s32 zr = scrcoords[rcur][3] + (((scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); + //if (vlcur->Color[0]==0 && vlcur->Color[1]==63 && vlcur->Color[2]==0) + /*printf("y:%d xleft:%d xright:%d %d,%d %d,%d | left: %d to %d right: %d to %d\n", + y, xl, xr, lcur, rcur, vtop, vbot, + scrcoords[lcur][0], scrcoords[lnext][0], + scrcoords[rcur][0], scrcoords[rnext][0]);*/ + + //s32 zl = scrcoords[lcur][2] + (((scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); + //s32 zr = scrcoords[rcur][2] + (((scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); @@ -200,12 +205,12 @@ void RenderPolygon(Polygon* polygon) { s32 xfactor = (x - xl) * xdiv; - //s32 z = (zl << 12) + ((zr - zl) * xfactor); - //z = zl + (((zr - zl) * xfactor) >> 12); + //s32 z = (((zr - zl) * xfactor) >> 12); + //if (zr!=zl) z = (z << 12) / (zr - zl); //s32 z_inv = ((z>>12)==0) ? 0x1000 : 0x1000000 / (z >> 12); //xfactor = (xfactor * z_inv) >> 12; - //xfactor = (xfactor << 12) / z; + //if (z) xfactor = (xfactor << 12) / z; // TODO: get rid of this shit if (x<0 || x>255 || y<0 || y>191) @@ -248,6 +253,7 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) polygons[i].Vertices[j]->Position[1]/4096.0f, polygons[i].Vertices[j]->Position[2]/4096.0f); */ + //printf("polygon %d\n", i); RenderPolygon(&polygons[i]); } } diff --git a/melonDS.depend b/melonDS.depend index 08e52b74..51e832ce 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1486824787 source:c:\documents\sources\melonds\main.cpp +1486993536 source:c:\documents\sources\melonds\main.cpp "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486947856 source:c:\documents\sources\melonds\nds.cpp +1486994139 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -148,14 +148,14 @@ 1486777933 c:\documents\sources\melonds\gpu3d.h -1486947978 source:c:\documents\sources\melonds\gpu3d.cpp +1486993935 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" -1486947027 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1486994049 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From 68fb77b2047ace4ac3f2d39b39cff70832d4460a Mon Sep 17 00:00:00 2001 From: StapleButter Date: Tue, 14 Feb 2017 02:43:35 +0100 Subject: [PATCH 09/16] * backface/frontface culling (and rendering of backfacing polygons) * fix clipping/viewport transform precision errors * triangle/quad strips --- GPU3D.cpp | 370 ++++++++++++++++++++++++++++++++----------------- GPU3D.h | 6 + GPU3D_Soft.cpp | 92 ++++++++---- NDS.cpp | 2 +- main.cpp | 4 + melonDS.depend | 10 +- 6 files changed, 323 insertions(+), 161 deletions(-) diff --git a/GPU3D.cpp b/GPU3D.cpp index e77af690..9f461a4c 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -159,9 +159,14 @@ u32 PolygonMode; s16 CurVertex[3]; u8 VertexColor[3]; +u32 PolygonAttr; +u32 CurPolygonAttr; + Vertex TempVertexBuffer[4]; u32 VertexNum; u32 VertexNumInPoly; +u32 NumConsecutivePolygons; +Polygon* LastStripPolygon; Vertex VertexRAM[6144 * 2]; Polygon PolygonRAM[2048 * 2]; @@ -385,13 +390,13 @@ void UpdateClipMatrix() template -void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) +void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) { - s64 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / + s64 factor = ((s64)(vin->Position[3] - (plane*vin->Position[comp])) << 24) / ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); Vertex mid; -#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor) >> 12); +#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor) >> 24); INTERPOLATE(Position[0]); INTERPOLATE(Position[1]); @@ -402,213 +407,339 @@ void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) INTERPOLATE(Color[1]); INTERPOLATE(Color[2]); + mid.Clipped = true; + #undef INTERPOLATE - outbuf[num] = mid; + *outbuf = mid; } void SubmitPolygon() { - // clip. - // for each vertex: - // if it's outside, check if the previous and next vertices are inside, if so, fixor - Vertex clippedvertices[2][10]; - u32 numclipped; + Vertex* reusedvertices[2]; + int clipstart = 0; + int lastpolyverts = 0; int nverts = PolygonMode & 0x1 ? 4:3; - int nvisible = 0; int prev, next; int c; - /*if (NumPolygons == 91) - for (int i = 0; i < nverts; i++) + // culling + //if (!(TempVertexBuffer[0].Color[0]==0 && TempVertexBuffer[0].Color[1]==63 && TempVertexBuffer[0].Color[2]==63)) + // return; + + // checkme: does it work this way for quads and up? + /*s32 _x1 = TempVertexBuffer[1].Position[0] - TempVertexBuffer[0].Position[0]; + s32 _x2 = TempVertexBuffer[2].Position[0] - TempVertexBuffer[0].Position[0]; + s32 _y1 = TempVertexBuffer[1].Position[1] - TempVertexBuffer[0].Position[1]; + s32 _y2 = TempVertexBuffer[2].Position[1] - TempVertexBuffer[0].Position[1]; + s32 _z1 = TempVertexBuffer[1].Position[2] - TempVertexBuffer[0].Position[2]; + s32 _z2 = TempVertexBuffer[2].Position[2] - TempVertexBuffer[0].Position[2]; + s32 normalX = (((s64)_y1 * _z2) - ((s64)_z1 * _y2)) >> 12; + s32 normalY = (((s64)_z1 * _x2) - ((s64)_x1 * _z2)) >> 12; + s32 normalZ = (((s64)_x1 * _y2) - ((s64)_y1 * _x2)) >> 12;*/ + /*s32 centerX = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[12]) >> 12; + s32 centerY = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[13]) >> 12; + s32 centerZ = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[14]) >> 12;*/ + /*s64 dot = ((s64)(-TempVertexBuffer[0].Position[0]) * normalX) + + ((s64)(-TempVertexBuffer[0].Position[1]) * normalY) + + ((s64)(-TempVertexBuffer[0].Position[2]) * normalZ); // checkme*/ + // code inspired from Dolphin's software renderer. + // maybe not 100% right + s32 _x0 = TempVertexBuffer[0].Position[0]; + s32 _x1 = TempVertexBuffer[1].Position[0]; + s32 _x2 = TempVertexBuffer[2].Position[0]; + s32 _y0 = TempVertexBuffer[0].Position[1]; + s32 _y1 = TempVertexBuffer[1].Position[1]; + s32 _y2 = TempVertexBuffer[2].Position[1]; + s32 _z0 = TempVertexBuffer[0].Position[3]; + s32 _z1 = TempVertexBuffer[1].Position[3]; + s32 _z2 = TempVertexBuffer[2].Position[3]; + s32 normalX = (((s64)_y0 * _z2) - ((s64)_z0 * _y2)) >> 12; + s32 normalY = (((s64)_z0 * _x2) - ((s64)_x0 * _z2)) >> 12; + s32 normalZ = (((s64)_x0 * _y2) - ((s64)_y0 * _x2)) >> 12; + s64 dot = ((s64)_x1 * normalX) + ((s64)_y1 * normalY) + ((s64)_z1 * normalZ); + bool facingview = (dot < 0); +//printf("Z: %d %d\n", normalZ, -TempVertexBuffer[0].Position[2]); + if (facingview) { - Vertex vtx = TempVertexBuffer[i]; - printf("pre-clip v%d: %f %f %f %f\n", i, - vtx.Position[0]/4096.0f, vtx.Position[1]/4096.0f, - vtx.Position[2]/4096.0f, vtx.Position[3]/4096.0f); - }*/ + if (!(CurPolygonAttr & (1<<7))) + { + LastStripPolygon = NULL; + return; + } + } + else + { + if (!(CurPolygonAttr & (1<<6))) + { + LastStripPolygon = NULL; + return; + } + } + + // for strips, check whether we can attach to the previous polygon + // this requires two vertices shared with the previous polygon, and that + // the two polygons be of the same type + + if (PolygonMode >= 2 && LastStripPolygon) + { + int id0, id1; + if (PolygonMode == 2) + { + if (NumConsecutivePolygons & 1) + { + id0 = 2; + id1 = 1; + } + else + { + id0 = 0; + id1 = 2; + } + + lastpolyverts = 3; + } + else + { + id0 = 3; + id1 = 2; + + lastpolyverts = 4; + } + + if (LastStripPolygon->NumVertices == lastpolyverts && + !LastStripPolygon->Vertices[id0]->Clipped && + !LastStripPolygon->Vertices[id1]->Clipped) + { + reusedvertices[0] = LastStripPolygon->Vertices[id0]; + reusedvertices[1] = LastStripPolygon->Vertices[id1]; + + clippedvertices[0][0] = *reusedvertices[0]; + clippedvertices[0][1] = *reusedvertices[1]; + clippedvertices[1][0] = *reusedvertices[0]; + clippedvertices[1][1] = *reusedvertices[1]; + + clipstart = 2; + } + } + + // clip. + // for each vertex: + // if it's outside, check if the previous and next vertices are inside + // if so, place a new vertex at the edge of the view volume // X clipping - prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = TempVertexBuffer[i]; if (vtx.Position[0] > vtx.Position[3]) { Vertex* vprev = &TempVertexBuffer[prev]; if (vprev->Position[0] <= vprev->Position[3]) { - ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vprev); + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev); c++; } Vertex* vnext = &TempVertexBuffer[next]; if (vnext->Position[0] <= vnext->Position[3]) { - ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vnext); + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } - nverts = c; prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = clippedvertices[0][i]; if (vtx.Position[0] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[0] >= -vprev->Position[3]) { - ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vprev); + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[0] >= -vnext->Position[3]) { - ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vnext); + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } // Y clipping - nverts = c; prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = clippedvertices[1][i]; if (vtx.Position[1] > vtx.Position[3]) { Vertex* vprev = &clippedvertices[1][prev]; if (vprev->Position[1] <= vprev->Position[3]) { - ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vprev); + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[1][next]; if (vnext->Position[1] <= vnext->Position[3]) { - ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vnext); + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } - nverts = c; prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = clippedvertices[0][i]; if (vtx.Position[1] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[1] >= -vprev->Position[3]) { - ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vprev); + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[1] >= -vnext->Position[3]) { - ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vnext); + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } // Z clipping - nverts = c; prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = clippedvertices[1][i]; if (vtx.Position[2] > vtx.Position[3]) { Vertex* vprev = &clippedvertices[1][prev]; if (vprev->Position[2] <= vprev->Position[3]) { - ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vprev); + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[1][next]; if (vnext->Position[2] <= vnext->Position[3]) { - ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vnext); + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } - nverts = c; prev = nverts-1; next = 1; c = 0; - for (int i = 0; i < nverts; i++) + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + Vertex vtx = clippedvertices[0][i]; if (vtx.Position[2] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[2] >= -vprev->Position[3]) { - ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vprev); + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[2] >= -vnext->Position[3]) { - ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vnext); + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; - - prev++; if (prev >= nverts) prev = 0; - next++; if (next >= nverts) next = 0; } - if (c == 0) return; + if (c == 0) + { + LastStripPolygon = NULL; + return; + } // build the actual polygon - // TODO: tri/quad strips - if (NumPolygons >= 2048) return; - if (NumVertices+c > 6144) return; + if (NumPolygons >= 2048 || NumVertices+c > 6144) + { + LastStripPolygon = NULL; + return; + } Polygon* poly = &CurPolygonRAM[NumPolygons++]; poly->NumVertices = 0; - for (int i = 0; i < c; i++) + poly->Attr = CurPolygonAttr; + poly->FacingView = facingview; + + if (LastStripPolygon && clipstart > 0) + { + if (c == lastpolyverts) + { + poly->Vertices[0] = reusedvertices[0]; + poly->Vertices[1] = reusedvertices[1]; + } + else + { + Vertex v0 = *reusedvertices[0]; + Vertex v1 = *reusedvertices[1]; + + CurVertexRAM[NumVertices] = v0; + poly->Vertices[0] = &CurVertexRAM[NumVertices]; + CurVertexRAM[NumVertices+1] = v1; + poly->Vertices[1] = &CurVertexRAM[NumVertices+1]; + NumVertices += 2; + } + + poly->NumVertices += 2; + } + + for (int i = clipstart; i < c; i++) { CurVertexRAM[NumVertices] = clippedvertices[1][i]; poly->Vertices[i] = &CurVertexRAM[NumVertices]; @@ -616,77 +747,29 @@ void SubmitPolygon() NumVertices++; poly->NumVertices++; } + + if (PolygonMode >= 2) + LastStripPolygon = poly; + else + LastStripPolygon = NULL; } void SubmitVertex() { s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; - //s32 vertextrans[4]; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; - if (PolygonMode & 0x2) return; - - //printf("vertex: %f %f %f\n", vertex[0]/4096.0f, vertex[1]/4096.0f, vertex[2]/4096.0f); - UpdateClipMatrix(); vertextrans->Position[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; vertextrans->Position[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; - /*printf("vertex fart: %f %f %f %f\n", - vertextrans->Position[0]/4096.0f, - vertextrans->Position[1]/4096.0f, - vertextrans->Position[2]/4096.0f, - vertextrans->Position[3]/4096.0f);*/ - - /*s32 w_inv; - if (vertextrans->Position[3] == 0) - w_inv = 0x1000; // checkme - else if(vertextrans->Position[3] < 0) - w_inv = 0x1000000 / -vertextrans->Position[3]; - else - w_inv = 0x1000000 / vertextrans->Position[3]; - - vertextrans->Position[0] = (vertextrans->Position[0] * w_inv) >> 12; - vertextrans->Position[1] = (vertextrans->Position[1] * w_inv) >> 12; - vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12;*/ - vertextrans->Color[0] = VertexColor[0]; vertextrans->Color[1] = VertexColor[1]; vertextrans->Color[2] = VertexColor[2]; - /*printf("vertex trans: %f %f %f %f\n", - vertextrans->Position[0]/4096.0f, - vertextrans->Position[1]/4096.0f, - vertextrans->Position[2]/4096.0f, - vertextrans->Position[3]/4096.0f); - printf("clip: %f %f %f %f\n", - ClipMatrix[3]/4096.0f, - ClipMatrix[7]/4096.0f, - ClipMatrix[11]/4096.0f, - ClipMatrix[15]/4096.0f);*/ - - /*if (vertextrans[3] == 0) - { - //printf("!!!! VERTEX W IS ZERO\n"); - //return; - vertextrans[3] = 0x1000; // checkme - } - - s32 screenX = (((vertextrans[0]+vertextrans[3]) * Viewport[2]) / (vertextrans[3]<<1)) + Viewport[0]; - s32 screenY = (((vertextrans[1]+vertextrans[3]) * Viewport[3]) / (vertextrans[3]<<1)) + Viewport[1]; - - printf("screen: %d, %d\n", screenX, screenY); - - s32* finalvertex = TempVertexBuffer[VertexNumInPoly]; - finalvertex[0] = screenX; - finalvertex[1] = screenY; - finalvertex[2] = vertextrans[2]; - finalvertex[3] = vertextrans[3];*/ - - // triangle strip: 0,1,2 1,2,3 2,3,4 3,4,5 ... - // quad strip: 0,1,3,2 2,3,5,4 4,5,7,6 6,7,9,8 ... + vertextrans->Clipped = false; VertexNum++; VertexNumInPoly++; @@ -698,6 +781,7 @@ void SubmitVertex() { VertexNumInPoly = 0; SubmitPolygon(); + NumConsecutivePolygons++; } break; @@ -706,32 +790,49 @@ void SubmitVertex() { VertexNumInPoly = 0; SubmitPolygon(); + NumConsecutivePolygons++; } break; - /*case 2: // triangle strip - if (VertexNum > 3) + case 2: // triangle strip + if (NumConsecutivePolygons & 1) { - if (VertexNumInPoly == 1) - { - VertexNumInPoly = 0; - // reorder - } - else - VertexNumInPoly = 0; + Vertex tmp = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[0]; + TempVertexBuffer[0] = tmp; + VertexNumInPoly = 2; SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[1] = TempVertexBuffer[2]; } - else if (VertexNum == 3) + else if (VertexNumInPoly == 3) { VertexNumInPoly = 2; SubmitPolygon(); + NumConsecutivePolygons++; TempVertexBuffer[0] = TempVertexBuffer[1]; TempVertexBuffer[1] = TempVertexBuffer[2]; } - break;*/ - default: VertexNumInPoly = 0; break; + break; + + case 3: // quad strip + if (VertexNumInPoly == 4) + { + Vertex tmp = TempVertexBuffer[3]; + TempVertexBuffer[3] = TempVertexBuffer[2]; + TempVertexBuffer[2] = tmp; + + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[0] = TempVertexBuffer[3]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break; } } @@ -791,7 +892,7 @@ void ExecuteCommand() if (ExecParamCount >= CmdNumParams[entry.Command]) { - //CycleCount += CmdNumCycles[entry.Command]; + CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; GXStat &= ~(1<<14); @@ -1127,10 +1228,17 @@ void ExecuteCommand() SubmitVertex(); break; + case 0x29: // polygon attributes + PolygonAttr = ExecParams[0]; + break; + case 0x40: PolygonMode = ExecParams[0] & 0x3; VertexNum = 0; VertexNumInPoly = 0; + NumConsecutivePolygons = 0; + LastStripPolygon = NULL; + CurPolygonAttr = PolygonAttr; break; case 0x50: diff --git a/GPU3D.h b/GPU3D.h index d9e7e1a1..96e76aa1 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -27,6 +27,8 @@ typedef struct s32 Position[4]; u8 Color[3]; + bool Clipped; + } Vertex; typedef struct @@ -34,6 +36,10 @@ typedef struct Vertex* Vertices[10]; u32 NumVertices; + u32 Attr; + + bool FacingView; + } Polygon; extern s32 Viewport[4]; diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 6ac4e81d..abca24a2 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -60,23 +60,27 @@ void RenderPolygon(Polygon* polygon) { Vertex* vtx = polygon->Vertices[i]; - s32 w_inv; - if (vtx->Position[3] == 0) + s32 posX, posY, posZ; + s32 w = vtx->Position[3]; + if (w == 0) { - w_inv = 0x1000; // checkme - printf("!! W=0\n"); + posX = 0; + posY = 0; + posZ = 0; } else - w_inv = 0x1000000 / vtx->Position[3]; - - if (vtx->Position[3] < 0) printf("!!! W=%d\n", vtx->Position[3]); - - s32 posX = (vtx->Position[0] * w_inv) >> 12; - s32 posY = (vtx->Position[1] * w_inv) >> 12; - s32 posZ = (vtx->Position[2] * w_inv) >> 12; + { + // TODO: find a way to avoid doing 3 divisions :/ + posX = ((s64)vtx->Position[0] << 12) / w; + posY = ((s64)vtx->Position[1] << 12) / w; + posZ = ((s64)vtx->Position[2] << 12) / w; + } //s32 posX = vtx->Position[0]; //s32 posY = vtx->Position[1]; + //printf("xy: %08X %08X %08X\n", vtx->Position[0], vtx->Position[1], vtx->Position[3]); + //printf("w_inv: %08X res: %08X %08X\n", w_inv, posX, posY); + s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; s32 scrZ = (vtx->Position[2] + 0x1000) >> 1; @@ -102,7 +106,8 @@ void RenderPolygon(Polygon* polygon) vbot = i; } //if (vtx->Color[0]==63 && vtx->Color[1]==0 && vtx->Color[2]==0) - //printf("v%d: %d,%d Z=%f W=%f\n", i, scrX, 191-scrY, vtx->Position[2]/4096.0f, vtx->Position[3]/4096.0f); + //printf("v%d: %d,%d Z=%f W=%f %d %d\n", i, scrX, 191-scrY, vtx->Position[2]/4096.0f, vtx->Position[3]/4096.0f, + // polygon->FacingView, vtx->Clipped); } // draw, line per line @@ -112,10 +117,20 @@ void RenderPolygon(Polygon* polygon) s32 lstep, rstep; //s32 xmin, xmax; - lnext = lcur + 1; - if (lnext >= nverts) lnext = 0; - rnext = rcur - 1; - if (rnext < 0) rnext = nverts - 1; + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } /*if ((scrcoords[lnext][1] - scrcoords[lcur][1]) == 0) lstep = 0; else lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); @@ -131,11 +146,18 @@ void RenderPolygon(Polygon* polygon) { while (y == scrcoords[lnext][1]) { - lcur++; - if (lcur >= nverts) lcur = 0; + lcur = lnext; - lnext = lcur + 1; - if (lnext >= nverts) lnext = 0; + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + } //lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); //xmin = scrcoords[lcur][0] << 12; @@ -144,11 +166,18 @@ void RenderPolygon(Polygon* polygon) while (y == scrcoords[rnext][1]) { - rcur--; - if (rcur < 0) rcur = nverts - 1; + rcur = rnext; - rnext = rcur - 1; - if (rnext < 0) rnext = nverts - 1; + if (polygon->FacingView) + { + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } //rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); //xmax = scrcoords[rcur][0] << 12; @@ -232,6 +261,19 @@ void RenderPolygon(Polygon* polygon) pixel[2] = zerp;*/ } } + + // DEBUG CODE + /*for (int i = 0; i < nverts; i++) + { + s32 x = scrcoords[i][0]; + s32 y = scrcoords[i][1]; + + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; + pixel[0] = 63; + pixel[1] = 63; + pixel[2] = 63; + pixel[3] = 31; + }*/ } void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) @@ -253,6 +295,8 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) polygons[i].Vertices[j]->Position[1]/4096.0f, polygons[i].Vertices[j]->Position[2]/4096.0f); */ + //printf("polygon %d\n", i); + //if (!polygons[i].Vertices[0]->Clipped) continue; //printf("polygon %d\n", i); RenderPolygon(&polygons[i]); } diff --git a/NDS.cpp b/NDS.cpp index 68ca25c2..8b911d32 100644 --- a/NDS.cpp +++ b/NDS.cpp @@ -307,7 +307,7 @@ void Reset() // test //LoadROM(); //LoadFirmware(); - if (NDSCart::LoadROM("rom/Simple_Tri.nds")) + if (NDSCart::LoadROM("rom/nsmb.nds")) Running = true; // hax } diff --git a/main.cpp b/main.cpp index 7600102f..c403cecf 100644 --- a/main.cpp +++ b/main.cpp @@ -85,6 +85,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam) case VK_RIGHT: NDS::PressKey(4); break; case 'A': NDS::PressKey(0); break; case 'B': NDS::PressKey(1); break; + case 'X': NDS::PressKey(16); break; + case 'Y': NDS::PressKey(17); break; case 'L': NDS::PressKey(9); break; case 'R': NDS::PressKey(8); break; case 'D': NDS::debug(0); break; @@ -102,6 +104,8 @@ LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam) case VK_RIGHT: NDS::ReleaseKey(4); break; case 'A': NDS::ReleaseKey(0); break; case 'B': NDS::ReleaseKey(1); break; + case 'X': NDS::ReleaseKey(16); break; + case 'Y': NDS::ReleaseKey(17); break; case 'L': NDS::ReleaseKey(9); break; case 'R': NDS::ReleaseKey(8); break; } diff --git a/melonDS.depend b/melonDS.depend index 51e832ce..35030a52 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1486993536 source:c:\documents\sources\melonds\main.cpp +1487028720 source:c:\documents\sources\melonds\main.cpp "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486994139 source:c:\documents\sources\melonds\nds.cpp +1487033652 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -146,16 +146,16 @@ "NDS.h" "NDSCart.h" -1486777933 c:\documents\sources\melonds\gpu3d.h +1487016725 c:\documents\sources\melonds\gpu3d.h -1486993935 source:c:\documents\sources\melonds\gpu3d.cpp +1487036430 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" -1486994049 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1487035597 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From c5b7ec2168a1d9a06c87fdfc4ef7c8b92d4c2dcf Mon Sep 17 00:00:00 2001 From: StapleButter Date: Tue, 14 Feb 2017 03:29:02 +0100 Subject: [PATCH 10/16] attempt at depth buffer --- GPU3D_Soft.cpp | 64 +++++++++++++++++++++++++++++++++++++++++--------- melonDS.depend | 4 ++-- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index abca24a2..7a901dd7 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -28,6 +28,7 @@ namespace SoftRenderer { u8 ColorBuffer[256*192 * 4]; +u32 DepthBuffer[256*192]; bool Init() @@ -43,9 +44,37 @@ void DeInit() void Reset() { memset(ColorBuffer, 0, 256*192 * 4); + memset(DepthBuffer, 0, 256*192 * 4); } +void RenderPixel(u32 attr, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb) +{ + u32* depth = &DepthBuffer[(256*y) + x]; + + bool passdepth = false; + if (attr & (1<<14)) + { + s32 diff = *depth - z; + if ((u32)(diff + 0x200) <= 0x400) + passdepth = true; + } + else + if (z < *depth) + passdepth = true; + + if (!passdepth) return; + + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; + pixel[0] = vr; + pixel[1] = vg; + pixel[2] = vb; + pixel[3] = 31; // TODO: alpha + + // TODO: optional update for translucent pixels + *depth = z; +} + void RenderPolygon(Polygon* polygon) { int nverts = polygon->NumVertices; @@ -83,17 +112,19 @@ void RenderPolygon(Polygon* polygon) s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; - s32 scrZ = (vtx->Position[2] + 0x1000) >> 1; + s32 scrZ = (((s64)(posZ + 0x1000) * 0xFFFFFF) >> 13); + s32 scrW = (((s64)(w + 0x1000) * 0xFFFFFF) >> 13); if (scrX > 255) scrX = 255; if (scrY > 191) scrY = 191; - if (scrZ > 0xFFF) scrZ = 0xFFF; + if (scrZ > 0xFFFFFF) scrZ = 0xFFFFFF; if (scrX < 0) { printf("!! bad X %d\n", scrX); scrX = 0;} if (scrY < 0) { printf("!! bad Y %d\n", scrY); scrY = 0;} + if (scrZ < 0) { printf("!! bad Z %d %d\n", scrZ, vtx->Position[2]); scrZ = 0;} scrcoords[i][0] = scrX; scrcoords[i][1] = 191 - scrY; scrcoords[i][2] = scrZ; - scrcoords[i][3] = vtx->Position[3]; + scrcoords[i][3] = scrW; if (scrcoords[i][1] < ytop) { @@ -213,8 +244,11 @@ void RenderPolygon(Polygon* polygon) scrcoords[lcur][0], scrcoords[lnext][0], scrcoords[rcur][0], scrcoords[rnext][0]);*/ - //s32 zl = scrcoords[lcur][2] + (((scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); - //s32 zr = scrcoords[rcur][2] + (((scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); + s32 zl = scrcoords[lcur][2] + (((s64)(scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); + s32 zr = scrcoords[rcur][2] + (((s64)(scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); + + //s32 wl = scrcoords[lcur][3] + (((s64)(scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); + //s32 wr = scrcoords[rcur][3] + (((s64)(scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); @@ -234,8 +268,14 @@ void RenderPolygon(Polygon* polygon) { s32 xfactor = (x - xl) * xdiv; + s32 z = zl + (((s64)(zr - zl) * xfactor) >> 12); + //s32 z = (((zr - zl) * xfactor) >> 12); //if (zr!=zl) z = (z << 12) / (zr - zl); + //s32 w = wl + (((s64)(wr - wl) * xfactor) >> 12); + //w >>= 12; + //if (w!=0) xfactor = ((s64)xfactor * 0xFFFFFF) / w; + //xfactor = (xfactor * w) >> 12; //s32 z_inv = ((z>>12)==0) ? 0x1000 : 0x1000000 / (z >> 12); //xfactor = (xfactor * z_inv) >> 12; @@ -248,14 +288,14 @@ void RenderPolygon(Polygon* polygon) x = 0; y = 0; } - u8* pixel = &ColorBuffer[((256*y) + x) * 4]; - pixel[0] = rl + (((rr - rl) * xfactor) >> 12); - pixel[1] = gl + (((gr - gl) * xfactor) >> 12); - pixel[2] = bl + (((br - bl) * xfactor) >> 12); - pixel[3] = 31; // TODO: alpha + // possible optimization: only do color interpolation if the depth test passes + u8 vr = rl + (((rr - rl) * xfactor) >> 12); + u8 vg = gl + (((gr - gl) * xfactor) >> 12); + u8 vb = bl + (((br - bl) * xfactor) >> 12); + RenderPixel(polygon->Attr, x, y, z, vr, vg, vb); // Z debug - /*u8 zerp = (z * 63) / 0xFFFFFF; + /*u8 zerp = (w * 63) / 0xFFFFFF; pixel[0] = zerp; pixel[1] = zerp; pixel[2] = zerp;*/ @@ -280,9 +320,11 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) { // TODO: render translucent polygons last + // TODO proper clear color/depth support! for (int i = 0; i < 256*192; i++) { ((u32*)ColorBuffer)[i] = 0x00000000; + DepthBuffer[i] = 0xFFFFFF; } for (int i = 0; i < npolys; i++) diff --git a/melonDS.depend b/melonDS.depend index 35030a52..db3140cc 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1487033652 source:c:\documents\sources\melonds\nds.cpp +1487039174 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -155,7 +155,7 @@ "GPU.h" "FIFO.h" -1487035597 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1487039203 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From ef7fbf8b0344b9ace77918f3de77690404aaa22f Mon Sep 17 00:00:00 2001 From: StapleButter Date: Tue, 14 Feb 2017 21:55:51 +0100 Subject: [PATCH 11/16] 18-bit graphics pipeline (final buffer is 32-bit) --- GPU.cpp | 4 +-- GPU.h | 2 +- GPU2D.cpp | 94 +++++++++++++++++++++++++++++++++++--------------- GPU2D.h | 20 ++++++----- main.cpp | 8 ++--- melonDS.depend | 12 +++---- 6 files changed, 91 insertions(+), 49 deletions(-) diff --git a/GPU.cpp b/GPU.cpp index b5db5cbe..a945b3f8 100644 --- a/GPU.cpp +++ b/GPU.cpp @@ -62,7 +62,7 @@ u8* VRAM_AOBJExtPal; u8* VRAM_BBGExtPal[4]; u8* VRAM_BOBJExtPal; -u16 Framebuffer[256*192*2]; +u32 Framebuffer[256*192*2]; GPU2D* GPU2D_A; GPU2D* GPU2D_B; @@ -123,7 +123,7 @@ void Reset() for (int i = 0; i < 256*192*2; i++) { - Framebuffer[i] = 0x7FFF; + Framebuffer[i] = 0xFFFFFFFF; } GPU2D_A->Reset(); diff --git a/GPU.h b/GPU.h index 18661ca2..a39faeab 100644 --- a/GPU.h +++ b/GPU.h @@ -48,7 +48,7 @@ extern u8* VRAM_AOBJExtPal; extern u8* VRAM_BBGExtPal[4]; extern u8* VRAM_BOBJExtPal; -extern u16 Framebuffer[256*192*2]; +extern u32 Framebuffer[256*192*2]; extern GPU2D* GPU2D_A; extern GPU2D* GPU2D_B; diff --git a/GPU2D.cpp b/GPU2D.cpp index 5168402b..d9634fdd 100644 --- a/GPU2D.cpp +++ b/GPU2D.cpp @@ -82,11 +82,8 @@ void GPU2D::Reset() memset(BGRotD, 0, 2*2); } -void GPU2D::SetFramebuffer(u16* buf) +void GPU2D::SetFramebuffer(u32* buf) { - // framebuffer is 256x192 16bit. - // might eventually support other framebuffer types/sizes - // TODO: change this. the DS uses 18bit color Framebuffer = buf; } @@ -205,7 +202,7 @@ void GPU2D::Write32(u32 addr, u32 val) void GPU2D::DrawScanline(u32 line) { - u16* dst = &Framebuffer[256*line]; + u32* dst = &Framebuffer[256*line]; u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 0x1 : 0x3); @@ -214,8 +211,8 @@ void GPU2D::DrawScanline(u32 line) { case 0: // screen off { - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = 0x7FFF7FFF; + for (int i = 0; i < 256; i++) + dst[i] = 0xFF3F3F3F; } break; @@ -230,8 +227,15 @@ void GPU2D::DrawScanline(u32 line) u32* vram = (u32*)GPU::VRAM[(DispCnt >> 18) & 0x3]; vram = &vram[line << 7]; - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = vram[i]; + for (int i = 0; i < 256; i++) + { + u16 color = vram[i]; + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } } break; @@ -241,6 +245,12 @@ void GPU2D::DrawScanline(u32 line) } break; } + + // convert to 32-bit RGBA + for (int i = 0; i < 256; i++) + dst[i] = ((dst[i] & 0x003F3F3F) << 2) | + ((dst[i] & 0x00303030) >> 4) | + 0xFF000000; } void GPU2D::VBlank() @@ -250,7 +260,7 @@ void GPU2D::VBlank() template -void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) +void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst) { for (int i = 3; i >= 0; i--) { @@ -300,17 +310,24 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) } } -void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) +void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) { u32 backdrop; if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; else backdrop = *(u16*)&GPU::Palette[0]; - // TODO: color effect for backdrop + { + u8 r = (backdrop & 0x001F) << 1; + u8 g = (backdrop & 0x03E0) >> 4; + u8 b = (backdrop & 0x7C00) >> 9; - backdrop |= (backdrop<<16); - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = backdrop; + // TODO: color effect for backdrop + + backdrop = r | (g << 8) | (b << 16) | 0x20000000; + + for (int i = 0; i < 256; i++) + dst[i] = backdrop; + } // prerender sprites u32 spritebuf[256]; @@ -333,26 +350,38 @@ void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) } -void GPU2D::DrawBG_3D(u32 line, u16* dst) +typedef void (*DrawPixelFunc)(u32 bgnum, u32* dst, u16 color, u32 blendfunc); + +void GPU2D::DrawPixel_Normal(u32 bgnum, u32* dst, u16 color, u32 blendfunc) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *dst = r | (g << 8) | (b << 16) | (0x01000000 << bgnum); +} + +void GPU2D::DrawBG_3D(u32 line, u32* dst) { // TODO: scroll, etc u8* src = GPU3D::GetLine(line); for (int i = 0; i < 256; i++) { - // TODO: color buffer should be 18bit!! - u8 r = *src++; u8 g = *src++; u8 b = *src++; u8 a = *src++; if (a == 0) continue; - dst[i] = (r >> 1) | ((g >> 1) << 5) | ((b >> 1) << 10); + // TODO: blending + // alpha is 6bit too....? + + dst[i] = r | (g << 8) | (b << 16); } } -void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) +void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -366,6 +395,8 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + extpal = (bgcnt & 0x0080) && (DispCnt & 0x40000000); if (Num) @@ -456,7 +487,7 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) color = pixels[tilexoff]; if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); xoff++; } @@ -499,14 +530,14 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) } if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); xoff++; } } } -void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) +void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -529,6 +560,8 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) if (bgcnt & 0x2000) overflowmask = 0; else overflowmask = ~(coordmask | 0x7FF); + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + extpal = (DispCnt & 0x40000000); s16 rotA = BGRotA[bgnum-2]; @@ -566,7 +599,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) u16 color = bitmap[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color & 0x8000) - dst[i] = color; + drawpixelfn(bgnum, &dst[i], color, BlendFunc); } rotX += rotA; @@ -587,7 +620,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) u8 color = tileset[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color) - dst[i] = pal[color]; + drawpixelfn(bgnum, &dst[i], pal[color], BlendFunc); } rotX += rotA; @@ -660,7 +693,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) color = pixels[(tileyoff << 3) + tilexoff]; if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); } rotX += rotA; @@ -672,12 +705,17 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) //BGYCenter[bgnum-2] += rotD; } -void GPU2D::InterleaveSprites(u32* buf, u32 prio, u16* dst) +void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst) { + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + for (u32 i = 0; i < 256; i++) { if ((buf[i] & 0xF8000) == prio) - dst[i] = buf[i] & 0x7FFF; + { + u32 blendfunc = 0; + drawpixelfn(4, &dst[i], buf[i], blendfunc); + } } } diff --git a/GPU2D.h b/GPU2D.h index ced88796..2bd45216 100644 --- a/GPU2D.h +++ b/GPU2D.h @@ -27,7 +27,7 @@ public: void Reset(); - void SetFramebuffer(u16* buf); + void SetFramebuffer(u32* buf); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -41,7 +41,7 @@ public: private: u32 Num; - u16* Framebuffer; + u32* Framebuffer; u32 DispCnt; u16 BGCnt[4]; @@ -56,14 +56,18 @@ private: s16 BGRotC[2]; s16 BGRotD[2]; - template void DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst); - void DrawScanline_Mode1(u32 line, u16* dst); + u32 BlendFunc; - void DrawBG_3D(u32 line, u16* dst); - void DrawBG_Text(u32 line, u16* dst, u32 num); - void DrawBG_Extended(u32 line, u16* dst, u32 bgnum); + template void DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst); + void DrawScanline_Mode1(u32 line, u32* dst); - void InterleaveSprites(u32* buf, u32 prio, u16* dst); + static void DrawPixel_Normal(u32 bgnum, u32* dst, u16 color, u32 blendfunc); + + void DrawBG_3D(u32 line, u32* dst); + void DrawBG_Text(u32 line, u32* dst, u32 num); + void DrawBG_Extended(u32 line, u32* dst, u32 bgnum); + + void InterleaveSprites(u32* buf, u32 prio, u32* dst); void DrawSprites(u32 line, u32* dst); void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst); void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst); diff --git a/main.cpp b/main.cpp index c403cecf..8f21d53f 100644 --- a/main.cpp +++ b/main.cpp @@ -216,11 +216,11 @@ int main() bmp.bV4Width = 256; bmp.bV4Height = -384; bmp.bV4Planes = 1; - bmp.bV4BitCount = 16; + bmp.bV4BitCount = 32; bmp.bV4V4Compression = BI_RGB|BI_BITFIELDS; - bmp.bV4RedMask = 0x001F; - bmp.bV4GreenMask = 0x03E0; - bmp.bV4BlueMask = 0x7C00; + bmp.bV4RedMask = 0x000000FF; + bmp.bV4GreenMask = 0x0000FF00; + bmp.bV4BlueMask = 0x00FF0000; NDS::Init(); diff --git a/melonDS.depend b/melonDS.depend index db3140cc..5553a148 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1487028720 source:c:\documents\sources\melonds\main.cpp +1487105574 source:c:\documents\sources\melonds\main.cpp "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1487039174 source:c:\documents\sources\melonds\nds.cpp +1487101361 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -87,13 +87,13 @@ "NDS.h" "SPI.h" -1486778220 source:c:\documents\sources\melonds\gpu2d.cpp +1487105611 source:c:\documents\sources\melonds\gpu2d.cpp "NDS.h" "GPU.h" -1486777351 c:\documents\sources\melonds\gpu2d.h +1487105228 c:\documents\sources\melonds\gpu2d.h 1481040524 c:\documents\sources\melonds\wifi.h @@ -119,13 +119,13 @@ 1484698068 c:\documents\sources\melonds\dma.h "types.h" -1486736549 source:c:\documents\sources\melonds\gpu.cpp +1487102235 source:c:\documents\sources\melonds\gpu.cpp "NDS.h" "GPU.h" -1486501976 c:\documents\sources\melonds\gpu.h +1487102203 c:\documents\sources\melonds\gpu.h "GPU2D.h" "GPU3D.h" From 676e5b32cceaed177af2ff5711e9de8885792022 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Wed, 15 Feb 2017 17:49:14 +0100 Subject: [PATCH 12/16] perspective-correct Gouraud --- GPU3D_Soft.cpp | 67 ++++++++++++++++++-------------------------------- melonDS.depend | 6 ++--- 2 files changed, 27 insertions(+), 46 deletions(-) diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 7a901dd7..14bced40 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -89,13 +89,14 @@ void RenderPolygon(Polygon* polygon) { Vertex* vtx = polygon->Vertices[i]; - s32 posX, posY, posZ; + s32 posX, posY, posZ, posW; s32 w = vtx->Position[3]; if (w == 0) { posX = 0; posY = 0; posZ = 0; + posW = 0x1000; } else { @@ -103,17 +104,12 @@ void RenderPolygon(Polygon* polygon) posX = ((s64)vtx->Position[0] << 12) / w; posY = ((s64)vtx->Position[1] << 12) / w; posZ = ((s64)vtx->Position[2] << 12) / w; + posW = w; } - //s32 posX = vtx->Position[0]; - //s32 posY = vtx->Position[1]; - - //printf("xy: %08X %08X %08X\n", vtx->Position[0], vtx->Position[1], vtx->Position[3]); - //printf("w_inv: %08X res: %08X %08X\n", w_inv, posX, posY); s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; s32 scrZ = (((s64)(posZ + 0x1000) * 0xFFFFFF) >> 13); - s32 scrW = (((s64)(w + 0x1000) * 0xFFFFFF) >> 13); if (scrX > 255) scrX = 255; if (scrY > 191) scrY = 191; if (scrZ > 0xFFFFFF) scrZ = 0xFFFFFF; @@ -124,7 +120,7 @@ void RenderPolygon(Polygon* polygon) scrcoords[i][0] = scrX; scrcoords[i][1] = 191 - scrY; scrcoords[i][2] = scrZ; - scrcoords[i][3] = scrW; + scrcoords[i][3] = posW; if (scrcoords[i][1] < ytop) { @@ -238,25 +234,24 @@ void RenderPolygon(Polygon* polygon) if (xl<0 || xr>255) continue; // hax - //if (vlcur->Color[0]==0 && vlcur->Color[1]==63 && vlcur->Color[2]==0) - /*printf("y:%d xleft:%d xright:%d %d,%d %d,%d | left: %d to %d right: %d to %d\n", - y, xl, xr, lcur, rcur, vtop, vbot, - scrcoords[lcur][0], scrcoords[lnext][0], - scrcoords[rcur][0], scrcoords[rnext][0]);*/ - s32 zl = scrcoords[lcur][2] + (((s64)(scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); s32 zr = scrcoords[rcur][2] + (((s64)(scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); - //s32 wl = scrcoords[lcur][3] + (((s64)(scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); - //s32 wr = scrcoords[rcur][3] + (((s64)(scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); + s32 wl = scrcoords[lcur][3] + (((s64)(scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); + s32 wr = scrcoords[rcur][3] + (((s64)(scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); - u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); - u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); - u8 bl = vlcur->Color[2] + (((vlnext->Color[2] - vlcur->Color[2]) * lfactor) >> 12); + s64 perspfactorl1 = ((s64)(0x1000 - lfactor) << 12) / scrcoords[lcur][3]; + s64 perspfactorl2 = ((s64)lfactor << 12) / scrcoords[lnext][3]; + s64 perspfactorr1 = ((s64)(0x1000 - rfactor) << 12) / scrcoords[rcur][3]; + s64 perspfactorr2 = ((s64)rfactor << 12) / scrcoords[rnext][3]; - u8 rr = vrcur->Color[0] + (((vrnext->Color[0] - vrcur->Color[0]) * rfactor) >> 12); - u8 gr = vrcur->Color[1] + (((vrnext->Color[1] - vrcur->Color[1]) * rfactor) >> 12); - u8 br = vrcur->Color[2] + (((vrnext->Color[2] - vrcur->Color[2]) * rfactor) >> 12); + u32 rl = (((perspfactorl1 * vlcur->Color[0]) + (perspfactorl2 * vlnext->Color[0])) << 12) / (perspfactorl1 + perspfactorl2); + u32 gl = (((perspfactorl1 * vlcur->Color[1]) + (perspfactorl2 * vlnext->Color[1])) << 12) / (perspfactorl1 + perspfactorl2); + u32 bl = (((perspfactorl1 * vlcur->Color[2]) + (perspfactorl2 * vlnext->Color[2])) << 12) / (perspfactorl1 + perspfactorl2); + + u32 rr = (((perspfactorr1 * vrcur->Color[0]) + (perspfactorr2 * vrnext->Color[0])) << 12) / (perspfactorr1 + perspfactorr2); + u32 gr = (((perspfactorr1 * vrcur->Color[1]) + (perspfactorr2 * vrnext->Color[1])) << 12) / (perspfactorr1 + perspfactorr2); + u32 br = (((perspfactorr1 * vrcur->Color[2]) + (perspfactorr2 * vrnext->Color[2])) << 12) / (perspfactorr1 + perspfactorr2); s32 xdiv; if (xr == xl) @@ -270,29 +265,15 @@ void RenderPolygon(Polygon* polygon) s32 z = zl + (((s64)(zr - zl) * xfactor) >> 12); - //s32 z = (((zr - zl) * xfactor) >> 12); - //if (zr!=zl) z = (z << 12) / (zr - zl); - //s32 w = wl + (((s64)(wr - wl) * xfactor) >> 12); - //w >>= 12; - //if (w!=0) xfactor = ((s64)xfactor * 0xFFFFFF) / w; - //xfactor = (xfactor * w) >> 12; - - //s32 z_inv = ((z>>12)==0) ? 0x1000 : 0x1000000 / (z >> 12); - //xfactor = (xfactor * z_inv) >> 12; - //if (z) xfactor = (xfactor << 12) / z; - - // TODO: get rid of this shit - if (x<0 || x>255 || y<0 || y>191) - { - //printf("BAD COORDS!! %d %d\n", x, y); - x = 0; y = 0; - } + s32 perspfactor1 = ((0x1000 - xfactor) << 12) / wl; + s32 perspfactor2 = (xfactor << 12) / wr; // possible optimization: only do color interpolation if the depth test passes - u8 vr = rl + (((rr - rl) * xfactor) >> 12); - u8 vg = gl + (((gr - gl) * xfactor) >> 12); - u8 vb = bl + (((br - bl) * xfactor) >> 12); - RenderPixel(polygon->Attr, x, y, z, vr, vg, vb); + u32 vr = (s64)((perspfactor1 * rl) + (perspfactor2 * rr)) / (perspfactor1 + perspfactor2); + u32 vg = (s64)((perspfactor1 * gl) + (perspfactor2 * gr)) / (perspfactor1 + perspfactor2); + u32 vb = (s64)((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2); + + RenderPixel(polygon->Attr, x, y, z, vr>>12, vg>>12, vb>>12); // Z debug /*u8 zerp = (w * 63) / 0xFFFFFF; diff --git a/melonDS.depend b/melonDS.depend index 5553a148..0c715c1d 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1487105574 source:c:\documents\sources\melonds\main.cpp +1487173011 source:c:\documents\sources\melonds\main.cpp "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1487101361 source:c:\documents\sources\melonds\nds.cpp +1487177244 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -155,7 +155,7 @@ "GPU.h" "FIFO.h" -1487039203 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1487177182 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From abd2cb444be51140bbf1482ee0c15103687ede23 Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 17 Feb 2017 04:07:00 +0100 Subject: [PATCH 13/16] improve 3D renderer precision. not perfect, but not bad at all. --- GPU3D.cpp | 40 ++++++----- GPU3D.h | 8 +++ GPU3D_Soft.cpp | 178 +++++++++++++++++++++++++++---------------------- melonDS.depend | 10 +-- 4 files changed, 133 insertions(+), 103 deletions(-) diff --git a/GPU3D.cpp b/GPU3D.cpp index 9f461a4c..4d0dd7bc 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -34,13 +34,24 @@ // clipping rules: // * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons // strip is resumed at the first eligible polygon +// +// clipping exhibits oddities on the real thing. bad precision? fancy algorithm? TODO: investigate. +// +// vertex color precision: +// * vertex colors are kept at 5-bit during clipping. makes for shitty results. +// * vertex colors are converted to 9-bit before drawing, as such: +// if (x > 0) x = (x << 4) + 0xF +// the added bias affects interpolation. +// +// depth buffer: +// Z-buffering mode: val = ((Z * 0x800 * 0x1000) / W) + 0x7FFCFF +// W-buffering mode: val = W - 0x1FF +// TODO: confirm W, because it's weird namespace GPU3D { -#define COPYVERTEX(a, b) { *(u64*)&a[0] = *(u64*)&b[0]; *(u64*)&a[2] = *(u64*)&b[2]; } - const u32 CmdNumParams[256] = { // 0x00 @@ -301,14 +312,6 @@ void MatrixMult4x3(s32* m, s32* s) s32 tmp[16]; memcpy(tmp, m, 16*4); - /*printf("4x3 matrix\n"); - for (int j = 0; j < 12; j += 3) - { - for (int i = 0; i < 3; i++) - printf("%f ", s[i]/4096.0f); - printf("\n"); - }*/ - // m = s*m m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; @@ -392,11 +395,11 @@ void UpdateClipMatrix() template void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) { - s64 factor = ((s64)(vin->Position[3] - (plane*vin->Position[comp])) << 24) / - ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); + s64 factor_num = vin->Position[3] - (plane*vin->Position[comp]); + s32 factor_den = factor_num - (vout->Position[3] - (plane*vout->Position[comp])); Vertex mid; -#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor) >> 24); +#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor_num) / factor_den); INTERPOLATE(Position[0]); INTERPOLATE(Position[1]); @@ -408,6 +411,7 @@ void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) INTERPOLATE(Color[2]); mid.Clipped = true; + mid.ViewportTransformDone = false; #undef INTERPOLATE *outbuf = mid; @@ -425,8 +429,6 @@ void SubmitPolygon() int c; // culling - //if (!(TempVertexBuffer[0].Color[0]==0 && TempVertexBuffer[0].Color[1]==63 && TempVertexBuffer[0].Color[2]==63)) - // return; // checkme: does it work this way for quads and up? /*s32 _x1 = TempVertexBuffer[1].Position[0] - TempVertexBuffer[0].Position[0]; @@ -770,6 +772,7 @@ void SubmitVertex() vertextrans->Color[2] = VertexColor[2]; vertextrans->Clipped = false; + vertextrans->ViewportTransformDone = false; VertexNum++; VertexNumInPoly++; @@ -1179,9 +1182,9 @@ void ExecuteCommand() u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; - VertexColor[0] = r ? (r<<1)+1 : 0; - VertexColor[1] = g ? (g<<1)+1 : 0; - VertexColor[2] = b ? (b<<1)+1 : 0; + VertexColor[0] = r; + VertexColor[1] = g; + VertexColor[2] = b; } break; @@ -1243,6 +1246,7 @@ void ExecuteCommand() case 0x50: FlushRequest = 1;//0x80000000 | (ExecParams[0] & 0x3); + CycleCount = 392; break; case 0x60: // viewport x1,y1,x2,y2 diff --git a/GPU3D.h b/GPU3D.h index 96e76aa1..cbb8234a 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -29,6 +29,14 @@ typedef struct bool Clipped; + // final vertex attributes. + // allows them to be reused in polygon strips. + + s32 FinalPosition[4]; + s32 FinalColor[3]; + + bool ViewportTransformDone; + } Vertex; typedef struct diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index 14bced40..afc31877 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -81,68 +81,74 @@ void RenderPolygon(Polygon* polygon) int vtop = 0, vbot = 0; s32 ytop = 191, ybot = 0; - s32 scrcoords[10][4]; + // process the vertices, transform to screen coordinates // find the topmost and bottommost vertices of the polygon for (int i = 0; i < nverts; i++) { Vertex* vtx = polygon->Vertices[i]; - s32 posX, posY, posZ, posW; - s32 w = vtx->Position[3]; - if (w == 0) + if (!vtx->ViewportTransformDone) { - posX = 0; - posY = 0; - posZ = 0; - posW = 0x1000; - } - else - { - // TODO: find a way to avoid doing 3 divisions :/ - posX = ((s64)vtx->Position[0] << 12) / w; - posY = ((s64)vtx->Position[1] << 12) / w; - posZ = ((s64)vtx->Position[2] << 12) / w; - posW = w; + s32 posX, posY, posZ, posW; + s32 w = vtx->Position[3]; + if (w == 0) + { + posX = 0; + posY = 0; + posZ = 0; + posW = 0x1000; + } + else + { + posX = ((s64)vtx->Position[0] << 12) / w; + posY = ((s64)vtx->Position[1] << 12) / w; + + // TODO: W-buffering + posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFCFF; + + posW = w; + } + + s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; + s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + + if (scrX < 0) scrX = 0; + else if (scrX > 255) scrX = 255; + if (scrY < 0) scrY = 0; + else if (scrY > 191) scrY = 191; + if (posZ < 0) posZ = 0; + else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF; + + vtx->FinalPosition[0] = scrX; + vtx->FinalPosition[1] = 191 - scrY; + vtx->FinalPosition[2] = posZ; + vtx->FinalPosition[3] = posW; + + vtx->FinalColor[0] = vtx->Color[0] ? ((vtx->Color[0] << 4) + 0xF) : 0; + vtx->FinalColor[1] = vtx->Color[1] ? ((vtx->Color[1] << 4) + 0xF) : 0; + vtx->FinalColor[2] = vtx->Color[2] ? ((vtx->Color[2] << 4) + 0xF) : 0; + + vtx->ViewportTransformDone = true; } - s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; - s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; - s32 scrZ = (((s64)(posZ + 0x1000) * 0xFFFFFF) >> 13); - if (scrX > 255) scrX = 255; - if (scrY > 191) scrY = 191; - if (scrZ > 0xFFFFFF) scrZ = 0xFFFFFF; - if (scrX < 0) { printf("!! bad X %d\n", scrX); scrX = 0;} - if (scrY < 0) { printf("!! bad Y %d\n", scrY); scrY = 0;} - if (scrZ < 0) { printf("!! bad Z %d %d\n", scrZ, vtx->Position[2]); scrZ = 0;} - - scrcoords[i][0] = scrX; - scrcoords[i][1] = 191 - scrY; - scrcoords[i][2] = scrZ; - scrcoords[i][3] = posW; - - if (scrcoords[i][1] < ytop) + if (vtx->FinalPosition[1] < ytop) { - ytop = scrcoords[i][1]; + ytop = vtx->FinalPosition[1]; vtop = i; } - if (scrcoords[i][1] > ybot) + if (vtx->FinalPosition[1] > ybot) { - ybot = scrcoords[i][1]; + ybot = vtx->FinalPosition[1]; vbot = i; } - //if (vtx->Color[0]==63 && vtx->Color[1]==0 && vtx->Color[2]==0) - //printf("v%d: %d,%d Z=%f W=%f %d %d\n", i, scrX, 191-scrY, vtx->Position[2]/4096.0f, vtx->Position[3]/4096.0f, - // polygon->FacingView, vtx->Clipped); } // draw, line per line int lcur = vtop, rcur = vtop; int lnext, rnext; - s32 lstep, rstep; - //s32 xmin, xmax; if (polygon->FacingView) { @@ -159,19 +165,11 @@ void RenderPolygon(Polygon* polygon) if (rnext >= nverts) rnext = 0; } - /*if ((scrcoords[lnext][1] - scrcoords[lcur][1]) == 0) lstep = 0; else - lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); - if ((scrcoords[rnext][1] - scrcoords[rcur][1]) == 0) rstep = 0; else - rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]);*/ - - //xmin = scrcoords[lcur][0] << 12; - //xmax = scrcoords[rcur][0] << 12; - for (s32 y = ytop; y <= ybot; y++) { if (y < ybot) { - while (y == scrcoords[lnext][1]) + while (y == polygon->Vertices[lnext]->FinalPosition[1]) { lcur = lnext; @@ -186,12 +184,10 @@ void RenderPolygon(Polygon* polygon) if (lnext < 0) lnext = nverts - 1; } - //lstep = ((scrcoords[lnext][0] - scrcoords[lcur][0]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); - //xmin = scrcoords[lcur][0] << 12; if (lcur == vbot) break; } - while (y == scrcoords[rnext][1]) + while (y == polygon->Vertices[rnext]->FinalPosition[1]) { rcur = rnext; @@ -206,8 +202,6 @@ void RenderPolygon(Polygon* polygon) if (rnext >= nverts) rnext = 0; } - //rstep = ((scrcoords[rnext][0] - scrcoords[rcur][0]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); - //xmax = scrcoords[rcur][0] << 12; if (rcur == vbot) break; } } @@ -219,39 +213,54 @@ void RenderPolygon(Polygon* polygon) s32 lfactor, rfactor; - if (scrcoords[lnext][1] == scrcoords[lcur][1]) + if (vlnext->FinalPosition[1] == vlcur->FinalPosition[1]) lfactor = 0; else - lfactor = ((y - scrcoords[lcur][1]) << 12) / (scrcoords[lnext][1] - scrcoords[lcur][1]); + lfactor = ((y - vlcur->FinalPosition[1]) << 12) / (vlnext->FinalPosition[1] - vlcur->FinalPosition[1]); - if (scrcoords[rnext][1] == scrcoords[rcur][1]) + if (vrnext->FinalPosition[1] == vrcur->FinalPosition[1]) rfactor = 0; else - rfactor = ((y - scrcoords[rcur][1]) << 12) / (scrcoords[rnext][1] - scrcoords[rcur][1]); + rfactor = ((y - vrcur->FinalPosition[1]) << 12) / (vrnext->FinalPosition[1] - vrcur->FinalPosition[1]); - s32 xl = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); - s32 xr = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); + s32 xl = vlcur->FinalPosition[0] + (((vlnext->FinalPosition[0] - vlcur->FinalPosition[0]) * lfactor) >> 12); + s32 xr = vrcur->FinalPosition[0] + (((vrnext->FinalPosition[0] - vrcur->FinalPosition[0]) * rfactor) >> 12); - if (xl<0 || xr>255) continue; // hax + if (xl<0 || xr>255) + { + printf("!! BAD X %d %d\n", xl, xr); + continue; // hax + } - s32 zl = scrcoords[lcur][2] + (((s64)(scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); - s32 zr = scrcoords[rcur][2] + (((s64)(scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); + s32 zl = vlcur->FinalPosition[2] + (((s64)(vlnext->FinalPosition[2] -vlcur->FinalPosition[2]) * lfactor) >> 12); + s32 zr = vrcur->FinalPosition[2] + (((s64)(vrnext->FinalPosition[2] - vrcur->FinalPosition[2]) * rfactor) >> 12); - s32 wl = scrcoords[lcur][3] + (((s64)(scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); - s32 wr = scrcoords[rcur][3] + (((s64)(scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); + s32 wl = vlcur->FinalPosition[3] + (((s64)(vlnext->FinalPosition[3] - vlcur->FinalPosition[3]) * lfactor) >> 12); + s32 wr = vrcur->FinalPosition[3] + (((s64)(vrnext->FinalPosition[3] - vrcur->FinalPosition[3]) * rfactor) >> 12); - s64 perspfactorl1 = ((s64)(0x1000 - lfactor) << 12) / scrcoords[lcur][3]; - s64 perspfactorl2 = ((s64)lfactor << 12) / scrcoords[lnext][3]; - s64 perspfactorr1 = ((s64)(0x1000 - rfactor) << 12) / scrcoords[rcur][3]; - s64 perspfactorr2 = ((s64)rfactor << 12) / scrcoords[rnext][3]; + s64 perspfactorl1 = ((s64)(0x1000 - lfactor) << 12) / vlcur->FinalPosition[3]; + s64 perspfactorl2 = ((s64)lfactor << 12) / vlnext->FinalPosition[3]; + s64 perspfactorr1 = ((s64)(0x1000 - rfactor) << 12) / vrcur->FinalPosition[3]; + s64 perspfactorr2 = ((s64)rfactor << 12) / vrnext->FinalPosition[3]; - u32 rl = (((perspfactorl1 * vlcur->Color[0]) + (perspfactorl2 * vlnext->Color[0])) << 12) / (perspfactorl1 + perspfactorl2); - u32 gl = (((perspfactorl1 * vlcur->Color[1]) + (perspfactorl2 * vlnext->Color[1])) << 12) / (perspfactorl1 + perspfactorl2); - u32 bl = (((perspfactorl1 * vlcur->Color[2]) + (perspfactorl2 * vlnext->Color[2])) << 12) / (perspfactorl1 + perspfactorl2); + if (perspfactorl1 + perspfactorl2 == 0) + { + perspfactorl1 = 0x1000; + perspfactorl2 = 0; + } + if (perspfactorr1 + perspfactorr2 == 0) + { + perspfactorr1 = 0x1000; + perspfactorr2 = 0; + } - u32 rr = (((perspfactorr1 * vrcur->Color[0]) + (perspfactorr2 * vrnext->Color[0])) << 12) / (perspfactorr1 + perspfactorr2); - u32 gr = (((perspfactorr1 * vrcur->Color[1]) + (perspfactorr2 * vrnext->Color[1])) << 12) / (perspfactorr1 + perspfactorr2); - u32 br = (((perspfactorr1 * vrcur->Color[2]) + (perspfactorr2 * vrnext->Color[2])) << 12) / (perspfactorr1 + perspfactorr2); + s32 rl = ((perspfactorl1 * vlcur->FinalColor[0]) + (perspfactorl2 * vlnext->FinalColor[0])) / (perspfactorl1 + perspfactorl2); + s32 gl = ((perspfactorl1 * vlcur->FinalColor[1]) + (perspfactorl2 * vlnext->FinalColor[1])) / (perspfactorl1 + perspfactorl2); + s32 bl = ((perspfactorl1 * vlcur->FinalColor[2]) + (perspfactorl2 * vlnext->FinalColor[2])) / (perspfactorl1 + perspfactorl2); + + s32 rr = ((perspfactorr1 * vrcur->FinalColor[0]) + (perspfactorr2 * vrnext->FinalColor[0])) / (perspfactorr1 + perspfactorr2); + s32 gr = ((perspfactorr1 * vrcur->FinalColor[1]) + (perspfactorr2 * vrnext->FinalColor[1])) / (perspfactorr1 + perspfactorr2); + s32 br = ((perspfactorr1 * vrcur->FinalColor[2]) + (perspfactorr2 * vrnext->FinalColor[2])) / (perspfactorr1 + perspfactorr2); s32 xdiv; if (xr == xl) @@ -264,16 +273,25 @@ void RenderPolygon(Polygon* polygon) s32 xfactor = (x - xl) * xdiv; s32 z = zl + (((s64)(zr - zl) * xfactor) >> 12); + //z = wl + (((s64)(wr - wl) * xfactor) >> 12); + //z -= 0x1FF; + //if (z < 0) z = 0; s32 perspfactor1 = ((0x1000 - xfactor) << 12) / wl; s32 perspfactor2 = (xfactor << 12) / wr; - // possible optimization: only do color interpolation if the depth test passes - u32 vr = (s64)((perspfactor1 * rl) + (perspfactor2 * rr)) / (perspfactor1 + perspfactor2); - u32 vg = (s64)((perspfactor1 * gl) + (perspfactor2 * gr)) / (perspfactor1 + perspfactor2); - u32 vb = (s64)((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2); + if (perspfactor1 + perspfactor2 == 0) + { + perspfactor1 = 0x1000; + perspfactor2 = 0; + } - RenderPixel(polygon->Attr, x, y, z, vr>>12, vg>>12, vb>>12); + // possible optimization: only do color interpolation if the depth test passes + u32 vr = ((perspfactor1 * rl) + (perspfactor2 * rr)) / (perspfactor1 + perspfactor2); + u32 vg = ((perspfactor1 * gl) + (perspfactor2 * gr)) / (perspfactor1 + perspfactor2); + u32 vb = ((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2); + + RenderPixel(polygon->Attr, x, y, z, vr>>3, vg>>3, vb>>3); // Z debug /*u8 zerp = (w * 63) / 0xFFFFFF; diff --git a/melonDS.depend b/melonDS.depend index 0c715c1d..cdf281d0 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1487173011 source:c:\documents\sources\melonds\main.cpp +1487292827 source:c:\documents\sources\melonds\main.cpp "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1487177244 source:c:\documents\sources\melonds\nds.cpp +1487299879 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -146,16 +146,16 @@ "NDS.h" "NDSCart.h" -1487016725 c:\documents\sources\melonds\gpu3d.h +1487287868 c:\documents\sources\melonds\gpu3d.h -1487036430 source:c:\documents\sources\melonds\gpu3d.cpp +1487299939 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" -1487177182 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1487300098 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From cca0a71d8178284629d8a9f31fbe4c2538650c9e Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 17 Feb 2017 05:33:37 +0100 Subject: [PATCH 14/16] emulate DMA timings. keeps games from overflowing the GXFIFO... when they aren't fucking dumb. --- ARM.h | 4 +- DMA.cpp | 118 +++++++++++++++++++++++++++++++++++++++++++------ DMA.h | 8 ++++ GPU3D.cpp | 7 ++- NDS.cpp | 58 +++++++++++++++++++----- NDS.h | 1 + melonDS.depend | 14 +++--- 7 files changed, 176 insertions(+), 34 deletions(-) diff --git a/ARM.h b/ARM.h index bf354a78..79c2bce3 100644 --- a/ARM.h +++ b/ARM.h @@ -125,7 +125,7 @@ public: else val = NDS::ARM7Read8(addr); - Cycles += Waitstates[3][(addr>>24)&0xF]; + Cycles += Waitstates[2][(addr>>24)&0xF]; return val; } @@ -171,7 +171,7 @@ public: else NDS::ARM7Write8(addr, val); - Cycles += Waitstates[3][(addr>>24)&0xF]; + Cycles += Waitstates[2][(addr>>24)&0xF]; } void DataWrite16(u32 addr, u16 val, u32 forceuser=0) diff --git a/DMA.cpp b/DMA.cpp index dbedc010..28f7ddc4 100644 --- a/DMA.cpp +++ b/DMA.cpp @@ -34,6 +34,66 @@ DMA::DMA(u32 cpu, u32 num) CPU = cpu; Num = num; + if (cpu == 0) + CountMask = 0x001FFFFF; + else + CountMask = (num==3 ? 0x0000FFFF : 0x00003FFF); + + // TODO: merge with the one in ARM.cpp, somewhere + for (int i = 0; i < 16; i++) + { + Waitstates[0][i] = 1; + Waitstates[1][i] = 1; + } + + if (!num) + { + // ARM9 + // note: 33MHz cycles + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x5] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x7] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + Waitstates[0][0xF] = 1; + + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x5] = 2; + Waitstates[1][0x6] = 2; + Waitstates[1][0x7] = 1; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + Waitstates[1][0xF] = 1; + } + else + { + // ARM7 + Waitstates[0][0x0] = 1; + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + + Waitstates[1][0x0] = 1; + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x6] = 2; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + } + Reset(); } @@ -51,8 +111,11 @@ void DMA::Reset() CurSrcAddr = 0; CurDstAddr = 0; RemCount = 0; + IterCount = 0; SrcAddrInc = 0; DstAddrInc = 0; + + Running = false; } void DMA::WriteCnt(u32 val) @@ -90,16 +153,16 @@ void DMA::WriteCnt(u32 val) Start(); else if (StartMode == 0x07) GPU3D::CheckFIFODMA(); - //else - // printf("SPECIAL ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode); + if ((StartMode&7)!=0x00 && (StartMode&7)!=0x1 && StartMode!=2 && StartMode!=0x05 && StartMode!=0x12 && StartMode!=0x07) printf("UNIMPLEMENTED ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode); - //if (StartMode==2)printf("HBLANK DMA %08X -> %08X\n", SrcAddr, DstAddr); } } void DMA::Start() { + if (Running) return; + u32 countmask; if (CPU == 0) countmask = 0x001FFFFF; @@ -110,6 +173,11 @@ void DMA::Start() if (!RemCount) RemCount = countmask+1; + if (StartMode == 0x07 && RemCount > 112) + IterCount = 112; + else + IterCount = RemCount; + if ((Cnt & 0x00600000) == 0x00600000) CurDstAddr = DstAddr; @@ -126,24 +194,33 @@ void DMA::Start() NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num); return; } - //if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); - u32 num = RemCount; - if (StartMode == 0x07 && num > 112) - num = 112; - // TODO: NOT MAKE THE DMA INSTANT!! + // TODO eventually: not stop if we're running code in ITCM + + Running = true; + NDS::StopCPU(CPU, true); +} + +s32 DMA::Run(s32 cycles) +{ + if (!Running) + return cycles; + + u32 zorp = IterCount; + if (!(Cnt & 0x04000000)) { u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; - while (num > 0) + while (IterCount > 0 && cycles > 0) { writefn(CurDstAddr, readfn(CurSrcAddr)); + cycles -= (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; - num--; + IterCount--; RemCount--; } } @@ -152,22 +229,30 @@ void DMA::Start() u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; - while (num > 0) + while (IterCount > 0 && cycles > 0) { writefn(CurDstAddr, readfn(CurSrcAddr)); + cycles -= (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; - num--; + IterCount--; RemCount--; } } if (RemCount) { - Cnt &= ~countmask; + Cnt &= ~CountMask; Cnt |= RemCount; - return; + + if (IterCount == 0) + { + Running = false; + NDS::StopCPU(CPU, false); + } + + return cycles; } if (!(Cnt & 0x02000000)) @@ -175,4 +260,9 @@ void DMA::Start() if (Cnt & 0x40000000) NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num); + + Running = false; + NDS::StopCPU(CPU, false); + + return cycles - 2; } diff --git a/DMA.h b/DMA.h index 619b1639..59a7f036 100644 --- a/DMA.h +++ b/DMA.h @@ -32,6 +32,8 @@ public: void WriteCnt(u32 val); void Start(); + s32 Run(s32 cycles); + void StartIfNeeded(u32 mode) { if ((mode == StartMode) && (Cnt & 0x80000000)) @@ -45,12 +47,18 @@ public: private: u32 CPU, Num; + s32 Waitstates[2][16]; + u32 StartMode; u32 CurSrcAddr; u32 CurDstAddr; u32 RemCount; + u32 IterCount; u32 SrcAddrInc; u32 DstAddrInc; + u32 CountMask; + + bool Running; }; #endif diff --git a/GPU3D.cpp b/GPU3D.cpp index 4d0dd7bc..c28ebd04 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -841,6 +841,8 @@ void SubmitVertex() +int logflag = 0; + void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) @@ -852,7 +854,8 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsFull()) { - printf("!!! GX FIFO FULL\n"); + if (!logflag) printf("!!! GX FIFO FULL\n"); + logflag = 1; //NDS::debug(0); return; } @@ -876,6 +879,8 @@ CmdFIFOEntry CmdFIFORead() CheckFIFOIRQ(); } + logflag = 0; + return ret; } diff --git a/NDS.cpp b/NDS.cpp index 8b911d32..655e7ab0 100644 --- a/NDS.cpp +++ b/NDS.cpp @@ -54,6 +54,8 @@ s32 ARM7Offset; SchedEvent SchedList[Event_MAX]; u32 SchedListMask; +u32 CPUStop; + u8 ARM9BIOS[0x1000]; u8 ARM7BIOS[0x4000]; @@ -278,6 +280,8 @@ void Reset() ARM7->Reset(); CP15::Reset(); + CPUStop = 0; + memset(Timers, 0, 8*sizeof(Timer)); for (i = 0; i < 8; i++) DMAs[i]->Reset(); @@ -307,7 +311,7 @@ void Reset() // test //LoadROM(); //LoadFirmware(); - if (NDSCart::LoadROM("rom/nsmb.nds")) + if (NDSCart::LoadROM("rom/raving.nds")) Running = true; // hax } @@ -364,17 +368,45 @@ void RunFrame() while (Running && framecycles>0) { - CalcIterationCycles(); - - ARM9->CyclesToRun = CurIterationCycles << 1; - - ARM9->Execute(); - s32 ndscyclestorun = ARM9->Cycles >> 1; + s32 ndscyclestorun; s32 ndscycles = 0; - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; - ARM7->Execute(); - ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + CalcIterationCycles(); + + if (CPUStop & 0x1) + { + s32 cycles = CurIterationCycles; + cycles = DMAs[0]->Run(cycles); + if (cycles > 0) cycles = DMAs[1]->Run(cycles); + if (cycles > 0) cycles = DMAs[2]->Run(cycles); + if (cycles > 0) cycles = DMAs[3]->Run(cycles); + ndscyclestorun = CurIterationCycles - cycles; + + // TODO: run other timing critical shit, like timers + GPU3D::Run(ndscyclestorun); + } + else + { + ARM9->CyclesToRun = CurIterationCycles << 1; + ARM9->Execute(); + ndscyclestorun = ARM9->Cycles >> 1; + } + + if (CPUStop & 0x2) + { + s32 cycles = ndscyclestorun - ARM7Offset; + cycles = DMAs[4]->Run(cycles); + if (cycles > 0) cycles = DMAs[5]->Run(cycles); + if (cycles > 0) cycles = DMAs[6]->Run(cycles); + if (cycles > 0) cycles = DMAs[7]->Run(cycles); + ARM7Offset = cycles; + } + else + { + ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; + ARM7->Execute(); + ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + } RunSystem(ndscyclestorun); //GPU3D::Run(ndscyclestorun); @@ -520,6 +552,12 @@ bool HaltInterrupted(u32 cpu) return false; } +void StopCPU(u32 cpu, bool stop) +{ + if (stop) CPUStop |= (1< "NDS.h" @@ -31,7 +31,7 @@ "ARMInterpreter.h" "GPU3D.h" -1486261220 c:\documents\sources\melonds\arm.h +1487302172 c:\documents\sources\melonds\arm.h "types.h" "NDS.h" "CP15.h" @@ -109,14 +109,14 @@ 1486511075 c:\documents\sources\melonds\fifo.h "types.h" -1486823366 source:c:\documents\sources\melonds\dma.cpp +1487305720 source:c:\documents\sources\melonds\dma.cpp "NDS.h" "DMA.h" "NDSCart.h" "GPU3D.h" -1484698068 c:\documents\sources\melonds\dma.h +1487305393 c:\documents\sources\melonds\dma.h "types.h" 1487102235 source:c:\documents\sources\melonds\gpu.cpp @@ -148,14 +148,14 @@ 1487287868 c:\documents\sources\melonds\gpu3d.h -1487299939 source:c:\documents\sources\melonds\gpu3d.cpp +1487305740 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" -1487300098 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1487300658 source:c:\documents\sources\melonds\gpu3d_soft.cpp "NDS.h" From 97d76addd9c876a3438e8af1f83acc8bd20be70a Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 17 Feb 2017 18:59:11 +0100 Subject: [PATCH 15/16] fix a bunch of GXFIFO related bugs. --- ARM.cpp | 1 + DMA.cpp | 7 ++++--- GPU3D.cpp | 32 +++++++++++++++----------------- NDS.cpp | 2 +- melonDS.depend | 8 ++++---- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ARM.cpp b/ARM.cpp index 10b87956..536c78ce 100644 --- a/ARM.cpp +++ b/ARM.cpp @@ -328,6 +328,7 @@ s32 ARM::Execute() else { Cycles = CyclesToRun; + GPU3D::Run(CyclesToRun >> 1); return Cycles; } } diff --git a/DMA.cpp b/DMA.cpp index 28f7ddc4..629c14f3 100644 --- a/DMA.cpp +++ b/DMA.cpp @@ -46,7 +46,7 @@ DMA::DMA(u32 cpu, u32 num) Waitstates[1][i] = 1; } - if (!num) + if (!cpu) { // ARM9 // note: 33MHz cycles @@ -206,8 +206,6 @@ s32 DMA::Run(s32 cycles) if (!Running) return cycles; - u32 zorp = IterCount; - if (!(Cnt & 0x04000000)) { u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; @@ -250,6 +248,9 @@ s32 DMA::Run(s32 cycles) { Running = false; NDS::StopCPU(CPU, false); + + if (StartMode & 0x07) + GPU3D::CheckFIFODMA(); } return cycles; diff --git a/GPU3D.cpp b/GPU3D.cpp index c28ebd04..de860f23 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -848,7 +848,6 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) { CmdPIPE->Write(entry); - GXStat |= (1<<27); } else { @@ -890,24 +889,19 @@ void ExecuteCommand() { CmdFIFOEntry entry = CmdFIFORead(); - //printf("FIFO: %02X %08X\n", entry.Command, entry.Param); + //printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level()); ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; - //if ((entry.Command&0xF0)==0x10) - // printf("MATRIX CMD %02X %08X\n", entry.Command, entry.Param); - if (ExecParamCount >= CmdNumParams[entry.Command]) { CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; GXStat &= ~(1<<14); - //if (CycleCount > 0) - // GXStat |= (1<<27); - - //printf("3D CMD %02X\n", entry.Command); + if (CycleCount > 0) + GXStat |= (1<<27); switch (entry.Command) { @@ -1268,6 +1262,10 @@ void Run(s32 cycles) { if (FlushRequest) return; + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + return; + + CycleCount -= cycles; if (CycleCount <= 0) { @@ -1275,12 +1273,10 @@ void Run(s32 cycles) ExecuteCommand(); } - CycleCount -= cycles; - if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { CycleCount = 0; - GXStat &= ~(1<<27); + GXStat &= ~((1<<27)|(1<<14)); } } @@ -1318,7 +1314,6 @@ void VBlank() NumPolygons = 0; FlushRequest = 0; - GXStat &= ~(1<<27); } } @@ -1410,10 +1405,13 @@ void Write32(u32 addr, u32 val) for (;;) { - CmdFIFOEntry entry; - entry.Command = CurCommand & 0xFF; - entry.Param = val; - CmdFIFOWrite(entry); + if ((CurCommand & 0xFF) || (NumCommands == 4)) + { + CmdFIFOEntry entry; + entry.Command = CurCommand & 0xFF; + entry.Param = val; + CmdFIFOWrite(entry); + } if (ParamCount >= TotalParams) { diff --git a/NDS.cpp b/NDS.cpp index 655e7ab0..182abe16 100644 --- a/NDS.cpp +++ b/NDS.cpp @@ -311,7 +311,7 @@ void Reset() // test //LoadROM(); //LoadFirmware(); - if (NDSCart::LoadROM("rom/raving.nds")) + if (NDSCart::LoadROM("rom/nsmb.nds")) Running = true; // hax } diff --git a/melonDS.depend b/melonDS.depend index d862460f..bdce235e 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1487304040 source:c:\documents\sources\melonds\nds.cpp +1487349286 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -24,7 +24,7 @@ "RTC.h" "Wifi.h" -1486736660 source:c:\documents\sources\melonds\arm.cpp +1487349559 source:c:\documents\sources\melonds\arm.cpp "NDS.h" "ARM.h" @@ -109,7 +109,7 @@ 1486511075 c:\documents\sources\melonds\fifo.h "types.h" -1487305720 source:c:\documents\sources\melonds\dma.cpp +1487354030 source:c:\documents\sources\melonds\dma.cpp "NDS.h" "DMA.h" @@ -148,7 +148,7 @@ 1487287868 c:\documents\sources\melonds\gpu3d.h -1487305740 source:c:\documents\sources\melonds\gpu3d.cpp +1487354054 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" From 261689d3d123df541bb201babfa06a02861e2aaa Mon Sep 17 00:00:00 2001 From: StapleButter Date: Fri, 17 Feb 2017 19:34:41 +0100 Subject: [PATCH 16/16] 'FIFO full' hack for SM64DS. --- GPU3D.cpp | 19 +++++++++++-------- GPU3D.h | 2 ++ NDS.cpp | 2 +- melonDS.depend | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/GPU3D.cpp b/GPU3D.cpp index de860f23..e38f7c23 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -841,8 +841,6 @@ void SubmitVertex() -int logflag = 0; - void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) @@ -853,10 +851,17 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsFull()) { - if (!logflag) printf("!!! GX FIFO FULL\n"); - logflag = 1; - //NDS::debug(0); - return; + //printf("!!! GX FIFO FULL\n"); + //return; + + // temp. hack + // SM64DS seems to overflow the FIFO occasionally + // either leftover bugs in our implementation, or the game accidentally doing that + // TODO: investigate. + // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) + + while (CmdFIFO->IsFull()) + ExecuteCommand(); } CmdFIFO->Write(entry); @@ -878,8 +883,6 @@ CmdFIFOEntry CmdFIFORead() CheckFIFOIRQ(); } - logflag = 0; - return ret; } diff --git a/GPU3D.h b/GPU3D.h index cbb8234a..c95e3b71 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -56,6 +56,8 @@ bool Init(); void DeInit(); void Reset(); +void ExecuteCommand(); + void Run(s32 cycles); void CheckFIFOIRQ(); void CheckFIFODMA(); diff --git a/NDS.cpp b/NDS.cpp index 182abe16..63013ab8 100644 --- a/NDS.cpp +++ b/NDS.cpp @@ -311,7 +311,7 @@ void Reset() // test //LoadROM(); //LoadFirmware(); - if (NDSCart::LoadROM("rom/nsmb.nds")) + if (NDSCart::LoadROM("rom/sm64ds.nds")) Running = true; // hax } diff --git a/melonDS.depend b/melonDS.depend index bdce235e..e0cd1cef 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -146,7 +146,7 @@ "NDS.h" "NDSCart.h" -1487287868 c:\documents\sources\melonds\gpu3d.h +1487356069 c:\documents\sources\melonds\gpu3d.h 1487354054 source:c:\documents\sources\melonds\gpu3d.cpp