diff --git a/ARM.cpp b/ARM.cpp index f582deb2..536c78ce 100644 --- a/ARM.cpp +++ b/ARM.cpp @@ -197,7 +197,7 @@ void ARM::RestoreCPSR() switch (CPSR & 0x1F) { case 0x11: - CPSR = R_FIQ[8]; + CPSR = R_FIQ[7]; break; case 0x12: @@ -328,6 +328,7 @@ s32 ARM::Execute() else { Cycles = CyclesToRun; + GPU3D::Run(CyclesToRun >> 1); return Cycles; } } diff --git a/ARM.h b/ARM.h index bf354a78..79c2bce3 100644 --- a/ARM.h +++ b/ARM.h @@ -125,7 +125,7 @@ public: else val = NDS::ARM7Read8(addr); - Cycles += Waitstates[3][(addr>>24)&0xF]; + Cycles += Waitstates[2][(addr>>24)&0xF]; return val; } @@ -171,7 +171,7 @@ public: else NDS::ARM7Write8(addr, val); - Cycles += Waitstates[3][(addr>>24)&0xF]; + Cycles += Waitstates[2][(addr>>24)&0xF]; } void DataWrite16(u32 addr, u16 val, u32 forceuser=0) diff --git a/DMA.cpp b/DMA.cpp index 836a5805..629c14f3 100644 --- a/DMA.cpp +++ b/DMA.cpp @@ -34,6 +34,66 @@ DMA::DMA(u32 cpu, u32 num) CPU = cpu; Num = num; + if (cpu == 0) + CountMask = 0x001FFFFF; + else + CountMask = (num==3 ? 0x0000FFFF : 0x00003FFF); + + // TODO: merge with the one in ARM.cpp, somewhere + for (int i = 0; i < 16; i++) + { + Waitstates[0][i] = 1; + Waitstates[1][i] = 1; + } + + if (!cpu) + { + // ARM9 + // note: 33MHz cycles + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x5] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x7] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + Waitstates[0][0xF] = 1; + + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x5] = 2; + Waitstates[1][0x6] = 2; + Waitstates[1][0x7] = 1; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + Waitstates[1][0xF] = 1; + } + else + { + // ARM7 + Waitstates[0][0x0] = 1; + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + + Waitstates[1][0x0] = 1; + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x6] = 2; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + } + Reset(); } @@ -51,8 +111,11 @@ void DMA::Reset() CurSrcAddr = 0; CurDstAddr = 0; RemCount = 0; + IterCount = 0; SrcAddrInc = 0; DstAddrInc = 0; + + Running = false; } void DMA::WriteCnt(u32 val) @@ -90,16 +153,16 @@ void DMA::WriteCnt(u32 val) Start(); else if (StartMode == 0x07) GPU3D::CheckFIFODMA(); - //else - // printf("SPECIAL ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode); + if ((StartMode&7)!=0x00 && (StartMode&7)!=0x1 && StartMode!=2 && StartMode!=0x05 && StartMode!=0x12 && StartMode!=0x07) printf("UNIMPLEMENTED ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode); - //if (StartMode==2)printf("HBLANK DMA %08X -> %08X\n", SrcAddr, DstAddr); } } void DMA::Start() { + if (Running) return; + u32 countmask; if (CPU == 0) countmask = 0x001FFFFF; @@ -110,10 +173,15 @@ void DMA::Start() if (!RemCount) RemCount = countmask+1; + if (StartMode == 0x07 && RemCount > 112) + IterCount = 112; + else + IterCount = RemCount; + if ((Cnt & 0x00600000) == 0x00600000) CurDstAddr = DstAddr; - //printf("ARM%d DMA%d %08X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); + //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); // special path for cart DMA. this is a gross hack. // emulating it properly requires emulating cart transfer delays, so uh... TODO @@ -126,24 +194,31 @@ void DMA::Start() NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num); return; } -if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); - u32 num = RemCount; - if (StartMode == 0x07 && num > 112) - num = 112; - // TODO: NOT MAKE THE DMA INSTANT!! + // TODO eventually: not stop if we're running code in ITCM + + Running = true; + NDS::StopCPU(CPU, true); +} + +s32 DMA::Run(s32 cycles) +{ + if (!Running) + return cycles; + if (!(Cnt & 0x04000000)) { u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; - while (num > 0) + while (IterCount > 0 && cycles > 0) { writefn(CurDstAddr, readfn(CurSrcAddr)); + cycles -= (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; - num--; + IterCount--; RemCount--; } } @@ -152,22 +227,33 @@ if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; - while (num > 0) + while (IterCount > 0 && cycles > 0) { writefn(CurDstAddr, readfn(CurSrcAddr)); + cycles -= (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; - num--; + IterCount--; RemCount--; } } if (RemCount) { - Cnt &= ~countmask; + Cnt &= ~CountMask; Cnt |= RemCount; - return; + + if (IterCount == 0) + { + Running = false; + NDS::StopCPU(CPU, false); + + if (StartMode & 0x07) + GPU3D::CheckFIFODMA(); + } + + return cycles; } if (!(Cnt & 0x02000000)) @@ -175,4 +261,9 @@ if (StartMode == 0x07)printf("GXFIFO DMA %08X %08X\n", Cnt, CurSrcAddr); if (Cnt & 0x40000000) NDS::TriggerIRQ(CPU, NDS::IRQ_DMA0 + Num); + + Running = false; + NDS::StopCPU(CPU, false); + + return cycles - 2; } diff --git a/DMA.h b/DMA.h index 619b1639..59a7f036 100644 --- a/DMA.h +++ b/DMA.h @@ -32,6 +32,8 @@ public: void WriteCnt(u32 val); void Start(); + s32 Run(s32 cycles); + void StartIfNeeded(u32 mode) { if ((mode == StartMode) && (Cnt & 0x80000000)) @@ -45,12 +47,18 @@ public: private: u32 CPU, Num; + s32 Waitstates[2][16]; + u32 StartMode; u32 CurSrcAddr; u32 CurDstAddr; u32 RemCount; + u32 IterCount; u32 SrcAddrInc; u32 DstAddrInc; + u32 CountMask; + + bool Running; }; #endif diff --git a/GPU.cpp b/GPU.cpp index e153c4d2..a945b3f8 100644 --- a/GPU.cpp +++ b/GPU.cpp @@ -62,7 +62,7 @@ u8* VRAM_AOBJExtPal; u8* VRAM_BBGExtPal[4]; u8* VRAM_BOBJExtPal; -u16 Framebuffer[256*192*2]; +u32 Framebuffer[256*192*2]; GPU2D* GPU2D_A; GPU2D* GPU2D_B; @@ -123,7 +123,7 @@ void Reset() for (int i = 0; i < 256*192*2; i++) { - Framebuffer[i] = 0x7FFF; + Framebuffer[i] = 0xFFFFFFFF; } GPU2D_A->Reset(); @@ -837,6 +837,10 @@ void StartScanline(u32 line) if (DispStat[0] & (1<<3)) NDS::TriggerIRQ(0, NDS::IRQ_VBlank); if (DispStat[1] & (1<<3)) NDS::TriggerIRQ(1, NDS::IRQ_VBlank); + + GPU2D_A->VBlank(); + GPU2D_B->VBlank(); + GPU3D::VBlank(); } //NDS::ScheduleEvent(LINE_CYCLES, StartScanline, line+1); diff --git a/GPU.h b/GPU.h index 18661ca2..a39faeab 100644 --- a/GPU.h +++ b/GPU.h @@ -48,7 +48,7 @@ extern u8* VRAM_AOBJExtPal; extern u8* VRAM_BBGExtPal[4]; extern u8* VRAM_BOBJExtPal; -extern u16 Framebuffer[256*192*2]; +extern u32 Framebuffer[256*192*2]; extern GPU2D* GPU2D_A; extern GPU2D* GPU2D_B; diff --git a/GPU2D.cpp b/GPU2D.cpp index 6b4594ec..d9634fdd 100644 --- a/GPU2D.cpp +++ b/GPU2D.cpp @@ -82,11 +82,8 @@ void GPU2D::Reset() memset(BGRotD, 0, 2*2); } -void GPU2D::SetFramebuffer(u16* buf) +void GPU2D::SetFramebuffer(u32* buf) { - // framebuffer is 256x192 16bit. - // might eventually support other framebuffer types/sizes - // TODO: change this. the DS uses 18bit color Framebuffer = buf; } @@ -205,7 +202,7 @@ void GPU2D::Write32(u32 addr, u32 val) void GPU2D::DrawScanline(u32 line) { - u16* dst = &Framebuffer[256*line]; + u32* dst = &Framebuffer[256*line]; u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 0x1 : 0x3); @@ -214,8 +211,8 @@ void GPU2D::DrawScanline(u32 line) { case 0: // screen off { - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = 0x7FFF7FFF; + for (int i = 0; i < 256; i++) + dst[i] = 0xFF3F3F3F; } break; @@ -230,8 +227,15 @@ void GPU2D::DrawScanline(u32 line) u32* vram = (u32*)GPU::VRAM[(DispCnt >> 18) & 0x3]; vram = &vram[line << 7]; - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = vram[i]; + for (int i = 0; i < 256; i++) + { + u16 color = vram[i]; + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } } break; @@ -241,11 +245,22 @@ void GPU2D::DrawScanline(u32 line) } break; } + + // convert to 32-bit RGBA + for (int i = 0; i < 256; i++) + dst[i] = ((dst[i] & 0x003F3F3F) << 2) | + ((dst[i] & 0x00303030) >> 4) | + 0xFF000000; +} + +void GPU2D::VBlank() +{ + // } template -void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) +void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst) { for (int i = 3; i >= 0; i--) { @@ -285,7 +300,7 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) if (DispCnt & 0x0100) { if ((!Num) && (DispCnt & 0x8)) - {} // TODO + DrawBG_3D(line, dst); else DrawBG_Text(line, dst, 0); } @@ -295,17 +310,24 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) } } -void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) +void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) { u32 backdrop; if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; else backdrop = *(u16*)&GPU::Palette[0]; - // TODO: color effect for backdrop + { + u8 r = (backdrop & 0x001F) << 1; + u8 g = (backdrop & 0x03E0) >> 4; + u8 b = (backdrop & 0x7C00) >> 9; - backdrop |= (backdrop<<16); - for (int i = 0; i < 256>>1; i++) - ((u32*)dst)[i] = backdrop; + // TODO: color effect for backdrop + + backdrop = r | (g << 8) | (b << 16) | 0x20000000; + + for (int i = 0; i < 256; i++) + dst[i] = backdrop; + } // prerender sprites u32 spritebuf[256]; @@ -328,7 +350,38 @@ void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) } -void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) +typedef void (*DrawPixelFunc)(u32 bgnum, u32* dst, u16 color, u32 blendfunc); + +void GPU2D::DrawPixel_Normal(u32 bgnum, u32* dst, u16 color, u32 blendfunc) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *dst = r | (g << 8) | (b << 16) | (0x01000000 << bgnum); +} + +void GPU2D::DrawBG_3D(u32 line, u32* dst) +{ + // TODO: scroll, etc + + u8* src = GPU3D::GetLine(line); + for (int i = 0; i < 256; i++) + { + u8 r = *src++; + u8 g = *src++; + u8 b = *src++; + u8 a = *src++; + if (a == 0) continue; + + // TODO: blending + // alpha is 6bit too....? + + dst[i] = r | (g << 8) | (b << 16); + } +} + +void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -342,6 +395,8 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + extpal = (bgcnt & 0x0080) && (DispCnt & 0x40000000); if (Num) @@ -432,7 +487,7 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) color = pixels[tilexoff]; if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); xoff++; } @@ -475,14 +530,14 @@ void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) } if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); xoff++; } } } -void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) +void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -505,6 +560,8 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) if (bgcnt & 0x2000) overflowmask = 0; else overflowmask = ~(coordmask | 0x7FF); + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + extpal = (DispCnt & 0x40000000); s16 rotA = BGRotA[bgnum-2]; @@ -542,7 +599,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) u16 color = bitmap[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color & 0x8000) - dst[i] = color; + drawpixelfn(bgnum, &dst[i], color, BlendFunc); } rotX += rotA; @@ -563,7 +620,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) u8 color = tileset[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color) - dst[i] = pal[color]; + drawpixelfn(bgnum, &dst[i], pal[color], BlendFunc); } rotX += rotA; @@ -636,7 +693,7 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) color = pixels[(tileyoff << 3) + tilexoff]; if (color) - dst[i] = curpal[color]; + drawpixelfn(bgnum, &dst[i], curpal[color], BlendFunc); } rotX += rotA; @@ -648,12 +705,17 @@ void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) //BGYCenter[bgnum-2] += rotD; } -void GPU2D::InterleaveSprites(u32* buf, u32 prio, u16* dst) +void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst) { + DrawPixelFunc drawpixelfn = DrawPixel_Normal; + for (u32 i = 0; i < 256; i++) { if ((buf[i] & 0xF8000) == prio) - dst[i] = buf[i] & 0x7FFF; + { + u32 blendfunc = 0; + drawpixelfn(4, &dst[i], buf[i], blendfunc); + } } } diff --git a/GPU2D.h b/GPU2D.h index d7da3cb3..2bd45216 100644 --- a/GPU2D.h +++ b/GPU2D.h @@ -27,7 +27,7 @@ public: void Reset(); - void SetFramebuffer(u16* buf); + void SetFramebuffer(u32* buf); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -37,10 +37,11 @@ public: void Write32(u32 addr, u32 val); void DrawScanline(u32 line); + void VBlank(); private: u32 Num; - u16* Framebuffer; + u32* Framebuffer; u32 DispCnt; u16 BGCnt[4]; @@ -55,13 +56,18 @@ private: s16 BGRotC[2]; s16 BGRotD[2]; - template void DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst); - void DrawScanline_Mode1(u32 line, u16* dst); + u32 BlendFunc; - void DrawBG_Text(u32 line, u16* dst, u32 num); - void DrawBG_Extended(u32 line, u16* dst, u32 bgnum); + template void DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst); + void DrawScanline_Mode1(u32 line, u32* dst); - void InterleaveSprites(u32* buf, u32 prio, u16* dst); + static void DrawPixel_Normal(u32 bgnum, u32* dst, u16 color, u32 blendfunc); + + void DrawBG_3D(u32 line, u32* dst); + void DrawBG_Text(u32 line, u32* dst, u32 num); + void DrawBG_Extended(u32 line, u32* dst, u32 bgnum); + + void InterleaveSprites(u32* buf, u32 prio, u32* dst); void DrawSprites(u32 line, u32* dst); void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst); void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst); diff --git a/GPU3D.cpp b/GPU3D.cpp index 6cf636a7..e38f7c23 100644 --- a/GPU3D.cpp +++ b/GPU3D.cpp @@ -23,6 +23,32 @@ #include "FIFO.h" +// 3D engine notes +// +// vertex/polygon RAM is filled when a complete polygon is defined, after it's been culled and clipped +// 04000604 reads from bank used by renderer +// bank used by renderer is emptied at scanline ~192 +// banks are swapped at scanline ~194 +// TODO: needs more investigation. it's weird. +// +// clipping rules: +// * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons +// strip is resumed at the first eligible polygon +// +// clipping exhibits oddities on the real thing. bad precision? fancy algorithm? TODO: investigate. +// +// vertex color precision: +// * vertex colors are kept at 5-bit during clipping. makes for shitty results. +// * vertex colors are converted to 9-bit before drawing, as such: +// if (x > 0) x = (x << 4) + 0xF +// the added bias affects interpolation. +// +// depth buffer: +// Z-buffering mode: val = ((Z * 0x800 * 0x1000) / W) + 0x7FFCFF +// W-buffering mode: val = W - 0x1FF +// TODO: confirm W, because it's weird + + namespace GPU3D { @@ -126,11 +152,112 @@ s32 PosMatrix[16]; s32 VecMatrix[16]; s32 TexMatrix[16]; +s32 ClipMatrix[16]; +bool ClipMatrixDirty; + +s32 Viewport[4]; + s32 ProjMatrixStack[16]; s32 PosMatrixStack[31][16]; s32 ProjMatrixStackPointer; s32 PosMatrixStackPointer; +void MatrixLoadIdentity(s32* m); +void UpdateClipMatrix(); + + +u32 PolygonMode; +s16 CurVertex[3]; +u8 VertexColor[3]; + +u32 PolygonAttr; +u32 CurPolygonAttr; + +Vertex TempVertexBuffer[4]; +u32 VertexNum; +u32 VertexNumInPoly; +u32 NumConsecutivePolygons; +Polygon* LastStripPolygon; + +Vertex VertexRAM[6144 * 2]; +Polygon PolygonRAM[2048 * 2]; + +Vertex* CurVertexRAM; +Polygon* CurPolygonRAM; +u32 NumVertices, NumPolygons; +u32 CurRAMBank; + +u32 FlushRequest; + + + +bool Init() +{ + CmdFIFO = new FIFO(256); + CmdPIPE = new FIFO(4); + + if (!SoftRenderer::Init()) return false; + + return true; +} + +void DeInit() +{ + SoftRenderer::DeInit(); + + delete CmdFIFO; + delete CmdPIPE; +} + +void Reset() +{ + CmdFIFO->Clear(); + CmdPIPE->Clear(); + + NumCommands = 0; + CurCommand = 0; + ParamCount = 0; + TotalParams = 0; + + GXStat = 0; + + memset(ExecParams, 0, 32*4); + ExecParamCount = 0; + CycleCount = 0; + + + MatrixMode = 0; + + MatrixLoadIdentity(ProjMatrix); + MatrixLoadIdentity(PosMatrix); + MatrixLoadIdentity(VecMatrix); + MatrixLoadIdentity(TexMatrix); + + ClipMatrixDirty = true; + UpdateClipMatrix(); + + memset(Viewport, 0, sizeof(Viewport)); + + memset(ProjMatrixStack, 0, 16*4); + memset(PosMatrixStack, 0, 31 * 16*4); + ProjMatrixStackPointer = 0; + PosMatrixStackPointer = 0; + + VertexNum = 0; + VertexNumInPoly = 0; + + CurRAMBank = 0; + CurVertexRAM = &VertexRAM[0]; + CurPolygonRAM = &PolygonRAM[0]; + NumVertices = 0; + NumPolygons = 0; + + FlushRequest = 0; + + SoftRenderer::Reset(); +} + + void MatrixLoadIdentity(s32* m) { @@ -159,25 +286,25 @@ void MatrixMult4x4(s32* m, s32* s) memcpy(tmp, m, 16*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8] + s[3]*tmp[12]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9] + s[3]*tmp[13]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10] + s[3]*tmp[14]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11] + s[3]*tmp[15]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8] + (s64)s[3]*tmp[12]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9] + (s64)s[3]*tmp[13]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10] + (s64)s[3]*tmp[14]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11] + (s64)s[3]*tmp[15]) >> 12; - m[4] = (s[4]*tmp[0] + s[5]*tmp[4] + s[6]*tmp[8] + s[7]*tmp[12]) >> 12; - m[5] = (s[4]*tmp[1] + s[5]*tmp[5] + s[6]*tmp[9] + s[7]*tmp[13]) >> 12; - m[6] = (s[4]*tmp[2] + s[5]*tmp[6] + s[6]*tmp[10] + s[7]*tmp[14]) >> 12; - m[7] = (s[4]*tmp[3] + s[5]*tmp[7] + s[6]*tmp[11] + s[7]*tmp[15]) >> 12; + m[4] = ((s64)s[4]*tmp[0] + (s64)s[5]*tmp[4] + (s64)s[6]*tmp[8] + (s64)s[7]*tmp[12]) >> 12; + m[5] = ((s64)s[4]*tmp[1] + (s64)s[5]*tmp[5] + (s64)s[6]*tmp[9] + (s64)s[7]*tmp[13]) >> 12; + m[6] = ((s64)s[4]*tmp[2] + (s64)s[5]*tmp[6] + (s64)s[6]*tmp[10] + (s64)s[7]*tmp[14]) >> 12; + m[7] = ((s64)s[4]*tmp[3] + (s64)s[5]*tmp[7] + (s64)s[6]*tmp[11] + (s64)s[7]*tmp[15]) >> 12; - m[8] = (s[8]*tmp[0] + s[9]*tmp[4] + s[10]*tmp[8] + s[11]*tmp[12]) >> 12; - m[9] = (s[8]*tmp[1] + s[9]*tmp[5] + s[10]*tmp[9] + s[11]*tmp[13]) >> 12; - m[10] = (s[8]*tmp[2] + s[9]*tmp[6] + s[10]*tmp[10] + s[11]*tmp[14]) >> 12; - m[11] = (s[8]*tmp[3] + s[9]*tmp[7] + s[10]*tmp[11] + s[11]*tmp[15]) >> 12; + m[8] = ((s64)s[8]*tmp[0] + (s64)s[9]*tmp[4] + (s64)s[10]*tmp[8] + (s64)s[11]*tmp[12]) >> 12; + m[9] = ((s64)s[8]*tmp[1] + (s64)s[9]*tmp[5] + (s64)s[10]*tmp[9] + (s64)s[11]*tmp[13]) >> 12; + m[10] = ((s64)s[8]*tmp[2] + (s64)s[9]*tmp[6] + (s64)s[10]*tmp[10] + (s64)s[11]*tmp[14]) >> 12; + m[11] = ((s64)s[8]*tmp[3] + (s64)s[9]*tmp[7] + (s64)s[10]*tmp[11] + (s64)s[11]*tmp[15]) >> 12; - m[12] = (s[12]*tmp[0] + s[13]*tmp[4] + s[14]*tmp[8] + s[15]*tmp[12]) >> 12; - m[13] = (s[12]*tmp[1] + s[13]*tmp[5] + s[14]*tmp[9] + s[15]*tmp[13]) >> 12; - m[14] = (s[12]*tmp[2] + s[13]*tmp[6] + s[14]*tmp[10] + s[15]*tmp[14]) >> 12; - m[15] = (s[12]*tmp[3] + s[13]*tmp[7] + s[14]*tmp[11] + s[15]*tmp[15]) >> 12; + m[12] = ((s64)s[12]*tmp[0] + (s64)s[13]*tmp[4] + (s64)s[14]*tmp[8] + (s64)s[15]*tmp[12]) >> 12; + m[13] = ((s64)s[12]*tmp[1] + (s64)s[13]*tmp[5] + (s64)s[14]*tmp[9] + (s64)s[15]*tmp[13]) >> 12; + m[14] = ((s64)s[12]*tmp[2] + (s64)s[13]*tmp[6] + (s64)s[14]*tmp[10] + (s64)s[15]*tmp[14]) >> 12; + m[15] = ((s64)s[12]*tmp[3] + (s64)s[13]*tmp[7] + (s64)s[14]*tmp[11] + (s64)s[15]*tmp[15]) >> 12; } void MatrixMult4x3(s32* m, s32* s) @@ -186,25 +313,25 @@ void MatrixMult4x3(s32* m, s32* s) memcpy(tmp, m, 16*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; - m[12] = (s[9]*tmp[0] + s[10]*tmp[4] + s[11]*tmp[8] + 0x1000*tmp[12]) >> 12; - m[13] = (s[9]*tmp[1] + s[10]*tmp[5] + s[11]*tmp[9] + 0x1000*tmp[13]) >> 12; - m[14] = (s[9]*tmp[2] + s[10]*tmp[6] + s[11]*tmp[10] + 0x1000*tmp[14]) >> 12; - m[15] = (s[9]*tmp[3] + s[10]*tmp[7] + s[11]*tmp[11] + 0x1000*tmp[15]) >> 12; + m[12] = ((s64)s[9]*tmp[0] + (s64)s[10]*tmp[4] + (s64)s[11]*tmp[8] + (s64)0x1000*tmp[12]) >> 12; + m[13] = ((s64)s[9]*tmp[1] + (s64)s[10]*tmp[5] + (s64)s[11]*tmp[9] + (s64)0x1000*tmp[13]) >> 12; + m[14] = ((s64)s[9]*tmp[2] + (s64)s[10]*tmp[6] + (s64)s[11]*tmp[10] + (s64)0x1000*tmp[14]) >> 12; + m[15] = ((s64)s[9]*tmp[3] + (s64)s[10]*tmp[7] + (s64)s[11]*tmp[11] + (s64)0x1000*tmp[15]) >> 12; } void MatrixMult3x3(s32* m, s32* s) @@ -213,92 +340,506 @@ void MatrixMult3x3(s32* m, s32* s) memcpy(tmp, m, 12*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; } void MatrixScale(s32* m, s32* s) { - m[0] = (s[0]*m[0]) >> 12; - m[1] = (s[0]*m[1]) >> 12; - m[2] = (s[0]*m[2]) >> 12; - m[3] = (s[0]*m[3]) >> 12; + m[0] = ((s64)s[0]*m[0]) >> 12; + m[1] = ((s64)s[0]*m[1]) >> 12; + m[2] = ((s64)s[0]*m[2]) >> 12; + m[3] = ((s64)s[0]*m[3]) >> 12; - m[4] = (s[1]*m[4]) >> 12; - m[5] = (s[1]*m[5]) >> 12; - m[6] = (s[1]*m[6]) >> 12; - m[7] = (s[1]*m[7]) >> 12; + m[4] = ((s64)s[1]*m[4]) >> 12; + m[5] = ((s64)s[1]*m[5]) >> 12; + m[6] = ((s64)s[1]*m[6]) >> 12; + m[7] = ((s64)s[1]*m[7]) >> 12; - m[8] = (s[2]*m[8]) >> 12; - m[9] = (s[2]*m[9]) >> 12; - m[10] = (s[2]*m[10]) >> 12; - m[11] = (s[2]*m[11]) >> 12; + m[8] = ((s64)s[2]*m[8]) >> 12; + m[9] = ((s64)s[2]*m[9]) >> 12; + m[10] = ((s64)s[2]*m[10]) >> 12; + m[11] = ((s64)s[2]*m[11]) >> 12; } void MatrixTranslate(s32* m, s32* s) { - m[12] += (s[0]*m[0] + s[1]*m[4] + s[2]*m[8]) >> 12; - m[13] += (s[0]*m[1] + s[1]*m[5] + s[2]*m[9]) >> 12; - m[14] += (s[0]*m[2] + s[1]*m[6] + s[2]*m[10]) >> 12; + m[12] += ((s64)s[0]*m[0] + (s64)s[1]*m[4] + (s64)s[2]*m[8]) >> 12; + m[13] += ((s64)s[0]*m[1] + (s64)s[1]*m[5] + (s64)s[2]*m[9]) >> 12; + m[14] += ((s64)s[0]*m[2] + (s64)s[1]*m[6] + (s64)s[2]*m[10]) >> 12; } - -bool Init() +void UpdateClipMatrix() { - CmdFIFO = new FIFO(256); - CmdPIPE = new FIFO(4); + if (!ClipMatrixDirty) return; + ClipMatrixDirty = false; - return true; + memcpy(ClipMatrix, ProjMatrix, 16*4); + MatrixMult4x4(ClipMatrix, PosMatrix); } -void DeInit() + + +template +void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) { - delete CmdFIFO; - delete CmdPIPE; + s64 factor_num = vin->Position[3] - (plane*vin->Position[comp]); + s32 factor_den = factor_num - (vout->Position[3] - (plane*vout->Position[comp])); + + Vertex mid; +#define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor_num) / factor_den); + + INTERPOLATE(Position[0]); + INTERPOLATE(Position[1]); + INTERPOLATE(Position[2]); + INTERPOLATE(Position[3]); + + INTERPOLATE(Color[0]); + INTERPOLATE(Color[1]); + INTERPOLATE(Color[2]); + + mid.Clipped = true; + mid.ViewportTransformDone = false; + +#undef INTERPOLATE + *outbuf = mid; } -void Reset() +void SubmitPolygon() { - CmdFIFO->Clear(); - CmdPIPE->Clear(); + Vertex clippedvertices[2][10]; + Vertex* reusedvertices[2]; + int clipstart = 0; + int lastpolyverts = 0; - NumCommands = 0; - CurCommand = 0; - ParamCount = 0; - TotalParams = 0; + int nverts = PolygonMode & 0x1 ? 4:3; + int prev, next; + int c; - GXStat = 0; + // culling - memset(ExecParams, 0, 32*4); - ExecParamCount = 0; - CycleCount = 0; + // checkme: does it work this way for quads and up? + /*s32 _x1 = TempVertexBuffer[1].Position[0] - TempVertexBuffer[0].Position[0]; + s32 _x2 = TempVertexBuffer[2].Position[0] - TempVertexBuffer[0].Position[0]; + s32 _y1 = TempVertexBuffer[1].Position[1] - TempVertexBuffer[0].Position[1]; + s32 _y2 = TempVertexBuffer[2].Position[1] - TempVertexBuffer[0].Position[1]; + s32 _z1 = TempVertexBuffer[1].Position[2] - TempVertexBuffer[0].Position[2]; + s32 _z2 = TempVertexBuffer[2].Position[2] - TempVertexBuffer[0].Position[2]; + s32 normalX = (((s64)_y1 * _z2) - ((s64)_z1 * _y2)) >> 12; + s32 normalY = (((s64)_z1 * _x2) - ((s64)_x1 * _z2)) >> 12; + s32 normalZ = (((s64)_x1 * _y2) - ((s64)_y1 * _x2)) >> 12;*/ + /*s32 centerX = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[12]) >> 12; + s32 centerY = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[13]) >> 12; + s32 centerZ = ((s64)TempVertexBuffer[0].Position[3] * ClipMatrix[14]) >> 12;*/ + /*s64 dot = ((s64)(-TempVertexBuffer[0].Position[0]) * normalX) + + ((s64)(-TempVertexBuffer[0].Position[1]) * normalY) + + ((s64)(-TempVertexBuffer[0].Position[2]) * normalZ); // checkme*/ + // code inspired from Dolphin's software renderer. + // maybe not 100% right + s32 _x0 = TempVertexBuffer[0].Position[0]; + s32 _x1 = TempVertexBuffer[1].Position[0]; + s32 _x2 = TempVertexBuffer[2].Position[0]; + s32 _y0 = TempVertexBuffer[0].Position[1]; + s32 _y1 = TempVertexBuffer[1].Position[1]; + s32 _y2 = TempVertexBuffer[2].Position[1]; + s32 _z0 = TempVertexBuffer[0].Position[3]; + s32 _z1 = TempVertexBuffer[1].Position[3]; + s32 _z2 = TempVertexBuffer[2].Position[3]; + s32 normalX = (((s64)_y0 * _z2) - ((s64)_z0 * _y2)) >> 12; + s32 normalY = (((s64)_z0 * _x2) - ((s64)_x0 * _z2)) >> 12; + s32 normalZ = (((s64)_x0 * _y2) - ((s64)_y0 * _x2)) >> 12; + s64 dot = ((s64)_x1 * normalX) + ((s64)_y1 * normalY) + ((s64)_z1 * normalZ); + bool facingview = (dot < 0); +//printf("Z: %d %d\n", normalZ, -TempVertexBuffer[0].Position[2]); + if (facingview) + { + if (!(CurPolygonAttr & (1<<7))) + { + LastStripPolygon = NULL; + return; + } + } + else + { + if (!(CurPolygonAttr & (1<<6))) + { + LastStripPolygon = NULL; + return; + } + } + // for strips, check whether we can attach to the previous polygon + // this requires two vertices shared with the previous polygon, and that + // the two polygons be of the same type - MatrixMode = 0; + if (PolygonMode >= 2 && LastStripPolygon) + { + int id0, id1; + if (PolygonMode == 2) + { + if (NumConsecutivePolygons & 1) + { + id0 = 2; + id1 = 1; + } + else + { + id0 = 0; + id1 = 2; + } - MatrixLoadIdentity(ProjMatrix); - MatrixLoadIdentity(PosMatrix); - MatrixLoadIdentity(VecMatrix); - MatrixLoadIdentity(TexMatrix); + lastpolyverts = 3; + } + else + { + id0 = 3; + id1 = 2; - memset(ProjMatrixStack, 0, 16*4); - memset(PosMatrixStack, 0, 31 * 16*4); - ProjMatrixStackPointer = 0; - PosMatrixStackPointer = 0; + lastpolyverts = 4; + } + + if (LastStripPolygon->NumVertices == lastpolyverts && + !LastStripPolygon->Vertices[id0]->Clipped && + !LastStripPolygon->Vertices[id1]->Clipped) + { + reusedvertices[0] = LastStripPolygon->Vertices[id0]; + reusedvertices[1] = LastStripPolygon->Vertices[id1]; + + clippedvertices[0][0] = *reusedvertices[0]; + clippedvertices[0][1] = *reusedvertices[1]; + clippedvertices[1][0] = *reusedvertices[0]; + clippedvertices[1][1] = *reusedvertices[1]; + + clipstart = 2; + } + } + + // clip. + // for each vertex: + // if it's outside, check if the previous and next vertices are inside + // if so, place a new vertex at the edge of the view volume + + // X clipping + + c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = TempVertexBuffer[i]; + if (vtx.Position[0] > vtx.Position[3]) + { + Vertex* vprev = &TempVertexBuffer[prev]; + if (vprev->Position[0] <= vprev->Position[3]) + { + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &TempVertexBuffer[next]; + if (vnext->Position[0] <= vnext->Position[3]) + { + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[0] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[0] >= -vprev->Position[3]) + { + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[0] >= -vnext->Position[3]) + { + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + // Y clipping + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[1] > vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[1][prev]; + if (vprev->Position[1] <= vprev->Position[3]) + { + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[1] <= vnext->Position[3]) + { + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[1] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[1] >= -vprev->Position[3]) + { + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[1] >= -vnext->Position[3]) + { + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + // Z clipping + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[2] > vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[1][prev]; + if (vprev->Position[2] <= vprev->Position[3]) + { + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[2] <= vnext->Position[3]) + { + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[2] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[2] >= -vprev->Position[3]) + { + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[2] >= -vnext->Position[3]) + { + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + if (c == 0) + { + LastStripPolygon = NULL; + return; + } + + // build the actual polygon + + if (NumPolygons >= 2048 || NumVertices+c > 6144) + { + LastStripPolygon = NULL; + return; + } + + Polygon* poly = &CurPolygonRAM[NumPolygons++]; + poly->NumVertices = 0; + + poly->Attr = CurPolygonAttr; + poly->FacingView = facingview; + + if (LastStripPolygon && clipstart > 0) + { + if (c == lastpolyverts) + { + poly->Vertices[0] = reusedvertices[0]; + poly->Vertices[1] = reusedvertices[1]; + } + else + { + Vertex v0 = *reusedvertices[0]; + Vertex v1 = *reusedvertices[1]; + + CurVertexRAM[NumVertices] = v0; + poly->Vertices[0] = &CurVertexRAM[NumVertices]; + CurVertexRAM[NumVertices+1] = v1; + poly->Vertices[1] = &CurVertexRAM[NumVertices+1]; + NumVertices += 2; + } + + poly->NumVertices += 2; + } + + for (int i = clipstart; i < c; i++) + { + CurVertexRAM[NumVertices] = clippedvertices[1][i]; + poly->Vertices[i] = &CurVertexRAM[NumVertices]; + + NumVertices++; + poly->NumVertices++; + } + + if (PolygonMode >= 2) + LastStripPolygon = poly; + else + LastStripPolygon = NULL; } +void SubmitVertex() +{ + s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; + Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; + + UpdateClipMatrix(); + vertextrans->Position[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; + vertextrans->Position[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; + vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; + vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; + + vertextrans->Color[0] = VertexColor[0]; + vertextrans->Color[1] = VertexColor[1]; + vertextrans->Color[2] = VertexColor[2]; + + vertextrans->Clipped = false; + vertextrans->ViewportTransformDone = false; + + VertexNum++; + VertexNumInPoly++; + + switch (PolygonMode) + { + case 0: // triangle + if (VertexNumInPoly == 3) + { + VertexNumInPoly = 0; + SubmitPolygon(); + NumConsecutivePolygons++; + } + break; + + case 1: // quad + if (VertexNumInPoly == 4) + { + VertexNumInPoly = 0; + SubmitPolygon(); + NumConsecutivePolygons++; + } + break; + + case 2: // triangle strip + if (NumConsecutivePolygons & 1) + { + Vertex tmp = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[0]; + TempVertexBuffer[0] = tmp; + + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + else if (VertexNumInPoly == 3) + { + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[0] = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break; + + case 3: // quad strip + if (VertexNumInPoly == 4) + { + Vertex tmp = TempVertexBuffer[3]; + TempVertexBuffer[3] = TempVertexBuffer[2]; + TempVertexBuffer[2] = tmp; + + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[0] = TempVertexBuffer[3]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break; + } +} + + void CmdFIFOWrite(CmdFIFOEntry& entry) { @@ -310,8 +851,17 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsFull()) { - printf("!!! GX FIFO FULL\n"); - return; + //printf("!!! GX FIFO FULL\n"); + //return; + + // temp. hack + // SM64DS seems to overflow the FIFO occasionally + // either leftover bugs in our implementation, or the game accidentally doing that + // TODO: investigate. + // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) + + while (CmdFIFO->IsFull()) + ExecuteCommand(); } CmdFIFO->Write(entry); @@ -338,11 +888,12 @@ CmdFIFOEntry CmdFIFORead() - void ExecuteCommand() { CmdFIFOEntry entry = CmdFIFORead(); + //printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level()); + ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; @@ -352,8 +903,8 @@ void ExecuteCommand() ExecParamCount = 0; GXStat &= ~(1<<14); - - //printf("3D CMD %02X\n", entry.Command); + if (CycleCount > 0) + GXStat |= (1<<27); switch (entry.Command) { @@ -408,6 +959,7 @@ void ExecuteCommand() ProjMatrixStackPointer--; memcpy(ProjMatrix, ProjMatrixStack, 16*4); GXStat |= (1<<14); + ClipMatrixDirty = true; } else if (MatrixMode == 3) { @@ -429,6 +981,7 @@ void ExecuteCommand() memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); GXStat |= (1<<14); + ClipMatrixDirty = true; } break; @@ -460,6 +1013,7 @@ void ExecuteCommand() if (MatrixMode == 0) { memcpy(ProjMatrix, ProjMatrixStack, 16*4); + ClipMatrixDirty = true; } else if (MatrixMode == 3) { @@ -477,12 +1031,16 @@ void ExecuteCommand() } memcpy(PosMatrix, PosMatrixStack[addr], 16*4); + ClipMatrixDirty = true; } break; case 0x15: // identity if (MatrixMode == 0) + { MatrixLoadIdentity(ProjMatrix); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoadIdentity(TexMatrix); else @@ -490,12 +1048,16 @@ void ExecuteCommand() MatrixLoadIdentity(PosMatrix); if (MatrixMode == 2) MatrixLoadIdentity(VecMatrix); + ClipMatrixDirty = true; } break; case 0x16: // load 4x4 if (MatrixMode == 0) + { MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoad4x4(TexMatrix, (s32*)ExecParams); else @@ -503,12 +1065,16 @@ void ExecuteCommand() MatrixLoad4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x4(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; } break; case 0x17: // load 4x3 if (MatrixMode == 0) + { MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixLoad4x3(TexMatrix, (s32*)ExecParams); else @@ -516,12 +1082,16 @@ void ExecuteCommand() MatrixLoad4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x3(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; } break; case 0x18: // mult 4x4 if (MatrixMode == 0) + { MatrixMult4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult4x4(TexMatrix, (s32*)ExecParams); else @@ -532,12 +1102,16 @@ void ExecuteCommand() MatrixMult4x4(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x19: // mult 4x3 if (MatrixMode == 0) + { MatrixMult4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult4x3(TexMatrix, (s32*)ExecParams); else @@ -548,12 +1122,16 @@ void ExecuteCommand() MatrixMult4x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x1A: // mult 3x3 if (MatrixMode == 0) + { MatrixMult3x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixMult3x3(TexMatrix, (s32*)ExecParams); else @@ -564,21 +1142,31 @@ void ExecuteCommand() MatrixMult3x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } + ClipMatrixDirty = true; } break; case 0x1B: // scale if (MatrixMode == 0) + { MatrixScale(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixScale(TexMatrix, (s32*)ExecParams); else + { MatrixScale(PosMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } break; case 0x1C: // translate if (MatrixMode == 0) + { MatrixTranslate(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } else if (MatrixMode == 3) MatrixTranslate(TexMatrix, (s32*)ExecParams); else @@ -586,6 +1174,19 @@ void ExecuteCommand() MatrixTranslate(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixTranslate(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + break; + + case 0x20: // vertex color + { + u32 c = ExecParams[0]; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + VertexColor[0] = r; + VertexColor[1] = g; + VertexColor[2] = b; } break; @@ -593,8 +1194,68 @@ void ExecuteCommand() // TODO: more cycles if lights are enabled break; + case 0x23: // full vertex + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + SubmitVertex(); + break; + + case 0x24: // 10-bit vertex + CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6; + CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4; + CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14; + SubmitVertex(); + break; + + case 0x25: // vertex XY + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x26: // vertex XZ + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x27: // vertex YZ + CurVertex[1] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x28: // 10-bit delta vertex + CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; + CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + SubmitVertex(); + break; + + case 0x29: // polygon attributes + PolygonAttr = ExecParams[0]; + break; + + case 0x40: + PolygonMode = ExecParams[0] & 0x3; + VertexNum = 0; + VertexNumInPoly = 0; + NumConsecutivePolygons = 0; + LastStripPolygon = NULL; + CurPolygonAttr = PolygonAttr; + break; + case 0x50: - // TODO: make it happen upon VBlank, not right now + FlushRequest = 1;//0x80000000 | (ExecParams[0] & 0x3); + CycleCount = 392; + break; + + case 0x60: // viewport x1,y1,x2,y2 + Viewport[0] = ExecParams[0] & 0xFF; + Viewport[1] = (ExecParams[0] >> 8) & 0xFF; + Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1; + Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1; break; } } @@ -602,16 +1263,24 @@ void ExecuteCommand() void Run(s32 cycles) { + if (FlushRequest) + return; + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + return; + + CycleCount -= cycles; + if (CycleCount <= 0) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) ExecuteCommand(); - - if (CmdPIPE->IsEmpty()) - CycleCount = 0; } - else - CycleCount -= cycles; + + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + { + CycleCount = 0; + GXStat &= ~((1<<27)|(1<<14)); + } } @@ -634,6 +1303,29 @@ void CheckFIFODMA() } +void VBlank() +{ + if (FlushRequest) + { + SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); + + CurRAMBank = CurRAMBank?0:1; + CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; + CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; + + NumVertices = 0; + NumPolygons = 0; + + FlushRequest = 0; + } +} + +u8* GetLine(int line) +{ + return SoftRenderer::GetLine(line); +} + + u8 Read8(u32 addr) { return 0; @@ -660,15 +1352,14 @@ u32 Read32(u32 addr) ((ProjMatrixStackPointer & 0x1) << 13) | (fifolevel << 16) | (fifolevel < 128 ? (1<<25) : 0) | - (fifolevel == 0 ? (1<<26) : 0) | - (CycleCount > 0 ? (1<<27) : 0); + (fifolevel == 0 ? (1<<26) : 0); } } if (addr >= 0x04000640 && addr < 0x04000680) { - printf("!! CLIPMTX READ\n"); - return 0; + UpdateClipMatrix(); + return ClipMatrix[(addr & 0x3C) >> 2]; } if (addr >= 0x04000680 && addr < 0x040006A4) { @@ -709,23 +1400,33 @@ void Write32(u32 addr, u32 val) CurCommand = val; ParamCount = 0; TotalParams = CmdNumParams[CurCommand & 0xFF]; + + if (TotalParams > 0) return; } else ParamCount++; - while (ParamCount == TotalParams) + for (;;) { - CmdFIFOEntry entry; - entry.Command = CurCommand & 0xFF; - entry.Param = val; - CmdFIFOWrite(entry); + if ((CurCommand & 0xFF) || (NumCommands == 4)) + { + CmdFIFOEntry entry; + entry.Command = CurCommand & 0xFF; + entry.Param = val; + CmdFIFOWrite(entry); + } - CurCommand >>= 8; - NumCommands--; - if (NumCommands == 0) break; + if (ParamCount >= TotalParams) + { + CurCommand >>= 8; + NumCommands--; + if (NumCommands == 0) break; - ParamCount = 0; - TotalParams = CmdNumParams[CurCommand & 0xFF]; + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 0xFF]; + } + if (ParamCount < TotalParams) + break; } return; diff --git a/GPU3D.h b/GPU3D.h index d606e25d..c95e3b71 100644 --- a/GPU3D.h +++ b/GPU3D.h @@ -22,14 +22,49 @@ namespace GPU3D { +typedef struct +{ + s32 Position[4]; + u8 Color[3]; + + bool Clipped; + + // final vertex attributes. + // allows them to be reused in polygon strips. + + s32 FinalPosition[4]; + s32 FinalColor[3]; + + bool ViewportTransformDone; + +} Vertex; + +typedef struct +{ + Vertex* Vertices[10]; + u32 NumVertices; + + u32 Attr; + + bool FacingView; + +} Polygon; + +extern s32 Viewport[4]; + bool Init(); void DeInit(); void Reset(); +void ExecuteCommand(); + void Run(s32 cycles); void CheckFIFOIRQ(); void CheckFIFODMA(); +void VBlank(); +u8* GetLine(int line); + u8 Read8(u32 addr); u16 Read16(u32 addr); u32 Read32(u32 addr); @@ -37,6 +72,18 @@ void Write8(u32 addr, u8 val); void Write16(u32 addr, u16 val); void Write32(u32 addr, u32 val); +namespace SoftRenderer +{ + +bool Init(); +void DeInit(); +void Reset(); + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys); +u8* GetLine(int line); + +} + } #endif diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp new file mode 100644 index 00000000..afc31877 --- /dev/null +++ b/GPU3D_Soft.cpp @@ -0,0 +1,352 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include +#include +#include "NDS.h" +#include "GPU3D.h" + + +namespace GPU3D +{ +namespace SoftRenderer +{ + +u8 ColorBuffer[256*192 * 4]; +u32 DepthBuffer[256*192]; + + +bool Init() +{ + return true; +} + +void DeInit() +{ + // +} + +void Reset() +{ + memset(ColorBuffer, 0, 256*192 * 4); + memset(DepthBuffer, 0, 256*192 * 4); +} + + +void RenderPixel(u32 attr, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb) +{ + u32* depth = &DepthBuffer[(256*y) + x]; + + bool passdepth = false; + if (attr & (1<<14)) + { + s32 diff = *depth - z; + if ((u32)(diff + 0x200) <= 0x400) + passdepth = true; + } + else + if (z < *depth) + passdepth = true; + + if (!passdepth) return; + + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; + pixel[0] = vr; + pixel[1] = vg; + pixel[2] = vb; + pixel[3] = 31; // TODO: alpha + + // TODO: optional update for translucent pixels + *depth = z; +} + +void RenderPolygon(Polygon* polygon) +{ + int nverts = polygon->NumVertices; + + int vtop = 0, vbot = 0; + s32 ytop = 191, ybot = 0; + + // process the vertices, transform to screen coordinates + // find the topmost and bottommost vertices of the polygon + + for (int i = 0; i < nverts; i++) + { + Vertex* vtx = polygon->Vertices[i]; + + if (!vtx->ViewportTransformDone) + { + s32 posX, posY, posZ, posW; + s32 w = vtx->Position[3]; + if (w == 0) + { + posX = 0; + posY = 0; + posZ = 0; + posW = 0x1000; + } + else + { + posX = ((s64)vtx->Position[0] << 12) / w; + posY = ((s64)vtx->Position[1] << 12) / w; + + // TODO: W-buffering + posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFCFF; + + posW = w; + } + + s32 scrX = (((posX + 0x1000) * Viewport[2]) >> 13) + Viewport[0]; + s32 scrY = (((posY + 0x1000) * Viewport[3]) >> 13) + Viewport[1]; + + if (scrX < 0) scrX = 0; + else if (scrX > 255) scrX = 255; + if (scrY < 0) scrY = 0; + else if (scrY > 191) scrY = 191; + if (posZ < 0) posZ = 0; + else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF; + + vtx->FinalPosition[0] = scrX; + vtx->FinalPosition[1] = 191 - scrY; + vtx->FinalPosition[2] = posZ; + vtx->FinalPosition[3] = posW; + + vtx->FinalColor[0] = vtx->Color[0] ? ((vtx->Color[0] << 4) + 0xF) : 0; + vtx->FinalColor[1] = vtx->Color[1] ? ((vtx->Color[1] << 4) + 0xF) : 0; + vtx->FinalColor[2] = vtx->Color[2] ? ((vtx->Color[2] << 4) + 0xF) : 0; + + vtx->ViewportTransformDone = true; + } + + if (vtx->FinalPosition[1] < ytop) + { + ytop = vtx->FinalPosition[1]; + vtop = i; + } + if (vtx->FinalPosition[1] > ybot) + { + ybot = vtx->FinalPosition[1]; + vbot = i; + } + } + + // draw, line per line + + int lcur = vtop, rcur = vtop; + int lnext, rnext; + + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + + for (s32 y = ytop; y <= ybot; y++) + { + if (y < ybot) + { + while (y == polygon->Vertices[lnext]->FinalPosition[1]) + { + lcur = lnext; + + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + } + + if (lcur == vbot) break; + } + + while (y == polygon->Vertices[rnext]->FinalPosition[1]) + { + rcur = rnext; + + if (polygon->FacingView) + { + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + + if (rcur == vbot) break; + } + } + + Vertex* vlcur = polygon->Vertices[lcur]; + Vertex* vlnext = polygon->Vertices[lnext]; + Vertex* vrcur = polygon->Vertices[rcur]; + Vertex* vrnext = polygon->Vertices[rnext]; + + s32 lfactor, rfactor; + + if (vlnext->FinalPosition[1] == vlcur->FinalPosition[1]) + lfactor = 0; + else + lfactor = ((y - vlcur->FinalPosition[1]) << 12) / (vlnext->FinalPosition[1] - vlcur->FinalPosition[1]); + + if (vrnext->FinalPosition[1] == vrcur->FinalPosition[1]) + rfactor = 0; + else + rfactor = ((y - vrcur->FinalPosition[1]) << 12) / (vrnext->FinalPosition[1] - vrcur->FinalPosition[1]); + + s32 xl = vlcur->FinalPosition[0] + (((vlnext->FinalPosition[0] - vlcur->FinalPosition[0]) * lfactor) >> 12); + s32 xr = vrcur->FinalPosition[0] + (((vrnext->FinalPosition[0] - vrcur->FinalPosition[0]) * rfactor) >> 12); + + if (xl<0 || xr>255) + { + printf("!! BAD X %d %d\n", xl, xr); + continue; // hax + } + + s32 zl = vlcur->FinalPosition[2] + (((s64)(vlnext->FinalPosition[2] -vlcur->FinalPosition[2]) * lfactor) >> 12); + s32 zr = vrcur->FinalPosition[2] + (((s64)(vrnext->FinalPosition[2] - vrcur->FinalPosition[2]) * rfactor) >> 12); + + s32 wl = vlcur->FinalPosition[3] + (((s64)(vlnext->FinalPosition[3] - vlcur->FinalPosition[3]) * lfactor) >> 12); + s32 wr = vrcur->FinalPosition[3] + (((s64)(vrnext->FinalPosition[3] - vrcur->FinalPosition[3]) * rfactor) >> 12); + + s64 perspfactorl1 = ((s64)(0x1000 - lfactor) << 12) / vlcur->FinalPosition[3]; + s64 perspfactorl2 = ((s64)lfactor << 12) / vlnext->FinalPosition[3]; + s64 perspfactorr1 = ((s64)(0x1000 - rfactor) << 12) / vrcur->FinalPosition[3]; + s64 perspfactorr2 = ((s64)rfactor << 12) / vrnext->FinalPosition[3]; + + if (perspfactorl1 + perspfactorl2 == 0) + { + perspfactorl1 = 0x1000; + perspfactorl2 = 0; + } + if (perspfactorr1 + perspfactorr2 == 0) + { + perspfactorr1 = 0x1000; + perspfactorr2 = 0; + } + + s32 rl = ((perspfactorl1 * vlcur->FinalColor[0]) + (perspfactorl2 * vlnext->FinalColor[0])) / (perspfactorl1 + perspfactorl2); + s32 gl = ((perspfactorl1 * vlcur->FinalColor[1]) + (perspfactorl2 * vlnext->FinalColor[1])) / (perspfactorl1 + perspfactorl2); + s32 bl = ((perspfactorl1 * vlcur->FinalColor[2]) + (perspfactorl2 * vlnext->FinalColor[2])) / (perspfactorl1 + perspfactorl2); + + s32 rr = ((perspfactorr1 * vrcur->FinalColor[0]) + (perspfactorr2 * vrnext->FinalColor[0])) / (perspfactorr1 + perspfactorr2); + s32 gr = ((perspfactorr1 * vrcur->FinalColor[1]) + (perspfactorr2 * vrnext->FinalColor[1])) / (perspfactorr1 + perspfactorr2); + s32 br = ((perspfactorr1 * vrcur->FinalColor[2]) + (perspfactorr2 * vrnext->FinalColor[2])) / (perspfactorr1 + perspfactorr2); + + s32 xdiv; + if (xr == xl) + xdiv = 0; + else + xdiv = 0x1000 / (xr - xl); + + for (s32 x = xl; x <= xr; x++) + { + s32 xfactor = (x - xl) * xdiv; + + s32 z = zl + (((s64)(zr - zl) * xfactor) >> 12); + //z = wl + (((s64)(wr - wl) * xfactor) >> 12); + //z -= 0x1FF; + //if (z < 0) z = 0; + + s32 perspfactor1 = ((0x1000 - xfactor) << 12) / wl; + s32 perspfactor2 = (xfactor << 12) / wr; + + if (perspfactor1 + perspfactor2 == 0) + { + perspfactor1 = 0x1000; + perspfactor2 = 0; + } + + // possible optimization: only do color interpolation if the depth test passes + u32 vr = ((perspfactor1 * rl) + (perspfactor2 * rr)) / (perspfactor1 + perspfactor2); + u32 vg = ((perspfactor1 * gl) + (perspfactor2 * gr)) / (perspfactor1 + perspfactor2); + u32 vb = ((perspfactor1 * bl) + (perspfactor2 * br)) / (perspfactor1 + perspfactor2); + + RenderPixel(polygon->Attr, x, y, z, vr>>3, vg>>3, vb>>3); + + // Z debug + /*u8 zerp = (w * 63) / 0xFFFFFF; + pixel[0] = zerp; + pixel[1] = zerp; + pixel[2] = zerp;*/ + } + } + + // DEBUG CODE + /*for (int i = 0; i < nverts; i++) + { + s32 x = scrcoords[i][0]; + s32 y = scrcoords[i][1]; + + u8* pixel = &ColorBuffer[((256*y) + x) * 4]; + pixel[0] = 63; + pixel[1] = 63; + pixel[2] = 63; + pixel[3] = 31; + }*/ +} + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) +{ + // TODO: render translucent polygons last + + // TODO proper clear color/depth support! + for (int i = 0; i < 256*192; i++) + { + ((u32*)ColorBuffer)[i] = 0x00000000; + DepthBuffer[i] = 0xFFFFFF; + } + + for (int i = 0; i < npolys; i++) + { + /*printf("polygon %d: %d %d %d\n", i, polygons[i].Vertices[0]->Color[0], polygons[i].Vertices[0]->Color[1], polygons[i].Vertices[0]->Color[2]); + for (int j = 0; j < polygons[i].NumVertices; j++) + printf(" %d: %f %f %f\n", + j, + polygons[i].Vertices[j]->Position[0]/4096.0f, + polygons[i].Vertices[j]->Position[1]/4096.0f, + polygons[i].Vertices[j]->Position[2]/4096.0f); +*/ + //printf("polygon %d\n", i); + //if (!polygons[i].Vertices[0]->Clipped) continue; + //printf("polygon %d\n", i); + RenderPolygon(&polygons[i]); + } +} + +u8* GetLine(int line) +{ + return &ColorBuffer[line * 256 * 4]; +} + +} +} diff --git a/Makefile.common b/Makefile.common index d9ad9676..dd4698eb 100644 --- a/Makefile.common +++ b/Makefile.common @@ -10,6 +10,7 @@ SOURCES_CXX := $(CORE_DIR)/NDS.cpp \ $(CORE_DIR)/GPU.cpp \ $(CORE_DIR)/GPU2D.cpp \ $(CORE_DIR)/GPU3D.cpp \ + $(CORE_DIR)/GPU3D_Soft.cpp \ $(CORE_DIR)/NDSCart.cpp \ $(CORE_DIR)/RTC.cpp \ $(CORE_DIR)/SPI.cpp \ diff --git a/NDS.cpp b/NDS.cpp index efec9f20..a86f21d5 100644 --- a/NDS.cpp +++ b/NDS.cpp @@ -58,6 +58,8 @@ s32 ARM7Offset; SchedEvent SchedList[Event_MAX]; u32 SchedListMask; +u32 CPUStop; + u8 ARM9BIOS[0x1000]; u8 ARM7BIOS[0x4000]; @@ -293,6 +295,8 @@ void Reset() ARM7->Reset(); CP15::Reset(); + CPUStop = 0; + memset(Timers, 0, 8*sizeof(Timer)); for (i = 0; i < 8; i++) DMAs[i]->Reset(); @@ -326,7 +330,7 @@ void Reset() if (NDSCart::LoadROM(retro_game_path)) Running = true; // hax #else - if (NDSCart::LoadROM("rom/Simple_Tri.nds")) + if (NDSCart::LoadROM("rom/sm64ds.nds")) Running = true; // hax #endif } @@ -384,17 +388,45 @@ void RunFrame() while (Running && framecycles>0) { - CalcIterationCycles(); - - ARM9->CyclesToRun = CurIterationCycles << 1; - - ARM9->Execute(); - s32 ndscyclestorun = ARM9->Cycles >> 1; + s32 ndscyclestorun; s32 ndscycles = 0; - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; - ARM7->Execute(); - ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + CalcIterationCycles(); + + if (CPUStop & 0x1) + { + s32 cycles = CurIterationCycles; + cycles = DMAs[0]->Run(cycles); + if (cycles > 0) cycles = DMAs[1]->Run(cycles); + if (cycles > 0) cycles = DMAs[2]->Run(cycles); + if (cycles > 0) cycles = DMAs[3]->Run(cycles); + ndscyclestorun = CurIterationCycles - cycles; + + // TODO: run other timing critical shit, like timers + GPU3D::Run(ndscyclestorun); + } + else + { + ARM9->CyclesToRun = CurIterationCycles << 1; + ARM9->Execute(); + ndscyclestorun = ARM9->Cycles >> 1; + } + + if (CPUStop & 0x2) + { + s32 cycles = ndscyclestorun - ARM7Offset; + cycles = DMAs[4]->Run(cycles); + if (cycles > 0) cycles = DMAs[5]->Run(cycles); + if (cycles > 0) cycles = DMAs[6]->Run(cycles); + if (cycles > 0) cycles = DMAs[7]->Run(cycles); + ARM7Offset = cycles; + } + else + { + ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; + ARM7->Execute(); + ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + } RunSystem(ndscyclestorun); //GPU3D::Run(ndscyclestorun); @@ -540,6 +572,12 @@ bool HaltInterrupted(u32 cpu) return false; } +void StopCPU(u32 cpu, bool stop) +{ + if (stop) CPUStop |= (1< + diff --git a/melonDS.depend b/melonDS.depend index f49c92a2..e0cd1cef 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,16 +1,16 @@ # depslib dependency file v1.0 -1486502416 source:c:\documents\sources\melonds\main.cpp +1487292827 source:c:\documents\sources\melonds\main.cpp "NDS.h" "GPU.h" -1486502049 c:\documents\sources\melonds\nds.h +1487303037 c:\documents\sources\melonds\nds.h "types.h" 1481161027 c:\documents\sources\melonds\types.h -1486515172 source:c:\documents\sources\melonds\nds.cpp +1487349286 source:c:\documents\sources\melonds\nds.cpp "NDS.h" @@ -24,14 +24,14 @@ "RTC.h" "Wifi.h" -1486512922 source:c:\documents\sources\melonds\arm.cpp +1487349559 source:c:\documents\sources\melonds\arm.cpp "NDS.h" "ARM.h" "ARMInterpreter.h" "GPU3D.h" -1486261220 c:\documents\sources\melonds\arm.h +1487302172 c:\documents\sources\melonds\arm.h "types.h" "NDS.h" "CP15.h" @@ -87,13 +87,13 @@ "NDS.h" "SPI.h" -1486489354 source:c:\documents\sources\melonds\gpu2d.cpp +1487105611 source:c:\documents\sources\melonds\gpu2d.cpp "NDS.h" "GPU.h" -1485991372 c:\documents\sources\melonds\gpu2d.h +1487105228 c:\documents\sources\melonds\gpu2d.h 1481040524 c:\documents\sources\melonds\wifi.h @@ -109,23 +109,23 @@ 1486511075 c:\documents\sources\melonds\fifo.h "types.h" -1486514961 source:c:\documents\sources\melonds\dma.cpp +1487354030 source:c:\documents\sources\melonds\dma.cpp "NDS.h" "DMA.h" "NDSCart.h" "GPU3D.h" -1484698068 c:\documents\sources\melonds\dma.h +1487305393 c:\documents\sources\melonds\dma.h "types.h" -1486502073 source:c:\documents\sources\melonds\gpu.cpp +1487102235 source:c:\documents\sources\melonds\gpu.cpp "NDS.h" "GPU.h" -1486501976 c:\documents\sources\melonds\gpu.h +1487102203 c:\documents\sources\melonds\gpu.h "GPU2D.h" "GPU3D.h" @@ -146,12 +146,18 @@ "NDS.h" "NDSCart.h" -1486514429 c:\documents\sources\melonds\gpu3d.h +1487356069 c:\documents\sources\melonds\gpu3d.h -1486585700 source:c:\documents\sources\melonds\gpu3d.cpp +1487354054 source:c:\documents\sources\melonds\gpu3d.cpp "NDS.h" "GPU.h" "FIFO.h" +1487300658 source:c:\documents\sources\melonds\gpu3d_soft.cpp + + + "NDS.h" + "GPU3D.h" +