/* Copyright 2016-2017 StapleButter This file is part of melonDS. melonDS is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. melonDS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with melonDS. If not, see http://www.gnu.org/licenses/. */ #include #include #include "NDS.h" #include "GPU.h" #include "FIFO.h" // 3D engine notes // // vertex/polygon RAM is filled when a complete polygon is defined, after it's been culled and clipped // 04000604 reads from bank used by renderer // bank used by renderer is emptied at scanline ~192 // banks are swapped at scanline ~194 // TODO: needs more investigation. it's weird. // // clipping rules: // * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons // strip is resumed at the first eligible polygon namespace GPU3D { #define COPYVERTEX(a, b) { *(u64*)&a[0] = *(u64*)&b[0]; *(u64*)&a[2] = *(u64*)&b[2]; } const u32 CmdNumParams[256] = { // 0x00 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 1, 0, 1, 1, 1, 0, 16, 12, 16, 12, 9, 3, 3, 0, 0, 0, // 0x20 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // 0x30 1, 1, 1, 1, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; const s32 CmdNumCycles[256] = { // 0x00 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22, 0, 0, 0, // 0x20 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1, 0, 0, 0, 0, // 0x30 4, 4, 6, 1, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 392, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 103, 9, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; typedef struct { u8 Command; u32 Param; } CmdFIFOEntry; FIFO* CmdFIFO; FIFO* CmdPIPE; u32 NumCommands, CurCommand, ParamCount, TotalParams; u32 GXStat; u32 ExecParams[32]; u32 ExecParamCount; s32 CycleCount; u32 MatrixMode; s32 ProjMatrix[16]; s32 PosMatrix[16]; s32 VecMatrix[16]; s32 TexMatrix[16]; s32 ClipMatrix[16]; bool ClipMatrixDirty; s32 Viewport[4]; s32 ProjMatrixStack[16]; s32 PosMatrixStack[31][16]; s32 ProjMatrixStackPointer; s32 PosMatrixStackPointer; void MatrixLoadIdentity(s32* m); void UpdateClipMatrix(); u32 PolygonMode; s16 CurVertex[3]; u8 VertexColor[3]; Vertex TempVertexBuffer[4]; u32 VertexNum; u32 VertexNumInPoly; Vertex VertexRAM[6144 * 2]; Polygon PolygonRAM[2048 * 2]; Vertex* CurVertexRAM; Polygon* CurPolygonRAM; u32 NumVertices, NumPolygons; u32 CurRAMBank; u32 FlushRequest; bool Init() { CmdFIFO = new FIFO(256); CmdPIPE = new FIFO(4); if (!SoftRenderer::Init()) return false; return true; } void DeInit() { SoftRenderer::DeInit(); delete CmdFIFO; delete CmdPIPE; } void Reset() { CmdFIFO->Clear(); CmdPIPE->Clear(); NumCommands = 0; CurCommand = 0; ParamCount = 0; TotalParams = 0; GXStat = 0; memset(ExecParams, 0, 32*4); ExecParamCount = 0; CycleCount = 0; MatrixMode = 0; MatrixLoadIdentity(ProjMatrix); MatrixLoadIdentity(PosMatrix); MatrixLoadIdentity(VecMatrix); MatrixLoadIdentity(TexMatrix); ClipMatrixDirty = true; UpdateClipMatrix(); memset(Viewport, 0, sizeof(Viewport)); memset(ProjMatrixStack, 0, 16*4); memset(PosMatrixStack, 0, 31 * 16*4); ProjMatrixStackPointer = 0; PosMatrixStackPointer = 0; VertexNum = 0; VertexNumInPoly = 0; CurRAMBank = 0; CurVertexRAM = &VertexRAM[0]; CurPolygonRAM = &PolygonRAM[0]; NumVertices = 0; NumPolygons = 0; FlushRequest = 0; SoftRenderer::Reset(); } void MatrixLoadIdentity(s32* m) { m[0] = 0x1000; m[1] = 0; m[2] = 0; m[3] = 0; m[4] = 0; m[5] = 0x1000; m[6] = 0; m[7] = 0; m[8] = 0; m[9] = 0; m[10] = 0x1000; m[11] = 0; m[12] = 0; m[13] = 0; m[14] = 0; m[15] = 0x1000; } void MatrixLoad4x4(s32* m, s32* s) { memcpy(m, s, 16*4); } void MatrixLoad4x3(s32* m, s32* s) { m[0] = s[0]; m[1] = s[1]; m[2] = s[2]; m[3] = 0; m[4] = s[3]; m[5] = s[4]; m[6] = s[5]; m[7] = 0; m[8] = s[6]; m[9] = s[7]; m[10] = s[8]; m[11] = 0; m[12] = s[9]; m[13] = s[10]; m[14] = s[11]; m[15] = 0x1000; } void MatrixMult4x4(s32* m, s32* s) { s32 tmp[16]; memcpy(tmp, m, 16*4); // m = s*m m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8] + (s64)s[3]*tmp[12]) >> 12; m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9] + (s64)s[3]*tmp[13]) >> 12; m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10] + (s64)s[3]*tmp[14]) >> 12; m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11] + (s64)s[3]*tmp[15]) >> 12; m[4] = ((s64)s[4]*tmp[0] + (s64)s[5]*tmp[4] + (s64)s[6]*tmp[8] + (s64)s[7]*tmp[12]) >> 12; m[5] = ((s64)s[4]*tmp[1] + (s64)s[5]*tmp[5] + (s64)s[6]*tmp[9] + (s64)s[7]*tmp[13]) >> 12; m[6] = ((s64)s[4]*tmp[2] + (s64)s[5]*tmp[6] + (s64)s[6]*tmp[10] + (s64)s[7]*tmp[14]) >> 12; m[7] = ((s64)s[4]*tmp[3] + (s64)s[5]*tmp[7] + (s64)s[6]*tmp[11] + (s64)s[7]*tmp[15]) >> 12; m[8] = ((s64)s[8]*tmp[0] + (s64)s[9]*tmp[4] + (s64)s[10]*tmp[8] + (s64)s[11]*tmp[12]) >> 12; m[9] = ((s64)s[8]*tmp[1] + (s64)s[9]*tmp[5] + (s64)s[10]*tmp[9] + (s64)s[11]*tmp[13]) >> 12; m[10] = ((s64)s[8]*tmp[2] + (s64)s[9]*tmp[6] + (s64)s[10]*tmp[10] + (s64)s[11]*tmp[14]) >> 12; m[11] = ((s64)s[8]*tmp[3] + (s64)s[9]*tmp[7] + (s64)s[10]*tmp[11] + (s64)s[11]*tmp[15]) >> 12; m[12] = ((s64)s[12]*tmp[0] + (s64)s[13]*tmp[4] + (s64)s[14]*tmp[8] + (s64)s[15]*tmp[12]) >> 12; m[13] = ((s64)s[12]*tmp[1] + (s64)s[13]*tmp[5] + (s64)s[14]*tmp[9] + (s64)s[15]*tmp[13]) >> 12; m[14] = ((s64)s[12]*tmp[2] + (s64)s[13]*tmp[6] + (s64)s[14]*tmp[10] + (s64)s[15]*tmp[14]) >> 12; m[15] = ((s64)s[12]*tmp[3] + (s64)s[13]*tmp[7] + (s64)s[14]*tmp[11] + (s64)s[15]*tmp[15]) >> 12; } void MatrixMult4x3(s32* m, s32* s) { s32 tmp[16]; memcpy(tmp, m, 16*4); /*printf("4x3 matrix\n"); for (int j = 0; j < 12; j += 3) { for (int i = 0; i < 3; i++) printf("%f ", s[i]/4096.0f); printf("\n"); }*/ // m = s*m m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; m[12] = ((s64)s[9]*tmp[0] + (s64)s[10]*tmp[4] + (s64)s[11]*tmp[8] + (s64)0x1000*tmp[12]) >> 12; m[13] = ((s64)s[9]*tmp[1] + (s64)s[10]*tmp[5] + (s64)s[11]*tmp[9] + (s64)0x1000*tmp[13]) >> 12; m[14] = ((s64)s[9]*tmp[2] + (s64)s[10]*tmp[6] + (s64)s[11]*tmp[10] + (s64)0x1000*tmp[14]) >> 12; m[15] = ((s64)s[9]*tmp[3] + (s64)s[10]*tmp[7] + (s64)s[11]*tmp[11] + (s64)0x1000*tmp[15]) >> 12; } void MatrixMult3x3(s32* m, s32* s) { s32 tmp[12]; memcpy(tmp, m, 12*4); // m = s*m m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; } void MatrixScale(s32* m, s32* s) { m[0] = ((s64)s[0]*m[0]) >> 12; m[1] = ((s64)s[0]*m[1]) >> 12; m[2] = ((s64)s[0]*m[2]) >> 12; m[3] = ((s64)s[0]*m[3]) >> 12; m[4] = ((s64)s[1]*m[4]) >> 12; m[5] = ((s64)s[1]*m[5]) >> 12; m[6] = ((s64)s[1]*m[6]) >> 12; m[7] = ((s64)s[1]*m[7]) >> 12; m[8] = ((s64)s[2]*m[8]) >> 12; m[9] = ((s64)s[2]*m[9]) >> 12; m[10] = ((s64)s[2]*m[10]) >> 12; m[11] = ((s64)s[2]*m[11]) >> 12; } void MatrixTranslate(s32* m, s32* s) { m[12] += ((s64)s[0]*m[0] + (s64)s[1]*m[4] + (s64)s[2]*m[8]) >> 12; m[13] += ((s64)s[0]*m[1] + (s64)s[1]*m[5] + (s64)s[2]*m[9]) >> 12; m[14] += ((s64)s[0]*m[2] + (s64)s[1]*m[6] + (s64)s[2]*m[10]) >> 12; } void UpdateClipMatrix() { if (!ClipMatrixDirty) return; ClipMatrixDirty = false; memcpy(ClipMatrix, ProjMatrix, 16*4); MatrixMult4x4(ClipMatrix, PosMatrix); } template void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) { s64 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); Vertex mid; #define INTERPOLATE(var) mid.var = vin->var + (((vout->var - vin->var) * factor) >> 12); INTERPOLATE(Position[0]); INTERPOLATE(Position[1]); INTERPOLATE(Position[2]); INTERPOLATE(Position[3]); INTERPOLATE(Color[0]); INTERPOLATE(Color[1]); INTERPOLATE(Color[2]); #undef INTERPOLATE outbuf[num] = mid; } void SubmitPolygon() { // clip. // for each vertex: // if it's outside, check if the previous and next vertices are inside, if so, fixor Vertex clippedvertices[2][10]; u32 numclipped; int nverts = PolygonMode & 0x1 ? 4:3; int nvisible = 0; int prev, next; int c; /*if (NumPolygons == 91) for (int i = 0; i < nverts; i++) { Vertex vtx = TempVertexBuffer[i]; printf("pre-clip v%d: %f %f %f %f\n", i, vtx.Position[0]/4096.0f, vtx.Position[1]/4096.0f, vtx.Position[2]/4096.0f, vtx.Position[3]/4096.0f); }*/ // X clipping prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = TempVertexBuffer[i]; if (vtx.Position[0] > vtx.Position[3]) { Vertex* vprev = &TempVertexBuffer[prev]; if (vprev->Position[0] <= vprev->Position[3]) { ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vprev); c++; } Vertex* vnext = &TempVertexBuffer[next]; if (vnext->Position[0] <= vnext->Position[3]) { ClipSegment<0, 1>(clippedvertices[0], c, &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = clippedvertices[0][i]; if (vtx.Position[0] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[0] >= -vprev->Position[3]) { ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[0] >= -vnext->Position[3]) { ClipSegment<0, -1>(clippedvertices[1], c, &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } // Y clipping nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = clippedvertices[1][i]; if (vtx.Position[1] > vtx.Position[3]) { Vertex* vprev = &clippedvertices[1][prev]; if (vprev->Position[1] <= vprev->Position[3]) { ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[1][next]; if (vnext->Position[1] <= vnext->Position[3]) { ClipSegment<1, 1>(clippedvertices[0], c, &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = clippedvertices[0][i]; if (vtx.Position[1] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[1] >= -vprev->Position[3]) { ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[1] >= -vnext->Position[3]) { ClipSegment<1, -1>(clippedvertices[1], c, &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } // Z clipping nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = clippedvertices[1][i]; if (vtx.Position[2] > vtx.Position[3]) { Vertex* vprev = &clippedvertices[1][prev]; if (vprev->Position[2] <= vprev->Position[3]) { ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[1][next]; if (vnext->Position[2] <= vnext->Position[3]) { ClipSegment<2, 1>(clippedvertices[0], c, &vtx, vnext); c++; } } else clippedvertices[0][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } nverts = c; prev = nverts-1; next = 1; c = 0; for (int i = 0; i < nverts; i++) { Vertex vtx = clippedvertices[0][i]; if (vtx.Position[2] < -vtx.Position[3]) { Vertex* vprev = &clippedvertices[0][prev]; if (vprev->Position[2] >= -vprev->Position[3]) { ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vprev); c++; } Vertex* vnext = &clippedvertices[0][next]; if (vnext->Position[2] >= -vnext->Position[3]) { ClipSegment<2, -1>(clippedvertices[1], c, &vtx, vnext); c++; } } else clippedvertices[1][c++] = vtx; prev++; if (prev >= nverts) prev = 0; next++; if (next >= nverts) next = 0; } if (c == 0) return; // build the actual polygon // TODO: tri/quad strips if (NumPolygons >= 2048) return; if (NumVertices+c > 6144) return; Polygon* poly = &CurPolygonRAM[NumPolygons++]; poly->NumVertices = 0; for (int i = 0; i < c; i++) { CurVertexRAM[NumVertices] = clippedvertices[1][i]; poly->Vertices[i] = &CurVertexRAM[NumVertices]; NumVertices++; poly->NumVertices++; } } void SubmitVertex() { s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; //s32 vertextrans[4]; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; if (PolygonMode & 0x2) return; //printf("vertex: %f %f %f\n", vertex[0]/4096.0f, vertex[1]/4096.0f, vertex[2]/4096.0f); UpdateClipMatrix(); vertextrans->Position[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; vertextrans->Position[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; /*printf("vertex fart: %f %f %f %f\n", vertextrans->Position[0]/4096.0f, vertextrans->Position[1]/4096.0f, vertextrans->Position[2]/4096.0f, vertextrans->Position[3]/4096.0f);*/ /*s32 w_inv; if (vertextrans->Position[3] == 0) w_inv = 0x1000; // checkme else if(vertextrans->Position[3] < 0) w_inv = 0x1000000 / -vertextrans->Position[3]; else w_inv = 0x1000000 / vertextrans->Position[3]; vertextrans->Position[0] = (vertextrans->Position[0] * w_inv) >> 12; vertextrans->Position[1] = (vertextrans->Position[1] * w_inv) >> 12; vertextrans->Position[2] = (vertextrans->Position[2] * w_inv) >> 12;*/ vertextrans->Color[0] = VertexColor[0]; vertextrans->Color[1] = VertexColor[1]; vertextrans->Color[2] = VertexColor[2]; /*printf("vertex trans: %f %f %f %f\n", vertextrans->Position[0]/4096.0f, vertextrans->Position[1]/4096.0f, vertextrans->Position[2]/4096.0f, vertextrans->Position[3]/4096.0f); printf("clip: %f %f %f %f\n", ClipMatrix[3]/4096.0f, ClipMatrix[7]/4096.0f, ClipMatrix[11]/4096.0f, ClipMatrix[15]/4096.0f);*/ /*if (vertextrans[3] == 0) { //printf("!!!! VERTEX W IS ZERO\n"); //return; vertextrans[3] = 0x1000; // checkme } s32 screenX = (((vertextrans[0]+vertextrans[3]) * Viewport[2]) / (vertextrans[3]<<1)) + Viewport[0]; s32 screenY = (((vertextrans[1]+vertextrans[3]) * Viewport[3]) / (vertextrans[3]<<1)) + Viewport[1]; printf("screen: %d, %d\n", screenX, screenY); s32* finalvertex = TempVertexBuffer[VertexNumInPoly]; finalvertex[0] = screenX; finalvertex[1] = screenY; finalvertex[2] = vertextrans[2]; finalvertex[3] = vertextrans[3];*/ // triangle strip: 0,1,2 1,2,3 2,3,4 3,4,5 ... // quad strip: 0,1,3,2 2,3,5,4 4,5,7,6 6,7,9,8 ... VertexNum++; VertexNumInPoly++; switch (PolygonMode) { case 0: // triangle if (VertexNumInPoly == 3) { VertexNumInPoly = 0; SubmitPolygon(); } break; case 1: // quad if (VertexNumInPoly == 4) { VertexNumInPoly = 0; SubmitPolygon(); } break; /*case 2: // triangle strip if (VertexNum > 3) { if (VertexNumInPoly == 1) { VertexNumInPoly = 0; // reorder } else VertexNumInPoly = 0; SubmitPolygon(); } else if (VertexNum == 3) { VertexNumInPoly = 2; SubmitPolygon(); TempVertexBuffer[0] = TempVertexBuffer[1]; TempVertexBuffer[1] = TempVertexBuffer[2]; } break;*/ default: VertexNumInPoly = 0; break; } } void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) { CmdPIPE->Write(entry); GXStat |= (1<<27); } else { if (CmdFIFO->IsFull()) { printf("!!! GX FIFO FULL\n"); //NDS::debug(0); return; } CmdFIFO->Write(entry); } } CmdFIFOEntry CmdFIFORead() { CmdFIFOEntry ret = CmdPIPE->Read(); if (CmdPIPE->Level() <= 2) { if (!CmdFIFO->IsEmpty()) CmdPIPE->Write(CmdFIFO->Read()); if (!CmdFIFO->IsEmpty()) CmdPIPE->Write(CmdFIFO->Read()); CheckFIFODMA(); CheckFIFOIRQ(); } return ret; } void ExecuteCommand() { CmdFIFOEntry entry = CmdFIFORead(); //printf("FIFO: %02X %08X\n", entry.Command, entry.Param); ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; //if ((entry.Command&0xF0)==0x10) // printf("MATRIX CMD %02X %08X\n", entry.Command, entry.Param); if (ExecParamCount >= CmdNumParams[entry.Command]) { //CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; GXStat &= ~(1<<14); //if (CycleCount > 0) // GXStat |= (1<<27); //printf("3D CMD %02X\n", entry.Command); switch (entry.Command) { case 0x10: // matrix mode MatrixMode = ExecParams[0] & 0x3; break; case 0x11: // push matrix if (MatrixMode == 0) { if (ProjMatrixStackPointer > 0) { printf("!! PROJ MATRIX STACK OVERFLOW\n"); GXStat |= (1<<15); break; } memcpy(ProjMatrixStack, ProjMatrix, 16*4); ProjMatrixStackPointer++; GXStat |= (1<<14); } else if (MatrixMode == 3) { printf("!! CAN'T PUSH TEXTURE MATRIX\n"); GXStat |= (1<<15); // CHECKME } else { if (PosMatrixStackPointer > 30) { printf("!! POS MATRIX STACK OVERFLOW\n"); GXStat |= (1<<15); break; } memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4); PosMatrixStackPointer++; GXStat |= (1<<14); } break; case 0x12: // pop matrix if (MatrixMode == 0) { if (ProjMatrixStackPointer <= 0) { printf("!! PROJ MATRIX STACK UNDERFLOW\n"); GXStat |= (1<<15); break; } ProjMatrixStackPointer--; memcpy(ProjMatrix, ProjMatrixStack, 16*4); GXStat |= (1<<14); ClipMatrixDirty = true; } else if (MatrixMode == 3) { printf("!! CAN'T POP TEXTURE MATRIX\n"); GXStat |= (1<<15); // CHECKME } else { s32 offset = (s32)(ExecParams[0] << 26) >> 26; PosMatrixStackPointer -= offset; if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30) { printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer); PosMatrixStackPointer += offset; GXStat |= (1<<15); break; } memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); GXStat |= (1<<14); ClipMatrixDirty = true; } break; case 0x13: // store matrix if (MatrixMode == 0) { memcpy(ProjMatrixStack, ProjMatrix, 16*4); } else if (MatrixMode == 3) { printf("!! CAN'T STORE TEXTURE MATRIX\n"); GXStat |= (1<<15); // CHECKME } else { u32 addr = ExecParams[0] & 0x1F; if (addr > 30) { printf("!! POS MATRIX STORE ADDR 31\n"); GXStat |= (1<<15); break; } memcpy(PosMatrixStack[addr], PosMatrix, 16*4); } break; case 0x14: // restore matrix if (MatrixMode == 0) { memcpy(ProjMatrix, ProjMatrixStack, 16*4); ClipMatrixDirty = true; } else if (MatrixMode == 3) { printf("!! CAN'T RESTORE TEXTURE MATRIX\n"); GXStat |= (1<<15); // CHECKME } else { u32 addr = ExecParams[0] & 0x1F; if (addr > 30) { printf("!! POS MATRIX STORE ADDR 31\n"); GXStat |= (1<<15); break; } memcpy(PosMatrix, PosMatrixStack[addr], 16*4); ClipMatrixDirty = true; } break; case 0x15: // identity if (MatrixMode == 0) { MatrixLoadIdentity(ProjMatrix); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixLoadIdentity(TexMatrix); else { MatrixLoadIdentity(PosMatrix); if (MatrixMode == 2) MatrixLoadIdentity(VecMatrix); ClipMatrixDirty = true; } break; case 0x16: // load 4x4 if (MatrixMode == 0) { MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixLoad4x4(TexMatrix, (s32*)ExecParams); else { MatrixLoad4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x4(VecMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } break; case 0x17: // load 4x3 if (MatrixMode == 0) { MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixLoad4x3(TexMatrix, (s32*)ExecParams); else { MatrixLoad4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x3(VecMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } break; case 0x18: // mult 4x4 if (MatrixMode == 0) { MatrixMult4x4(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixMult4x4(TexMatrix, (s32*)ExecParams); else { MatrixMult4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult4x4(VecMatrix, (s32*)ExecParams); CycleCount += 30; } ClipMatrixDirty = true; } break; case 0x19: // mult 4x3 if (MatrixMode == 0) { MatrixMult4x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixMult4x3(TexMatrix, (s32*)ExecParams); else { MatrixMult4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult4x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } ClipMatrixDirty = true; } break; case 0x1A: // mult 3x3 if (MatrixMode == 0) { MatrixMult3x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixMult3x3(TexMatrix, (s32*)ExecParams); else { MatrixMult3x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult3x3(VecMatrix, (s32*)ExecParams); CycleCount += 30; } ClipMatrixDirty = true; } break; case 0x1B: // scale if (MatrixMode == 0) { MatrixScale(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixScale(TexMatrix, (s32*)ExecParams); else { MatrixScale(PosMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } break; case 0x1C: // translate if (MatrixMode == 0) { MatrixTranslate(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } else if (MatrixMode == 3) MatrixTranslate(TexMatrix, (s32*)ExecParams); else { MatrixTranslate(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixTranslate(VecMatrix, (s32*)ExecParams); ClipMatrixDirty = true; } break; case 0x20: // vertex color { u32 c = ExecParams[0]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; VertexColor[0] = r ? (r<<1)+1 : 0; VertexColor[1] = g ? (g<<1)+1 : 0; VertexColor[2] = b ? (b<<1)+1 : 0; } break; case 0x21: // TODO: more cycles if lights are enabled break; case 0x23: // full vertex CurVertex[0] = ExecParams[0] & 0xFFFF; CurVertex[1] = ExecParams[0] >> 16; CurVertex[2] = ExecParams[1] & 0xFFFF; SubmitVertex(); break; case 0x24: // 10-bit vertex CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6; CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4; CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14; SubmitVertex(); break; case 0x25: // vertex XY CurVertex[0] = ExecParams[0] & 0xFFFF; CurVertex[1] = ExecParams[0] >> 16; SubmitVertex(); break; case 0x26: // vertex XZ CurVertex[0] = ExecParams[0] & 0xFFFF; CurVertex[2] = ExecParams[0] >> 16; SubmitVertex(); break; case 0x27: // vertex YZ CurVertex[1] = ExecParams[0] & 0xFFFF; CurVertex[2] = ExecParams[0] >> 16; SubmitVertex(); break; case 0x28: // 10-bit delta vertex CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; SubmitVertex(); break; case 0x40: PolygonMode = ExecParams[0] & 0x3; VertexNum = 0; VertexNumInPoly = 0; break; case 0x50: FlushRequest = 1;//0x80000000 | (ExecParams[0] & 0x3); break; case 0x60: // viewport x1,y1,x2,y2 Viewport[0] = ExecParams[0] & 0xFF; Viewport[1] = (ExecParams[0] >> 8) & 0xFF; Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1; Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1; break; } } } void Run(s32 cycles) { if (FlushRequest) return; if (CycleCount <= 0) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) ExecuteCommand(); } CycleCount -= cycles; if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { CycleCount = 0; GXStat &= ~(1<<27); } } void CheckFIFOIRQ() { bool irq = false; switch (GXStat >> 30) { case 1: irq = (CmdFIFO->Level() < 128); break; case 2: irq = CmdFIFO->IsEmpty(); break; } if (irq) NDS::TriggerIRQ(0, NDS::IRQ_GXFIFO); } void CheckFIFODMA() { if (CmdFIFO->Level() < 128) NDS::CheckDMAs(0, 0x07); } void VBlank() { if (FlushRequest) { SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); CurRAMBank = CurRAMBank?0:1; CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; NumVertices = 0; NumPolygons = 0; FlushRequest = 0; GXStat &= ~(1<<27); } } u8* GetLine(int line) { return SoftRenderer::GetLine(line); } u8 Read8(u32 addr) { return 0; } u16 Read16(u32 addr) { return 0; } u32 Read32(u32 addr) { switch (addr) { case 0x04000320: return 46; // TODO, eventually case 0x04000600: { u32 fifolevel = CmdFIFO->Level(); return GXStat | ((PosMatrixStackPointer & 0x1F) << 8) | ((ProjMatrixStackPointer & 0x1) << 13) | (fifolevel << 16) | (fifolevel < 128 ? (1<<25) : 0) | (fifolevel == 0 ? (1<<26) : 0); } } if (addr >= 0x04000640 && addr < 0x04000680) { UpdateClipMatrix(); return ClipMatrix[(addr & 0x3C) >> 2]; } if (addr >= 0x04000680 && addr < 0x040006A4) { printf("!! VECMTX READ\n"); return 0; } return 0; } void Write8(u32 addr, u8 val) { // } void Write16(u32 addr, u16 val) { // } void Write32(u32 addr, u32 val) { switch (addr) { case 0x04000600: if (val & 0x8000) GXStat &= ~0x8000; val &= 0xC0000000; GXStat &= 0x3FFFFFFF; GXStat |= val; return; } if (addr >= 0x04000400 && addr < 0x04000440) { if (NumCommands == 0) { NumCommands = 4; CurCommand = val; ParamCount = 0; TotalParams = CmdNumParams[CurCommand & 0xFF]; if (TotalParams > 0) return; } else ParamCount++; for (;;) { CmdFIFOEntry entry; entry.Command = CurCommand & 0xFF; entry.Param = val; CmdFIFOWrite(entry); if (ParamCount >= TotalParams) { CurCommand >>= 8; NumCommands--; if (NumCommands == 0) break; ParamCount = 0; TotalParams = CmdNumParams[CurCommand & 0xFF]; } if (ParamCount < TotalParams) break; } return; } if (addr >= 0x04000440 && addr < 0x040005CC) { CmdFIFOEntry entry; entry.Command = (addr & 0x1FC) >> 2; entry.Param = val; CmdFIFOWrite(entry); return; } } }