diff --git a/desmume/src/FIFO.cpp b/desmume/src/FIFO.cpp index 632ae7068..f4e6457ed 100644 --- a/desmume/src/FIFO.cpp +++ b/desmume/src/FIFO.cpp @@ -161,7 +161,6 @@ void GFX_FIFOsend(u8 cmd, u32 param) if (gxFIFO.tail == 0) // FIFO empty { - if (gxPIPE.tail < 4) // pipe not full { gxPIPE.cmd[gxPIPE.tail] = cmd; @@ -178,7 +177,7 @@ void GFX_FIFOsend(u8 cmd, u32 param) //INFO("GFX FIFO: Send GFX 3D cmd 0x%02X to FIFO - 0x%08X (%03i/%02X)\n", cmd, param, gxFIFO.tail, gxFIFO.tail); if (gxstat & 0x01000000) { - //INFO("ERROR: gxFIFO is full (cmd 0x%02X = 0x%08X) (prev cmd 0x%02X = 0x%08X)\n", cmd, param, gxFIFO.cmd[255], gxFIFO.param[255]); + //INFO("ERROR: gxFIFO is full\n"); return; // full } @@ -206,8 +205,7 @@ void GFX_FIFOsend(u8 cmd, u32 param) NDS_RescheduleGXFIFO(); } -extern void execHardware_doAllDma(EDMAMode modeNum); -BOOL FORCEINLINE GFX_FIFOrecv(u8 *cmd, u32 *param) +static BOOL GFX_FIFOrecv(u8 *cmd, u32 *param) { u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); @@ -216,7 +214,7 @@ BOOL FORCEINLINE GFX_FIFOrecv(u8 *cmd, u32 *param) gxstat &= 0xF000FFFF; gxstat |= 0x06000000; T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); - if ((gxstat & 0xC0000000)) // IRQ: empty + if ((gxstat & 0x80000000)) // IRQ: empty { setIF(0, (1<<21)); } @@ -241,12 +239,7 @@ BOOL FORCEINLINE GFX_FIFOrecv(u8 *cmd, u32 *param) gxstat |= (gxFIFO.tail << 16); if (gxFIFO.tail < 128) - { gxstat |= 0x02000000; -#ifdef USE_GEOMETRY_FIFO_EMULATION - execHardware_doAllDma(EDMAMode_GXFifo); -#endif - } if (gxFIFO.tail == 0) // empty gxstat |= 0x04000000; diff --git a/desmume/src/MMU.cpp b/desmume/src/MMU.cpp index f7b3a8dbb..b2e3507f5 100644 --- a/desmume/src/MMU.cpp +++ b/desmume/src/MMU.cpp @@ -926,7 +926,6 @@ void MMU_clearMem() memset(MMU.DMACycle, 0, sizeof(MMU.DMACycle)); memset(MMU.DMACrt, 0, sizeof(u32) * 2 * 4); memset(MMU.DMAing, 0, sizeof(BOOL) * 2 * 4); - memset(MMU.DMACompleted, 0, sizeof(BOOL) * 2 * 4); memset(MMU.dscard, 0, sizeof(nds_dscard) * 2); @@ -1250,21 +1249,10 @@ u32 MMU_readFromGC() template void FASTCALL MMU_doDMA(u32 num) { -#ifdef USE_GEOMETRY_FIFO_EMULATION - if (MMU.DMACompleted[PROCNUM][num]) return; -#endif u32 src = DMASrc[PROCNUM][num]; u32 dst = DMADst[PROCNUM][num]; - u32 taille = 0; - bool paused = false; + u32 taille; -#ifdef USE_GEOMETRY_FIFO_EMULATION - if (MMU.DMAStartTime[PROCNUM][num]== EDMAMode_GXFifo) - { - if (gxFIFO.tail > 127) return; - } -#endif - if(src==dst) { T1WriteLong(MMU.MMU_MEM[PROCNUM][0x40], 0xB8 + (0xC*num), T1ReadLong(MMU.MMU_MEM[PROCNUM][0x40], 0xB8 + (0xC*num)) & 0x7FFFFFFF); @@ -1279,6 +1267,7 @@ void FASTCALL MMU_doDMA(u32 num) return; } + //word count taille = (MMU.DMACrt[PROCNUM][num]&0x1FFFFF); if(taille == 0) taille = 0x200000; //according to gbatek.. @@ -1292,23 +1281,21 @@ void FASTCALL MMU_doDMA(u32 num) if(MMU.DMAStartTime[PROCNUM][num] == EDMAMode_Card) taille *= 0x80; - + MMU.DMACycle[PROCNUM][num] = taille + nds_timer; //TODO - surely this is a gross simplification MMU.DMAing[PROCNUM][num] = TRUE; MMU.CheckDMAs |= (1<<(num+(PROCNUM<<2))); - DMALOG("ARM%c: DMA%d run src=%08X dst=%08X start=%d taille=%d repeat=%s %08X\r\n", - (PROCNUM==0)?'9':'7', num, src, dst, MMU.DMAStartTime[PROCNUM][num], taille, + DMALOG("PROCNUM %d, dma %d src %08X dst %08X start %d taille %d repeat %s %08X\r\n", + PROCNUM, num, src, dst, MMU.DMAStartTime[PROCNUM][num], taille, (MMU.DMACrt[PROCNUM][num]&(1<<25))?"on":"off",MMU.DMACrt[PROCNUM][num]); - -#ifndef USE_GEOMETRY_FIFO_EMULATION + if(!(MMU.DMACrt[PROCNUM][num]&(1<<25))) MMU.DMAStartTime[PROCNUM][num] = 0; -#endif NDS_RescheduleDMA(); - + // transfer { u32 i=0; @@ -1343,21 +1330,6 @@ void FASTCALL MMU_doDMA(u32 num) _MMU_write32(dst, _MMU_read32(src)); dst += dstinc; src += srcinc; -#ifdef USE_GEOMETRY_FIFO_EMULATION - if (MMU.DMAStartTime[PROCNUM][num] == EDMAMode_GXFifo) - { - if ( gxFIFO.tail > 255) - { - if (i == taille) break; - paused = true; - MMU.DMACrt[PROCNUM][num] &= 0xFFE00000; - MMU.DMACrt[PROCNUM][num] |= ((taille-i-1) & 0x1FFFFF); - MMU.DMAing[PROCNUM][num] = FALSE; - MMU.DMACycle[PROCNUM][num] = nds_timer+1; - break; - } - } -#endif } else for(; i < taille; ++i) @@ -1366,7 +1338,7 @@ void FASTCALL MMU_doDMA(u32 num) dst += dstinc; src += srcinc; } - + //write back the addresses DMASrc[PROCNUM][num] = src; if((u & 0x3)!=3) //but dont write back dst if we were supposed to reload @@ -1377,9 +1349,6 @@ void FASTCALL MMU_doDMA(u32 num) //(there is no proof for this code, but it is reasonable) T1WriteLong(MMU.MMU_MEM[PROCNUM][0x40], 0xB0+12*num, DMASrc[PROCNUM][num]); T1WriteLong(MMU.MMU_MEM[PROCNUM][0x40], 0xB4+12*num, DMADst[PROCNUM][num]); - - if (!paused) - MMU.DMACompleted[PROCNUM][num] = true; } } @@ -1564,31 +1533,16 @@ template static INLINE void write_dma_hictrl(const int dmanum, const u MMU.DMAStartTime[proc][dmanum] = EDMAMode7_GBASlot; } MMU.DMACrt[proc][dmanum] = v; - MMU.DMACompleted[proc][dmanum] = false; - MMU.DMAing[proc][dmanum] = false; - -#ifdef USE_GEOMETRY_FIFO_EMULATION - if(MMU.DMAStartTime[proc][dmanum] == EDMAMode_Immediate) - { - MMU_doDMA(dmanum); - } - - if (MMU.DMAStartTime[proc][dmanum] == EDMAMode_GXFifo) - { -#ifdef _3DINFO - INFO("ARM%c: DMA%d control src=0x%08X dst=0x%08X %s (gxFIFO tail %03i)\n", (proc==0)?'9':'7', dmanum, DMASrc[proc][dmanum], DMADst[proc][dmanum], ((val>>15)&0x01)?"ON":"OFF", gxFIFO.tail); -#endif - MMU_doDMA(dmanum); - } -#else - if(MMU.DMAStartTime[proc][dmanum] == EDMAMode_Immediate + if(MMU.DMAStartTime[proc][dmanum] == EDMAMode_Immediate + //TODO HACK: I think this is a gxfifo hack: || MMU.DMAStartTime[proc][dmanum] == EDMAMode_GXFifo) { MMU_doDMA(dmanum); } -#endif - DMALOG("ARM%c: DMA%d control src=0x%08X dst=0x%08X %s\n", (proc==0)?'9':'7', dmanum, DMASrc[proc][dmanum], DMADst[proc][dmanum], ((val>>15)&0x01)?"ON":"OFF"); + //printf("dma ctrl %d %d\n",proc,dmanum); + + //LOG("ARMCPU_ARM9 %d, dma %d src %08X dst %08X %s\r\n", ARMCPU_ARM9, 0, DMASrc[ARMCPU_ARM9][0], DMADst[ARMCPU_ARM9][0], (val&(1<<25))?"ON":"OFF"); NDS_RescheduleDMA(); } diff --git a/desmume/src/MMU.h b/desmume/src/MMU.h index 424fce1e0..fedbf5ae2 100644 --- a/desmume/src/MMU.h +++ b/desmume/src/MMU.h @@ -100,7 +100,6 @@ struct MMU_struct { u64 DMACycle[2][4]; u32 DMACrt[2][4]; BOOL DMAing[2][4]; - BOOL DMACompleted[2][4]; BOOL divRunning; s64 divResult; diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 0cc2ca19d..007d0fb90 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1492,9 +1492,7 @@ void NDS_SkipNextFrame() { SkipNext2DFrame = true; SkipCur3DFrame = true; } static void execHardware_doDma(int procnum, int chan, EDMAMode modeNum) { -#ifdef USE_GEOMETRY_FIFO_EMULATION if(MMU.DMAStartTime[procnum][chan] == modeNum) -#endif { if(procnum == ARMCPU_ARM9) MMU_doDMA(chan); else MMU_doDMA(chan); @@ -1502,7 +1500,7 @@ static void execHardware_doDma(int procnum, int chan, EDMAMode modeNum) } } -void execHardware_doAllDma(EDMAMode modeNum) +static void execHardware_doAllDma(EDMAMode modeNum) { for(int i=0;i<2;i++) for(int j=0;j<4;j++) @@ -1571,7 +1569,6 @@ struct TSequenceItem_GXFIFO : public TSequenceItem MMU.gfx3dCycles = max(MMU.gfx3dCycles,nds_timer); //uhh i dont entirely understand why this was necessary //i need to learn more about how the new gxfifo works, but I am leaving that to you for now crazymax ^_^ #endif - } FORCEINLINE u64 next() @@ -1664,16 +1661,13 @@ template struct TSequenceItem_DMA : public TSequenceItem FORCEINLINE void exec() { - if (MMU.DMACompleted[procnum][chan]) - { - u8* regs = procnum==0?ARM9Mem.ARM9_REG:MMU.ARM7_REG; - T1WriteLong(regs, 0xB8 + (0xC*chan), T1ReadLong(regs, 0xB8 + (0xC*chan)) & 0x7FFFFFFF); - if((MMU.DMACrt[procnum][chan])&(1<<30)) { - if(procnum==0) NDS_makeARM9Int(8+chan); - else NDS_makeARM7Int(8+chan); - } - MMU.DMAing[procnum][chan] = FALSE; + u8* regs = procnum==0?ARM9Mem.ARM9_REG:MMU.ARM7_REG; + T1WriteLong(regs, 0xB8 + (0xC*chan), T1ReadLong(regs, 0xB8 + (0xC*chan)) & 0x7FFFFFFF); + if((MMU.DMACrt[procnum][chan])&(1<<30)) { + if(procnum==0) NDS_makeARM9Int(8+chan); + else NDS_makeARM7Int(8+chan); } + MMU.DMAing[procnum][chan] = FALSE; } }; diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index 87e754b10..295ea428a 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -575,6 +575,681 @@ static void gfx3d_glLightDirection_cache(int index) } } + +//=============================================================================== +#ifdef USE_GEOMETRY_FIFO_EMULATION +#if 0 +#define dEXEC(cmdName, oldcmd, newcmd) if (oldcmd != newcmd)\ + INFO("ERROR %s: cmd old 0x%02X, new 0x%02X\n", cmdName, oldcmd, newcmd); +#else +#define dEXEC(cmdName, oldcmd, newcmd) +#endif + + +void FORCEINLINE gfx3d_glMatrixMode(u32 v) +{ + mode = (v&3); + + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glPushMatrix() +{ + u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); + //this command always works on both pos and vector when either pos or pos-vector are the current mtx mode + short mymode = (mode==1?2:mode); + + if (mtxStack[mymode].position > mtxStack[mymode].size) + { + gxstat |= (1<<15); + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); + return; + } + + gxstat &= 0xFFFF00FF; + + MatrixStackPushMatrix(&mtxStack[mymode], mtxCurrent[mymode]); + + GFX_DELAY(17); + + if(mymode==2) + MatrixStackPushMatrix (&mtxStack[1], mtxCurrent[1]); + + gxstat |= ((mtxStack[0].position << 13) | (mtxStack[1].position << 8)); + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); +} + +void FORCEINLINE gfx3d_glPopMatrix(s32 i) +{ + u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); + + //this command always works on both pos and vector when either pos or pos-vector are the current mtx mode + short mymode = (mode==1?2:mode); + + /* + if (i > mtxStack[mymode].position) + { + gxstat |= (1<<15); + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); + return; + } + */ + gxstat &= 0xFFFF00FF; + + MatrixCopy(mtxCurrent[mymode], MatrixStackPopMatrix (&mtxStack[mymode], i)); + + GFX_DELAY(36); + + if (mymode == 2) + MatrixCopy(mtxCurrent[1], MatrixStackPopMatrix (&mtxStack[1], i)); + + gxstat |= ((mtxStack[0].position << 13) | (mtxStack[1].position << 8)); + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); +} + +void FORCEINLINE gfx3d_glStoreMatrix(u32 v) +{ + //this command always works on both pos and vector when either pos or pos-vector are the current mtx mode + short mymode = (mode==1?2:mode); + + //limit height of these stacks. + //without the mymode==3 namco classics galaxian will try to use pos=1 and overrun the stack, corrupting emu + if(mymode==0 || mymode==3) + v = 0; + + if(v==31) v=30; //? what should happen in this case? + + MatrixStackLoadMatrix (&mtxStack[mymode], v&31, mtxCurrent[mymode]); + + GFX_DELAY(17); + + if(mymode==2) + MatrixStackLoadMatrix (&mtxStack[1], v&31, mtxCurrent[1]); +} + +void FORCEINLINE gfx3d_glRestoreMatrix(u32 v) +{ + //this command always works on both pos and vector when either pos or pos-vector are the current mtx mode + short mymode = (mode==1?2:mode); + + //limit height of these stacks + //without the mymode==3 namco classics galaxian will try to use pos=1 and overrun the stack, corrupting emu + if(mymode==0 || mymode==3) + v = 0; + + if(v==31) v=30; //? what should happen in this case? + + MatrixCopy (mtxCurrent[mymode], MatrixStackGetPos(&mtxStack[mymode], v&31)); + + GFX_DELAY(36); + + if (mymode == 2) + MatrixCopy (mtxCurrent[1], MatrixStackGetPos(&mtxStack[1], v&31)); +} + +void FORCEINLINE gfx3d_glLoadIdentity() +{ + MatrixIdentity (mtxCurrent[mode]); + + GFX_DELAY(19); + + if (mode == 2) + MatrixIdentity (mtxCurrent[1]); +} + +void FORCEINLINE gfx3d_glLoadMatrix4x4(s32 v) +{ + mtxCurrent[mode][0] = v; + + for (int i = 1; i < 16; i++) + { + u8 cmd = 0; + u32 param = 0; + + if (!GFX_PIPErecv(&cmd, ¶m)) break; + dEXEC("glLoadMatrix4x4", 0x16, cmd); + mtxCurrent[mode][i] = (s32)param; + } + + vector_fix2float<4>(mtxCurrent[mode], 4096.f); + + GFX_DELAY(19); + + if (mode == 2) + MatrixCopy (mtxCurrent[1], mtxCurrent[2]); +} + +void FORCEINLINE gfx3d_glLoadMatrix4x3(s32 v) +{ + mtxCurrent[mode][0] = v; + + for (int i = 1; i < 16; i++) + { + if ((i & 0x03) == 3) continue; + u8 cmd = 0; + u32 param = 0; + + if (!GFX_PIPErecv(&cmd, ¶m)) break; + dEXEC("glLoadMatrix4x3", 0x17, cmd); + mtxCurrent[mode][i] = (s32)param; + } + + vector_fix2float<4>(mtxCurrent[mode], 4096.f); + + //fill in the unusued matrix values + mtxCurrent[mode][3] = mtxCurrent[mode][7] = mtxCurrent[mode][11] = 0.f; + mtxCurrent[mode][15] = 1.f; + + GFX_DELAY(30); + + if (mode == 2) + MatrixCopy (mtxCurrent[1], mtxCurrent[2]); +} + +void FORCEINLINE gfx3d_glMultMatrix4x4(s32 v) +{ + mtxTemporal[0] = v; + + for (int i = 1; i < 16; i++) + { + u8 cmd = 0; + u32 param = 0; + + if (!GFX_PIPErecv(&cmd, ¶m)) break; + dEXEC("glMultMatrix4x4", 0x18, cmd); + mtxTemporal[i] = (s32)param; + } + + vector_fix2float<4>(mtxTemporal, 4096.f); + + MatrixMultiply (mtxCurrent[mode], mtxTemporal); + + GFX_DELAY(35); + + if (mode == 2) + { + MatrixMultiply (mtxCurrent[1], mtxTemporal); + GFX_DELAY_M2(30); + } + + MatrixIdentity (mtxTemporal); +} + +void FORCEINLINE gfx3d_glMultMatrix4x3(s32 v) +{ + mtxTemporal[0] = v; + + for (int i = 1; i < 16; i++) + { + if ((i & 0x03) == 3) continue; + u8 cmd = 0; + u32 param = 0; + + if (!GFX_PIPErecv(&cmd, ¶m)) break; + dEXEC("glMultMatrix4x3", 0x19, cmd); + mtxTemporal[i] = (s32)param; + } + + vector_fix2float<4>(mtxTemporal, 4096.f); + + //fill in the unusued matrix values + mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0.f; + mtxTemporal[15] = 1.f; + + MatrixMultiply (mtxCurrent[mode], mtxTemporal); + + GFX_DELAY(31); + + if (mode == 2) + { + MatrixMultiply (mtxCurrent[1], mtxTemporal); + GFX_DELAY_M2(30); + } + + //does this really need to be done? + MatrixIdentity (mtxTemporal); +} + +void FORCEINLINE gfx3d_glMultMatrix3x3(s32 v) +{ + mtxTemporal[0] = v; + + for (int i = 1; i < 12; i++) + { + if ((i & 0x03) == 3) continue; + u8 cmd = 0; + u32 param = 0; + + if (!GFX_PIPErecv(&cmd, ¶m)) break; + dEXEC("glMultMatrix3x3", 0x1A, cmd); + mtxTemporal[i] = (s32)param; + } + + vector_fix2float<3>(mtxTemporal, 4096.f); + + //fill in the unusued matrix values + mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0; + mtxTemporal[15] = 1; + mtxTemporal[12] = mtxTemporal[13] = mtxTemporal[14] = 0; + + MatrixMultiply (mtxCurrent[mode], mtxTemporal); + + GFX_DELAY(28); + + if (mode == 2) + { + MatrixMultiply (mtxCurrent[1], mtxTemporal); + GFX_DELAY_M2(30); + } + + //does this really need to be done? + MatrixIdentity (mtxTemporal); +} + +void FORCEINLINE gfx3d_glScale(s32 v) +{ + u8 cmd = 0; + u32 param = 0; + + scale[0] = fix2float(v); + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glScale", 0x1B, cmd); + scale[1] = fix2float((s32)param); + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glScale", 0x1B, cmd); + scale[2] = fix2float((s32)param); + + MatrixScale (mtxCurrent[(mode==2?1:mode)], scale); + + GFX_DELAY(22); + + //note: pos-vector mode should not cause both matrices to scale. + //the whole purpose is to keep the vector matrix orthogonal + //so, I am leaving this commented out as an example of what not to do. + //if (mode == 2) + // MatrixScale (mtxCurrent[1], scale); +} + +void FORCEINLINE gfx3d_glTranslate(s32 v) +{ + u8 cmd = 0; + u32 param = 0; + + trans[0] = fix2float(v); + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glTranslate", 0x1C, cmd); + trans[1] = fix2float((s32)param); + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glTranslate", 0x1C, cmd); + trans[2] = fix2float((s32)param); + + MatrixTranslate (mtxCurrent[mode], trans); + + GFX_DELAY(22); + + if (mode == 2) + { + MatrixTranslate (mtxCurrent[1], trans); + GFX_DELAY_M2(30); + } +} + +void FORCEINLINE gfx3d_glColor3b(u32 v) +{ + colorRGB[0] = (v&0x1F); + colorRGB[1] = ((v>>5)&0x1F); + colorRGB[2] = ((v>>10)&0x1F); + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glNormal(u32 v) +{ + int i,c; + ALIGN(16) float normal[4] = { normalTable[v&1023], + normalTable[(v>>10)&1023], + normalTable[(v>>20)&1023], + 1}; + + if (texCoordinateTransform == 2) + { + last_s =( (normal[0] *mtxCurrent[3][0] + normal[1] *mtxCurrent[3][4] + + normal[2] *mtxCurrent[3][8]) + (_s*16.0f)) / 16.0f; + last_t =( (normal[0] *mtxCurrent[3][1] + normal[1] *mtxCurrent[3][5] + + normal[2] *mtxCurrent[3][9]) + (_t*16.0f)) / 16.0f; + } + + //use the current normal transform matrix + MatrixMultVec3x3 (mtxCurrent[2], normal); + + //apply lighting model + { + u8 diffuse[3] = { + (dsDiffuse)&0x1F, + (dsDiffuse>>5)&0x1F, + (dsDiffuse>>10)&0x1F }; + + u8 ambient[3] = { + (dsAmbient)&0x1F, + (dsAmbient>>5)&0x1F, + (dsAmbient>>10)&0x1F }; + + u8 emission[3] = { + (dsEmission)&0x1F, + (dsEmission>>5)&0x1F, + (dsEmission>>10)&0x1F }; + + u8 specular[3] = { + (dsSpecular)&0x1F, + (dsSpecular>>5)&0x1F, + (dsSpecular>>10)&0x1F }; + + int vertexColor[3] = { emission[0], emission[1], emission[2] }; + + for(i=0; i<4; i++) + { + if(!((lightMask>>i)&1)) continue; + + u8 _lightColor[3] = { + (lightColor[i])&0x1F, + (lightColor[i]>>5)&0x1F, + (lightColor[i]>>10)&0x1F }; + + /* This formula is the one used by the DS */ + /* Reference : http://nocash.emubase.de/gbatek.htm#ds3dpolygonlightparameters */ + + float diffuseLevel = std::max(0.0f, -vec3dot(cacheLightDirection[i], normal)); + float shininessLevel = pow(std::max(0.0f, vec3dot(-cacheHalfVector[i], normal)), 2); + + if(dsSpecular & 0x8000) + { + int shininessIndex = (int)(shininessLevel * 128); + if(shininessIndex >= (int)ARRAY_SIZE(shininessTable)) { + //we can't print this right now, because when a game triggers this it triggers it _A_LOT_ + //so wait until we have per-frame diagnostics. + //this was tested using Princess Debut (US) after proceeding through the intro and getting the tiara. + //After much research, I determined that this was caused by the game feeding in a totally jacked matrix + //to mult4x4 from 0x02129B80 (after feeding two other valid matrices) + //the game seems to internally index these as: ?, 0x37, 0x2B <-- error + //but, man... this is seriously messed up. there must be something going wrong. + //maybe it has something to do with what looks like a mirror room effect that is going on during this time? + //PROGINFO("ERROR: shininess table out of bounds.\n maybe an emulator error; maybe a non-unit normal; setting to 0\n"); + shininessIndex = 0; + } + shininessLevel = shininessTable[shininessIndex]; + } + + for(c = 0; c < 3; c++) + { + vertexColor[c] += (int)(((specular[c] * _lightColor[c] * shininessLevel) + + (diffuse[c] * _lightColor[c] * diffuseLevel) + + (ambient[c] * _lightColor[c])) / 31.0f); + } + } + + for(c=0;c<3;c++) + colorRGB[c] = std::min(31,vertexColor[c]); + } + + GFX_DELAY(9); + GFX_DELAY_M2((lightMask) & 0x01); + GFX_DELAY_M2((lightMask>>1) & 0x01); + GFX_DELAY_M2((lightMask>>2) & 0x01); + GFX_DELAY_M2((lightMask>>3) & 0x01); +} + +void FORCEINLINE gfx3d_glTexCoord(u32 val) +{ + _t = (s16)(val>>16); + _s = (s16)(val&0xFFFF); + + _s /= 16.0f; + _t /= 16.0f; + + if (texCoordinateTransform == 1) + { + last_s =_s*mtxCurrent[3][0] + _t*mtxCurrent[3][4] + + 0.0625f*mtxCurrent[3][8] + 0.0625f*mtxCurrent[3][12]; + last_t =_s*mtxCurrent[3][1] + _t*mtxCurrent[3][5] + + 0.0625f*mtxCurrent[3][9] + 0.0625f*mtxCurrent[3][13]; + } + else + { + last_s=_s; + last_t=_t; + } + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glVertex16b(unsigned int v) +{ + u8 cmd = 0; + u32 param = 0; + + coord[0] = float16table[v&0xFFFF]; + coord[1] = float16table[v>>16]; + + //if (gxPIPE.cmd[0] != 0x23) return; + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glVertex16b", 0x23, cmd); + coord[2] = float16table[param&0xFFFF]; + + SetVertex (); + + GFX_DELAY(9); +} + +void FORCEINLINE gfx3d_glVertex10b(u32 v) +{ + coord[0] = float10Table[v&1023]; + coord[1] = float10Table[(v>>10)&1023]; + coord[2] = float10Table[(v>>20)&1023]; + + GFX_DELAY(8); + SetVertex (); +} + +void FORCEINLINE gfx3d_glVertex3_cord(unsigned int one, unsigned int two, unsigned int v) +{ + coord[one] = float16table[v&0xffff]; + coord[two] = float16table[v>>16]; + + SetVertex (); + + GFX_DELAY(8); +} + +void FORCEINLINE gfx3d_glVertex_rel(u32 v) +{ + coord[0] += float10RelTable[v&1023]; + coord[1] += float10RelTable[(v>>10)&1023]; + coord[2] += float10RelTable[(v>>20)&1023]; + + SetVertex (); + + GFX_DELAY(8); +} + +void FORCEINLINE gfx3d_glPolygonAttrib (u32 val) +{ + if(inBegin) { + //PROGINFO("Set polyattr in the middle of a begin/end pair.\n (This won't be activated until the next begin)\n"); + //TODO - we need some some similar checking for teximageparam etc. + } + polyAttrPending = val; + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glTexImage(u32 val) +{ + textureFormat = val; + gfx3d_glTexImage_cache(); + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glTexPalette(u32 val) +{ + texturePalette = val; + GFX_DELAY(1); +} + +/* + 0-4 Diffuse Reflection Red + 5-9 Diffuse Reflection Green + 10-14 Diffuse Reflection Blue + 15 Set Vertex Color (0=No, 1=Set Diffuse Reflection Color as Vertex Color) + 16-20 Ambient Reflection Red + 21-25 Ambient Reflection Green + 26-30 Ambient Reflection Blue +*/ +void FORCEINLINE gfx3d_glMaterial0(u32 val) +{ + dsDiffuse = val&0xFFFF; + dsAmbient = val>>16; + + if (BIT15(val)) + { + colorRGB[0] = (val)&0x1F; + colorRGB[1] = (val>>5)&0x1F; + colorRGB[2] = (val>>10)&0x1F; + } + GFX_DELAY(4); +} + +void FORCEINLINE gfx3d_glMaterial1(u32 val) +{ + dsSpecular = val&0xFFFF; + dsEmission = val>>16; + GFX_DELAY(4); +} + +/* + 0-9 Directional Vector's X component (1bit sign + 9bit fractional part) + 10-19 Directional Vector's Y component (1bit sign + 9bit fractional part) + 20-29 Directional Vector's Z component (1bit sign + 9bit fractional part) + 30-31 Light Number (0..3) +*/ +void FORCEINLINE gfx3d_glLightDirection (u32 v) +{ + int index = v>>30; + + lightDirection[index] = v; + gfx3d_glLightDirection_cache(index); + GFX_DELAY(6); +} + +void FORCEINLINE gfx3d_glLightColor (u32 v) +{ + int index = v>>30; + lightColor[index] = v; + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glShininess (u32 val) +{ + u8 cmd = 0; + u32 param = 0; + + shininessTable[0] = ((val & 0xFF) / 256.0f); + shininessTable[1] = (((val >> 8) & 0xFF) / 256.0f); + shininessTable[2] = (((val >> 16) & 0xFF) / 256.0f); + shininessTable[3] = (((val >> 24) & 0xFF) / 256.0f); + + for (int i = 4; i < 128; i+=4) + { + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glShininess", 0x34, cmd); + shininessTable[i] = ((param & 0xFF) / 256.0f); + shininessTable[i+1] = (((param >> 8) & 0xFF) / 256.0f); + shininessTable[i+2] = (((param >> 16) & 0xFF) / 256.0f); + shininessTable[i+3] = (((param >> 24) & 0xFF) / 256.0f); + } + + GFX_DELAY(32); +} + +void FORCEINLINE gfx3d_glBegin(u32 v) +{ + inBegin = TRUE; + vtxFormat = v&0x03; + triStripToggle = 0; + tempVertInfo.count = 0; + tempVertInfo.first = true; + polyAttr = polyAttrPending; + gfx3d_glPolygonAttrib_cache(); + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glEnd(void) +{ + inBegin = FALSE; + tempVertInfo.count = 0; + GFX_DELAY(1); +} + +// swap buffers - skipped + +void FORCEINLINE gfx3d_glViewPort(u32 v) +{ + viewport = v; + GFX_DELAY(1); +} + +void FORCEINLINE gfx3d_glBoxTest(u32 v) +{ + u8 cmd = 0; + u32 param = 0; + u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glBoxTest", 0x70, cmd); + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glBoxTest", 0x70, cmd); + + GFX_DELAY(103); + + gxstat &= 0xFFFFFFFE; // clear busy bit + gxstat |= 0x00000002; // hack + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); +} + +void FORCEINLINE gfx3d_glPosTest(u32 v) +{ + u8 cmd = 0; + u32 param = 0; + u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); + + PTcoords[0] = float16table[v & 0xFFFF]; + PTcoords[1] = float16table[v >> 16]; + + if (!GFX_PIPErecv(&cmd, ¶m)) return; + dEXEC("glPosTest", 0x71, cmd); + PTcoords[2] = float16table[param & 0xFFFF]; + PTcoords[3] = 1.0f; + + MatrixMultVec4x4_M2(mtxCurrent[0], PTcoords); + + gxstat &= 0xFFFFFFFE; // cleay busy bit + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); + + GFX_DELAY(9); +} + +void FORCEINLINE gfx3d_glVecTest(u32 v) +{ + u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); + gxstat &= 0xFFFFFFFE; + T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); + + GFX_DELAY(5); + //INFO("NDS_glVecTest\n"); +} +#else // else not USE_GEOMETRY_FIFO_EMULATION + + //=============================================================================== void gfx3d_glMatrixMode(u32 v) { @@ -1285,6 +1960,8 @@ void gfx3d_glVecTest(u32 v) GFX_DELAY(5); //INFO("NDS_glVecTest\n"); } + +#endif //================================================================================= Geometry Engine //================================================================================= (end) //================================================================================= @@ -1385,7 +2062,7 @@ void FORCEINLINE gfx3d_execute(u8 cmd, u32 param) { #ifdef _3D_LOG_EXEC u32 gxstat2 = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600); - INFO("*** gxFIFO: exec 0x%02X, tail %03i, gxstat 0x%08X (timer %i)\n", cmd, gxFIFO.tail, gxstat2, nds_timer); + INFO("*** gxFIFO: exec 0x%02X, tail %03i, gxstat 0x%08X\n", cmd, gxFIFO.tail, gxstat2); #endif switch (cmd) { @@ -1502,7 +2179,7 @@ void FORCEINLINE gfx3d_execute(u8 cmd, u32 param) break; default: INFO("Unknown execute FIFO 3D command 0x%02X with param 0x%08X\n", cmd, param); - break; + return; } NDS_RescheduleGXFIFO(); } @@ -1514,6 +2191,43 @@ void gfx3d_execute3D() if (isSwapBuffers) return; + + u16 size = gxPIPE.tail + gxFIFO.tail; + //if (size == 0) return; + + switch (gxPIPE.cmd[0]) + { + case 0x34: // SHININESS - Specular Reflection Shininess Table (W) + if (size < 32) return; + break; + + case 0x16: // MTX_LOAD_4x4 - Load 4x4 Matrix to Current Matrix (W) + case 0x18: // MTX_MULT_4x4 - Multiply Current Matrix by 4x4 Matrix (W) + if (size < 16) return; + break; + + case 0x17: // MTX_LOAD_4x3 - Load 4x3 Matrix to Current Matrix (W) + case 0x19: // MTX_MULT_4x3 - Multiply Current Matrix by 4x3 Matrix (W) + if (size < 12) return; + break; + + case 0x1A: // MTX_MULT_3x3 - Multiply Current Matrix by 3x3 Matrix (W) + if (size < 9) return; + break; + + case 0x1B: // MTX_SCALE - Multiply Current Matrix by Scale Matrix (W) + case 0x1C: // MTX_TRANS - Mult. Curr. Matrix by Translation Matrix (W) + case 0x70: // BOX_TEST - Test if Cuboid Sits inside View Volume (W) + if (size < 3) return; + break; + + case 0x23: // VTX_16 - Set Vertex XYZ Coordinates (W) + case 0x71: // POS_TEST - Set Position Coordinates for Test (W) + if (size < 2) return; + break; + } + + if (GFX_PIPErecv(&cmd, ¶m)) { gfx3d_execute(cmd, param); @@ -1766,14 +2480,22 @@ void gfx3d_sendCommandToFIFO(u32 val) { if (clCmd == 0) { - clInd2 = 0; clCmd = val; - NOPARAMS(); return; } #ifdef _3D_LOG INFO("gxFIFO: send 0x%02X: val=0x%08X, pipe %02i, fifo %03i\n", clCmd & 0xFF, val, gxPIPE.tail, gxFIFO.tail); #endif + + + NOPARAMS(); + if (clCmd == 0) + { + if (val == 0) return; + clCmd = val; + return; + } + switch (clCmd & 0xFF) { case 0x34: // SHININESS - Specular Reflection Shininess Table (W)