diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 79659e5f9..51d91d0b7 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -115,384 +115,6 @@ CACHE_ALIGN u16 fadeOutColors[17][0x8000]; CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32]; -/*****************************************************************************/ -// PIXEL RENDERING - 3D -/*****************************************************************************/ - -#define DECL3D \ - int x = dstX; \ - int passing = dstX<<1; \ - u16 color = _3dColorLine[srcX]; \ - u8 alpha = _3dAlphaLine[srcX]; \ - u8* dst = currDst; - -FORCEINLINE void GPU::setFinal3DColorSpecialNone(int dstX, int srcX) -{ - DECL3D; - - // We must blend if the 3D layer has the highest prio - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[dstX]; - u16 final = color; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha) + (c2.bits.red * (16 - alpha)))/16; - cfinal.bits.green = ((c1.bits.green * alpha) + (c2.bits.green * (16 - alpha)))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha) + (c2.bits.blue * (16 - alpha)))/16; - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - bgPixels[x] = 0; - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialBlend(int dstX, int srcX) -{ - DECL3D; - - // We can blend if the 3D layer is selected as 1st target, - //but also if the 3D layer has the highest prio. 
- if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && ((BLDCNT & 0x1) || bg0HasHighestPrio)) - { - int bg_under = bgPixels[x]; - u16 final = color; - - //If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) )) / 16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) )) / 16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) )) / 16; - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - bgPixels[x] = 0; - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialIncrease(int dstX, int srcX) -{ - DECL3D; - u16 final = color; - - // We must blend if the 3D layer has the highest prio - // But it doesn't seem to have priority over fading, - // unlike semi-transparent sprites - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[x]; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = cfinal.val; - } - } - } - - if(BLDCNT & 0x1) - { - if (BLDY_EVY != 0x0) - { - final = fadeInColors[BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } -} - -FORCEINLINE void 
GPU::setFinal3DColorSpecialDecrease(int dstX, int srcX) -{ - DECL3D; - - u16 final = color; - - // We must blend if the 3D layer has the highest prio - // But it doesn't seem to have priority over fading - // unlike semi-transparent sprites - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[x]; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = cfinal.val; - } - } - } - - if(BLDCNT & 0x1) - { - if (BLDY_EVY != 0x0) - { - final = fadeOutColors[BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialNoneWnd(int dstX, int srcX) -{ - DECL3D; - - bool windowDraw = true, windowEffect = true; - - renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - // We must blend if the 3D layer has the highest prio - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[x]; - u16 final = color; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = 
cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - bgPixels[x] = 0; - } - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialBlendWnd(int dstX, int srcX) -{ - DECL3D; - - bool windowDraw = true, windowEffect = true; - - renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - // We can blend if the 3D layer is selected as 1st target, - // but also if the 3D layer has the highest prio. - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && (((BLDCNT & 0x1) && windowEffect) || bg0HasHighestPrio)) - { - int bg_under = bgPixels[x]; - u16 final = color; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - bgPixels[x] = 0; - } - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX) -{ - DECL3D; - - bool windowDraw = true, windowEffect = true; - u16 final = color; - - renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - // We must blend if the 3D layer has the highest prio - // But it doesn't seem to have priority over fading, - // unlike semi-transparent sprites - if((alpha < 16)) //zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[x]; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - 
c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = cfinal.val; - } - } - } - - if((BLDCNT & 0x1) && windowEffect) - { - if (BLDY_EVY != 0x0) - { - final = fadeInColors[BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - } -} - -FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX) -{ - DECL3D; - - bool windowDraw = true, windowEffect = true; - u16 final = color; - - renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - // We must blend if the 3D layer has the highest prio - // But it doesn't seem to have priority over fading, - // unlike semi-transparent sprites - if((alpha < 16)) ////zero 30-may-09 - i think 3d always blends && bg0HasHighestPrio) - { - int bg_under = bgPixels[x]; - - // If the layer we are drawing on is selected as 2nd source, we can blend - if(BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha ) + (c2.bits.red * (16 - alpha) ))/16; - cfinal.bits.green = ((c1.bits.green * alpha ) + (c2.bits.green * (16 - alpha) ))/16; - cfinal.bits.blue = ((c1.bits.blue * alpha ) + (c2.bits.blue * (16 - alpha) ))/16; - - final = cfinal.val; - } - } - } - - if((BLDCNT & 0x1) && windowEffect) - { - if (BLDY_EVY != 0x0) - { - final = fadeOutColors[BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - bgPixels[x] = 0; - } - } -} - /*****************************************************************************/ // 
INITIALIZATION /*****************************************************************************/ @@ -938,9 +560,69 @@ FORCEINLINE void GPU::renderline_checkWindows(u16 x, bool &draw, bool &effect) c } /*****************************************************************************/ -// PIXEL RENDERING - BGS +// PIXEL RENDERING /*****************************************************************************/ +template +FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(int dstX, int srcX) +{ + int x = dstX; + int passing = dstX<<1; + u8* color = &_3dColorLine[srcX<<2]; + u8 red = color[0]; + u8 green = color[1]; + u8 blue = color[2]; + u8 alpha = color[3]; + u8* dst = currDst; + u16 final; + + bool windowEffect = true; + + if(WINDOW) + { + bool windowDraw; + renderline_checkWindows(dstX, windowDraw, windowEffect); + + //we never have anything more to do if the window rejected us + if(!windowDraw) return; + } + + int bg_under = bgPixels[dstX]; + if(blend2[bg_under]) + { + if(alpha<32) + { + //if the layer underneath is a blend bottom layer, then 3d always alpha blends with it + COLOR c2, cfinal; + + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((red * alpha) + ((c2.bits.red<<1) * (32 - alpha)))>>6; + cfinal.bits.green = ((green * alpha) + ((c2.bits.green<<1) * (32 - alpha)))>>6; + cfinal.bits.blue = ((blue * alpha) + ((c2.bits.blue<<1) * (32 - alpha)))>>6; + + final = cfinal.val; + } + else final = R6G6B6TORGB15(red,green,blue); + } + else + { + final = R6G6B6TORGB15(red,green,blue); + //perform the special effect + if(windowEffect) + switch(FUNC) { + case Increase: final = currentFadeInColors[final&0x7FFF]; break; + case Decrease: final = currentFadeOutColors[final&0x7FFF]; break; + case None: + case Blend: + break; + } + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; +} + template FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(u16 &color, const u32 x) @@ -1066,14 +748,14 @@ FORCEINLINE void GPU::setFinalColor3d(int dstX, int 
srcX) { switch(setFinalColor3d_funcNum) { - case 0x0: setFinal3DColorSpecialNone(dstX,srcX); break; - case 0x1: setFinal3DColorSpecialBlend(dstX,srcX); break; - case 0x2: setFinal3DColorSpecialIncrease(dstX,srcX); break; - case 0x3: setFinal3DColorSpecialDecrease(dstX,srcX); break; - case 0x4: setFinal3DColorSpecialNoneWnd(dstX,srcX); break; - case 0x5: setFinal3DColorSpecialBlendWnd(dstX,srcX); break; - case 0x6: setFinal3DColorSpecialIncreaseWnd(dstX,srcX); break; - case 0x7: setFinal3DColorSpecialDecreaseWnd(dstX,srcX); break; + case 0x0: _master_setFinal3dColor(dstX,srcX); break; + case 0x1: _master_setFinal3dColor(dstX,srcX); break; + case 0x2: _master_setFinal3dColor(dstX,srcX); break; + case 0x3: _master_setFinal3dColor(dstX,srcX); break; + case 0x4: _master_setFinal3dColor(dstX,srcX); break; + case 0x5: _master_setFinal3dColor(dstX,srcX); break; + case 0x6: _master_setFinal3dColor(dstX,srcX); break; + case 0x7: _master_setFinal3dColor(dstX,srcX); break; }; } @@ -2403,8 +2085,8 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l) BGxOFS *bgofs = &gpu->dispx_st->dispx_BGxOFS[i16]; u16 hofs = (T1ReadWord((u8*)&bgofs->BGxHOFS, 0) & 0x1FF); - gfx3d_GetLineData(l, &gpu->_3dColorLine, &gpu->_3dAlphaLine); - u16* colorLine = gpu->_3dColorLine; + gfx3d_GetLineData(l, &gpu->_3dColorLine); + u8* colorLine = gpu->_3dColorLine; for(int k = 0; k < 256; k++) { @@ -2413,7 +2095,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l) if((q < 0) || (q > 255)) continue; - if(colorLine[q] & 0x8000) + if(colorLine[(q<<2)+3]) gpu->setFinalColor3d(k, q); } @@ -2525,7 +2207,7 @@ template static void GPU_ligne_DispCapture(u16 l) { //INFO("Capture 3D\n"); u16* colorLine; - gfx3d_GetLineData(l, &colorLine, NULL); + gfx3d_GetLineData15bpp(l, &colorLine); CAPCOPY(((u8*)colorLine),cap_dst); } break; @@ -2563,7 +2245,7 @@ template static void GPU_ligne_DispCapture(u16 l) } else { - gfx3d_GetLineData(l, &srcA, NULL); + gfx3d_GetLineData15bpp(l, &srcA); } static u16 
fifoLine[256]; diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 6af74738a..bdfdd5a22 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -746,8 +746,7 @@ struct GPU bool blend1; u8* currDst; - u16* _3dColorLine; - u8* _3dAlphaLine; + u8* _3dColorLine; static struct MosaicLookup { @@ -777,6 +776,9 @@ struct GPU template FORCEINLINE FASTCALL bool _master_setFinalBGColor(u16 &color, const u32 x); + template + FORCEINLINE FASTCALL void _master_setFinal3dColor(int dstX, int srcX); + int setFinalColorBck_funcNum; int bgFunc; int setFinalColor3d_funcNum; @@ -798,30 +800,12 @@ struct GPU } - template void setFinalColorBG(u16 color, const u32 x); void setFinalColor3d(int dstX, int srcX); - - template FORCEINLINE void setFinalBGColorSpecialNone(u16 &color, const u32 x); - template FORCEINLINE void setFinalBGColorSpecialBlend(u16 &color, const u32 x); - template FORCEINLINE void setFinalBGColorSpecialIncrease(u16 &color, const u32 x); - template FORCEINLINE void setFinalBGColorSpecialDecrease(u16 &color, const u32 x); - template FORCEINLINE bool setFinalBGColorSpecialNoneWnd(u16 &color, const u32 x); - template FORCEINLINE bool setFinalBGColorSpecialBlendWnd(u16 &color, const u32 x); - template FORCEINLINE bool setFinalBGColorSpecialIncreaseWnd(u16 &color, const u32 x); - template FORCEINLINE bool setFinalBGColorSpecialDecreaseWnd(u16 &color, const u32 x); - FORCEINLINE void setFinal3DColorSpecialNone(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialBlend(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialIncrease(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialDecrease(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialNoneWnd(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialBlendWnd(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX); - FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX); - - + template void setFinalColorBG(u16 color, 
const u32 x); template FORCEINLINE void __setFinalColorBck(u16 color, const u32 x, const int opaque); template FORCEINLINE void ___setFinalColorBck(u16 color, const u32 x, const int opaque); + void setAffineStart(int layer, int xy, u32 val); void setAffineStartWord(int layer, int xy, u16 val, int word); u32 getAffineStart(int layer, int xy); diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index a95580729..0efd98d41 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -693,34 +693,58 @@ static void GL_ReadFramebuffer() //is it safe to modify the screen buffer? if not, we could make a temp copy for(int i=0,y=191;y>=0;y--) { - u16* dst = gfx3d_convertedScreen + (y<<8); - u8* dstAlpha = gfx3d_convertedAlpha + (y<<8); - - //I dont know much about this kind of stuff, but this seems to help - //for some reason I couldnt make the intrinsics work - //u8* u8screen3D = (u8*)&((u32*)GPU_screen3D)[i]; - /*#define PREFETCH32(X,Y) __asm { prefetchnta [u8screen3D+32*0x##X##Y] } - #define PREFETCH128(X) PREFETCH32(X,0) PREFETCH32(X,1) PREFETCH32(X,2) PREFETCH32(X,3) \ - PREFETCH32(X,4) PREFETCH32(X,5) PREFETCH32(X,6) PREFETCH32(X,7) \ - PREFETCH32(X,8) PREFETCH32(X,9) PREFETCH32(X,A) PREFETCH32(X,B) \ - PREFETCH32(X,C) PREFETCH32(X,D) PREFETCH32(X,E) PREFETCH32(X,F) - PREFETCH128(0); PREFETCH128(1);*/ + u8* dst = gfx3d_convertedScreen + (y<<(8+2)); for(int x=0;x<256;x++,i++) { u32 &u32screen3D = ((u32*)GPU_screen3D)[i]; - u32screen3D>>=3; - u32screen3D &= 0x1F1F1F1F; - + u32screen3D>>=2; + u32screen3D &= 0x3F3F3F3F; + const int t = i<<2; const u8 a = GPU_screen3D[t+3]; const u8 r = GPU_screen3D[t+2]; const u8 g = GPU_screen3D[t+1]; const u8 b = GPU_screen3D[t+0]; - dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a]; - dstAlpha[x] = alpha_5bit_to_4bit[a]; + *dst++ = r; + *dst++ = g; + *dst++ = b; + *dst++ = a>>1; /* color stays 6bit, but alpha is consumed as 5bit (the 3d layer compositor tests alpha<32), so halve the 6bit value read back here */ } } + + ////convert the pixels to a different format which is more convenient + ////is it safe to modify the screen buffer? 
if not, we could make a temp copy + //for(int i=0,y=191;y>=0;y--) + //{ + // u16* dst = gfx3d_convertedScreen + (y<<8); + // u8* dstAlpha = gfx3d_convertedAlpha + (y<<8); + + // //I dont know much about this kind of stuff, but this seems to help + // //for some reason I couldnt make the intrinsics work + // //u8* u8screen3D = (u8*)&((u32*)GPU_screen3D)[i]; + // /*#define PREFETCH32(X,Y) __asm { prefetchnta [u8screen3D+32*0x##X##Y] } + // #define PREFETCH128(X) PREFETCH32(X,0) PREFETCH32(X,1) PREFETCH32(X,2) PREFETCH32(X,3) \ + // PREFETCH32(X,4) PREFETCH32(X,5) PREFETCH32(X,6) PREFETCH32(X,7) \ + // PREFETCH32(X,8) PREFETCH32(X,9) PREFETCH32(X,A) PREFETCH32(X,B) \ + // PREFETCH32(X,C) PREFETCH32(X,D) PREFETCH32(X,E) PREFETCH32(X,F) + // PREFETCH128(0); PREFETCH128(1);*/ + + // for(int x=0;x<256;x++,i++) + // { + // u32 &u32screen3D = ((u32*)GPU_screen3D)[i]; + // u32screen3D>>=3; + // u32screen3D &= 0x1F1F1F1F; + + // const int t = i<<2; + // const u8 a = GPU_screen3D[t+3]; + // const u8 r = GPU_screen3D[t+2]; + // const u8 g = GPU_screen3D[t+1]; + // const u8 b = GPU_screen3D[t+0]; + // dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a]; + // dstAlpha[x] = a; + // } + //} } @@ -831,9 +855,9 @@ static void OGLRender() u8 alpha = material_5bit_to_8bit[poly->getAlpha()]; if(wireframe) alpha = 255; u8 color0[4] = { - material_5bit_to_8bit[vert0->color[0]], - material_5bit_to_8bit[vert0->color[1]], - material_5bit_to_8bit[vert0->color[2]], + vert0->color[0]<<2, + vert0->color[1]<<2, + vert0->color[2]<<2, alpha }; @@ -846,15 +870,15 @@ static void OGLRender() VERT *vert2 = &gfx3d.vertlist->list[poly->vertIndexes[j+1]]; u8 color1[4] = { - material_5bit_to_8bit[vert1->color[0]], - material_5bit_to_8bit[vert1->color[1]], - material_5bit_to_8bit[vert1->color[2]], + vert1->color[0]<<2, + vert1->color[1]<<2, + vert1->color[2]<<2, alpha }; u8 color2[4] = { - material_5bit_to_8bit[vert2->color[0]], - material_5bit_to_8bit[vert2->color[1]], - material_5bit_to_8bit[vert2->color[2]], 
+ vert2->color[0]<<2, + vert2->color[1]<<2, + vert2->color[2]<<2, alpha }; diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index fc104ed74..337d5d2ff 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -114,23 +114,9 @@ CACHE_ALIGN const u8 material_3bit_to_5bit[] = { 0, 4, 8, 13, 17, 22, 26, 31 }; -CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = { - 0x00, 0x00, - 0x01, 0x01, - 0x02, 0x02, - 0x03, 0x03, - 0x04, 0x04, - 0x05, 0x05, - 0x06, 0x06, - 0x07, 0x07, - 0x08, 0x08, - 0x09, 0x09, - 0x0A, 0x0A, - 0x0B, 0x0B, - 0x0C, 0x0C, - 0x0D, 0x0D, - 0x0E, 0x0E, - 0x10, 0x10 +//TODO - generate this in the static init method more accurately +CACHE_ALIGN const u8 material_3bit_to_6bit[] = { + 0, 8, 16, 26, 34, 44, 52, 63 }; CACHE_ALIGN const u16 alpha_lookup[] = { @@ -149,10 +135,7 @@ static float normalTable[1024]; #define fix2float(v) (((float)((s32)(v))) / (float)(1<<12)) #define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9)) -CACHE_ALIGN u16 gfx3d_convertedScreen[256*192]; - -//this extra *2 is a HACK to salvage some savestates. remove me when the savestate format changes. 
-CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2]; +CACHE_ALIGN u8 gfx3d_convertedScreen[256*192*4]; // Matrix stack handling static CACHE_ALIGN MatrixStack mtxStack[4] = { @@ -196,6 +179,7 @@ static u32 clInd = 0; static u32 clInd2 = 0; static bool isSwapBuffers = false; bool isVBlank = false; +bool bWaitForPolys = false; #endif static u32 BTind = 0; @@ -207,7 +191,7 @@ static CACHE_ALIGN float PTcoords[4] = {0.0, 0.0, 0.0, 1.0}; static u32 polyAttr=0,textureFormat=0, texturePalette=0, polyAttrPending=0; //the current vertex color, 5bit values -static int colorRGB[4] = { 31,31,31,31 }; +static u8 colorRGB[4] = { 31,31,31,31 }; u32 control = 0; @@ -342,8 +326,6 @@ void gfx3d_reset() memset(vertlists, 0, sizeof(vertlists)); listTwiddle = 1; twiddleLists(); - gfx3d.polylist = polylist; - gfx3d.vertlist = vertlist; MatrixInit (mtxCurrent[0]); MatrixInit (mtxCurrent[1]); @@ -375,7 +357,6 @@ void gfx3d_reset() viewport = 0xBFFF0000; memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen)); - memset(gfx3d_convertedAlpha,0,sizeof(gfx3d_convertedAlpha)); gfx3d.clearDepth = gfx3d_extendDepth_15_to_24(0x7FFF); @@ -383,6 +364,7 @@ void gfx3d_reset() clInd2 = 0; isSwapBuffers = false; isVBlank = false; + bWaitForPolys = false; #endif GFX_PIPEclear(); @@ -445,9 +427,9 @@ static void SetVertex() vert.coord[1] = coordTransformed[1]; vert.coord[2] = coordTransformed[2]; vert.coord[3] = coordTransformed[3]; - vert.color[0] = colorRGB[0]; - vert.color[1] = colorRGB[1]; - vert.color[2] = colorRGB[2]; + vert.color[0] = GFX3D_5TO6(colorRGB[0]); + vert.color[1] = GFX3D_5TO6(colorRGB[1]); + vert.color[2] = GFX3D_5TO6(colorRGB[2]); tempVertInfo.map[tempVertInfo.count] = vertlist->count + tempVertInfo.count - continuation; tempVertInfo.count++; @@ -1526,6 +1508,17 @@ void gfx3d_glFlush(u32 v) #ifdef USE_GEOMETRY_FIFO_EMULATION gfx3d.sortmode = BIT0(v); gfx3d.wbuffer = BIT1(v); +#if 0 + + if (polygonListCompleted == 2) + { + //u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 
0x600); + //gxstat |= 0x08000000; // set busy flag + //T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat); + bWaitForPolys = true; + return; + } +#endif isSwapBuffers = true; #else if(!flushPending) @@ -1663,10 +1656,9 @@ void gfx3d_VBlankSignal() isVBlank = true; if (isSwapBuffers) { + //if (bWaitForPolys) return; gfx3d_doFlush(); isSwapBuffers = false; - GFX_DELAY(392); - NDS_RescheduleGXFIFO(); } #else //the 3d buffers are swapped when a vblank begins. @@ -1691,16 +1683,23 @@ void gfx3d_VBlankEndSignal(bool skipFrame) if (!drawPending) return; drawPending = FALSE; - if(skipFrame) return; + if(skipFrame) + { + GFX_DELAY(392); + NDS_RescheduleGXFIFO(); + return; + } //if the null 3d core is chosen, then we need to clear out the 3d buffers to keep old data from being rendered if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main) { memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen)); - memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedAlpha)); return; } gpu3D->NDS_3D_Render(); + + GFX_DELAY(392); + NDS_RescheduleGXFIFO(); #else //if we are skipping 3d frames then the 3d rendering will get held up here. 
//but, as soon as we quit skipping frames, the held-up 3d frame will render @@ -1716,7 +1715,6 @@ void gfx3d_VBlankEndSignal(bool skipFrame) if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main) { memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen)); - memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedAlpha)); } #endif } @@ -1761,13 +1759,6 @@ void gfx3d_sendCommandToFIFO(u32 val) #ifdef _3D_LOG INFO("gxFIFO: send 0x%02X: val=0x%08X, pipe %02i, fifo %03i\n", clCmd & 0xFF, val, gxPIPE.tail, gxFIFO.tail); #endif - if (gxFIFO.size > 255) - { - gfx3d_execute3D(); - gfx3d_execute3D(); - gfx3d_execute3D(); - gfx3d_execute3D(); - } switch (clCmd & 0xFF) { case 0x34: // SHININESS - Specular Reflection Shininess Table (W) @@ -1872,13 +1863,6 @@ void gfx3d_sendCommand(u32 cmd, u32 param) #ifdef _3D_LOG INFO("gxFIFO: send 0x%02X: val=0x%08X, pipe %02i, fifo %03i (direct)\n", cmd, param, gxPIPE.tail, gxFIFO.tail); #endif - if (gxFIFO.size > 255) - { - gfx3d_execute3D(); - gfx3d_execute3D(); - gfx3d_execute3D(); - gfx3d_execute3D(); - } switch (cmd) { @@ -2334,12 +2318,26 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest) *dest = lightColor[index]; } -void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha) +void gfx3d_GetLineData(int line, u8** dst) { - *dst = gfx3d_convertedScreen+((line)<<8); - if(dstAlpha != NULL) + *dst = gfx3d_convertedScreen+((line)<<(8+2)); +} + +void gfx3d_GetLineData15bpp(int line, u16** dst) +{ + //TODO - this is not very thread safe!!! 
+ static u16 buf[256]; /* static storage: the caller reads *dst after this function has returned, so an automatic array would dangle */ + *dst = buf; + + u8* lineData; + gfx3d_GetLineData(line, &lineData); + for(int i=0;i<256;i++) { - *dstAlpha = gfx3d_convertedAlpha+((line)<<8); + const u8 r = lineData[i*4+0]; + const u8 g = lineData[i*4+1]; + const u8 b = lineData[i*4+2]; + const u8 a = lineData[i*4+3]; + buf[i] = R6G6B6TORGB15(r,g,b) | alpha_lookup[a]; /* the line data holds 6bit color components, so pack with the 6bit macro */ } } @@ -2382,17 +2380,17 @@ SFORMAT SF_GFX3D[]={ { "GLBT", 4, 1, &BTind}, { "GLPT", 4, 1, &PTind}, { "GLPC", 4, 4, PTcoords}, - { "GFHD", 4, 1, &gxFIFO.head}, - { "GFTA", 4, 1, &gxFIFO.tail}, - { "GFSZ", 4, 1, &gxFIFO.size}, + { "GFHE", 2, 1, &gxFIFO.head}, + { "GFTA", 2, 1, &gxFIFO.tail}, + { "GFSZ", 2, 1, &gxFIFO.size}, { "GFCM", 1, 257, &gxFIFO.cmd[0]}, { "GFPM", 4, 257, &gxFIFO.param[0]}, - { "GPHD", 1, 1, &gxPIPE.head}, + { "GPHE", 1, 1, &gxPIPE.head}, { "GPTA", 1, 1, &gxPIPE.tail}, { "GPSZ", 1, 1, &gxPIPE.size}, { "GPCM", 1, 5, &gxPIPE.cmd[0]}, { "GPPM", 4, 5, &gxPIPE.param[0]}, - { "GCOL", 1, 4, colorRGB}, + { "GCOL", 1, 4, &colorRGB[0]}, { "GLCO", 4, 4, lightColor}, { "GLDI", 4, 4, lightDirection}, { "GMDI", 2, 1, &dsDiffuse}, @@ -2427,8 +2425,7 @@ SFORMAT SF_GFX3D[]={ { "GTVC", 4, 1, &tempVertInfo.count}, { "GTVM", 4, 4, tempVertInfo.map}, { "GTVF", 4, 1, &tempVertInfo.first}, - { "G3CS", 2, 256*192, gfx3d_convertedScreen}, - { "G3CA", 2, 256*192, gfx3d_convertedAlpha}, + { "G3CX", 1, 4*256*192, gfx3d_convertedScreen}, { 0 } }; diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 7bc15962c..4d06a7d1b 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -38,6 +38,19 @@ //produce a 5555 32bit color from a ds RGB15 plus an 5bit alpha #define RGB15TO5555(col,alpha5) (((alpha5)<<24) | ((((col) & 0x7C00)>>10)<<16) | ((((col) & 0x3E0)>>5)<<8) | (((col) & 0x1F))) +//produce a 6665 32bit color from a ds RGB15 plus an 5bit alpha +inline u32 RGB15TO6665(u16 col, u8 alpha5) +{ + u32 ret = alpha5<<24; + u16 r = (col&0x1F)>>0; + u16 g = (col&0x3E0)>>5; + u16 b = (col&0x7C00)>>10; + if(r) ret |= ((r<<1)+1); + if(g) ret 
|= ((g<<1)+1)<<8; + if(b) ret |= ((b<<1)+1)<<16; + return ret; +} + //produce a 24bpp color from a ds RGB15, using a table #define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] ) @@ -50,6 +63,11 @@ //produce a 15bpp color from individual 5bit components #define R5G5B5TORGB15(r,g,b) ((r)|((g)<<5)|((b)<<10)) +//produce a 15bpp color from individual 6bit components (the low bit of each component is dropped) +#define R6G6B6TORGB15(r,g,b) (((r)>>1)|(((g)&0x3E)<<4)|(((b)&0x3E)<<9)) + +#define GFX3D_5TO6(x) ((x)?(((x)<<1)+1):0) + inline u32 gfx3d_extendDepth_15_to_24(u32 depth) { //formula from http://nocash.emubase.de/gbatek.htm#ds3drearplane @@ -247,12 +265,12 @@ extern CACHE_ALIGN u8 mixTable555[32][32][32]; extern CACHE_ALIGN const int material_5bit_to_31bit[32]; extern CACHE_ALIGN const u8 material_5bit_to_8bit[32]; extern CACHE_ALIGN const u8 material_3bit_to_5bit[8]; +extern CACHE_ALIGN const u8 material_3bit_to_6bit[8]; extern CACHE_ALIGN const u8 material_3bit_to_8bit[8]; -extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32]; //these contain the 3d framebuffer converted into the most useful format //they are stored here instead of in the renderers in order to consolidate the buffers -extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192]; +extern CACHE_ALIGN u8 gfx3d_convertedScreen[256*192*4]; extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2]; //see cpp for explanation of illogical *2 //GE commands: @@ -324,7 +342,8 @@ void gfx3d_glGetMatrix(u32 mode, int index, float* dest); void gfx3d_glGetLightDirection(u32 index, u32* dest); void gfx3d_glGetLightColor(u32 index, u32* dest); -void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha); +void gfx3d_GetLineData(int line, u8** dst); +void gfx3d_GetLineData15bpp(int line, u16** dst); struct SFORMAT; extern SFORMAT SF_GFX3D[]; diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index b2ff2b3d9..92fb35912 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -68,8 +68,8 @@ static const int 
kUnsetTranslucentPolyID = 255; static int polynum; -static u8 modulate_table[32][32]; -static u8 decal_table[32][32][32]; +static u8 modulate_table[64][64]; +static u8 decal_table[32][64][64]; static u8 index_lookup_table[65]; static u8 index_start_table[8]; @@ -223,11 +223,7 @@ struct PolyAttr union FragmentColor { u32 color; struct { -#ifdef WORDS_BIGENDIAN - u8 a,b,g,r; -#else u8 r,g,b,a; -#endif }; }; @@ -350,7 +346,8 @@ struct Shader { mode = (polyattr>>4)&0x3; //if there is no texture set, then set to the mode which doesnt even use a texture - if(sampler.texFormat == 0 && mode != 3) + //(no texture makes sense for toon/highlight mode) + if(sampler.texFormat == 0 && (mode == 0 || mode == 1)) mode = 4; } @@ -371,7 +368,7 @@ struct Shader dst.r = modulate_table[texColor.r][materialColor.r]; dst.g = modulate_table[texColor.g][materialColor.g]; dst.b = modulate_table[texColor.b][materialColor.b]; - dst.a = modulate_table[texColor.a][materialColor.a]; + dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1; //dst.color.components.a = 31; //#ifdef _MSC_VER //if(GetAsyncKeyState(VK_SHIFT)) { @@ -399,7 +396,7 @@ struct Shader u = invu*w; v = invv*w; texColor = sampler.sample(u,v); - FragmentColor toonColor = toonTable[materialColor.r]; + FragmentColor toonColor = toonTable[materialColor.r>>1]; if(sampler.texFormat == 0) { //if no texture is set then we dont need to modulate texture with toon @@ -414,20 +411,21 @@ struct Shader dst.r = modulate_table[texColor.r][materialColor.r]; dst.g = modulate_table[texColor.g][materialColor.r]; dst.b = modulate_table[texColor.b][materialColor.r]; - dst.a = modulate_table[texColor.a][materialColor.a]; + dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1; - dst.r = min(31, (dst.r + toonColor.r)); - dst.g = min(31, (dst.g + toonColor.g)); - dst.b = min(31, (dst.b + toonColor.b)); + dst.r = min(63, (dst.r + toonColor.r)); + dst.g = min(63, (dst.g + toonColor.g)); + dst.b = min(63, 
(dst.b + toonColor.b)); } else { dst.r = modulate_table[texColor.r][toonColor.r]; dst.g = modulate_table[texColor.g][toonColor.g]; dst.b = modulate_table[texColor.b][toonColor.b]; - dst.a = modulate_table[texColor.a][materialColor.a]; + dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1; } } + } break; case 3: //shadows @@ -519,9 +517,9 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo //this is a HACK: //we are being very sloppy with our interpolation precision right now //and rather than fix it, i just want to clamp it - shader.materialColor.r = max(0U,min(31U,u32floor(r))); - shader.materialColor.g = max(0U,min(31U,u32floor(g))); - shader.materialColor.b = max(0U,min(31U,u32floor(b))); + shader.materialColor.r = max(0U,min(63U,u32floor(r))); + shader.materialColor.g = max(0U,min(63U,u32floor(g))); + shader.materialColor.b = max(0U,min(63U,u32floor(b))); shader.materialColor.a = polyAttr.alpha; @@ -927,11 +925,11 @@ static char SoftRastInit(void) clippedPolys = new TClippedPoly[POLYLIST_SIZE*2]; - for(int i=0;i<32;i++) + for(int i=0;i<64;i++) { - for(int j=0;j<32;j++) + for(int j=0;j<64;j++) { - modulate_table[i][j] = ((i+1) * (j+1) - 1) >> 5; + modulate_table[i][j] = ((i+1) * (j+1) - 1) >> 6; for(int a=0;a<32;a++) decal_table[a][i][j] = ((i*a) + (j*(31-a))) >> 5; } @@ -1053,9 +1051,9 @@ static void SoftRastFramebufferProcess() if(gfx3d.enableFog) { - u32 r = gfx3d.fogColor&0x1F; - u32 g = (gfx3d.fogColor>>5)&0x1F; - u32 b = (gfx3d.fogColor>>10)&0x1F; + u32 r = ((gfx3d.fogColor)&0x1F)<<1; + u32 g = ((gfx3d.fogColor>>5)&0x1F)<<1; + u32 b = ((gfx3d.fogColor>>10)&0x1F)<<1; u32 a = (gfx3d.fogColor>>16)&0x1F; for(int i=0;i<256*192;i++) { @@ -1079,29 +1077,7 @@ static void SoftRastFramebufferProcess() static void SoftRastConvertFramebuffer() { - FragmentColor* src = screenColor; - u16* dst = gfx3d_convertedScreen; - u8* dstAlpha = gfx3d_convertedAlpha; - - //in an effort to speed this up, the misc pixel 
buffers and the color buffer were separated. - - for(int i=0,y=0;y<192;y++) - { - // u8* wanx = (u8*)&src[i]; - // #define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] } - // #define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F) - // PUNK(0); PUNK(1); - - for(int x=0;x<256;x++,i++) - { - const u8 r = src[i].r; - const u8 g = src[i].g; - const u8 b = src[i].b; - const u8 a = src[i].a; - dst[i] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a]; - dstAlpha[i] = alpha_5bit_to_4bit[a]; - } - } + memcpy(gfx3d_convertedScreen,screenColor,256*192*4); } @@ -1272,10 +1248,10 @@ static void SoftRastRender() Fragment clearFragment; FragmentColor clearFragmentColor; clearFragment.isTranslucentPoly = 0; - clearFragmentColor.r = gfx3d.clearColor&0x1F; - clearFragmentColor.g = (gfx3d.clearColor>>5)&0x1F; - clearFragmentColor.b = (gfx3d.clearColor>>10)&0x1F; - clearFragmentColor.a = (gfx3d.clearColor>>16)&0x1F; + clearFragmentColor.r = GFX3D_5TO6(gfx3d.clearColor&0x1F); + clearFragmentColor.g = GFX3D_5TO6((gfx3d.clearColor>>5)&0x1F); + clearFragmentColor.b = GFX3D_5TO6((gfx3d.clearColor>>10)&0x1F); + clearFragmentColor.a = ((gfx3d.clearColor>>16)&0x1F); clearFragment.polyid.opaque = (gfx3d.clearColor>>24)&0x3F; //special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display //I am not sure whether it is right, though. 
previously this was cleared to 0, as a guess, @@ -1333,9 +1309,9 @@ static void SoftRastRender() //convert the toon colors //TODO for a slight speedup this could be cached in gfx3d (oglrenderer could benefit as well) for(int i=0;i<32;i++) { - toonTable[i].r = gfx3d.u16ToonTable[i]&0x1F; - toonTable[i].g = (gfx3d.u16ToonTable[i]>>5)&0x1F; - toonTable[i].b = (gfx3d.u16ToonTable[i]>>10)&0x1F; + toonTable[i].r = GFX3D_5TO6((gfx3d.u16ToonTable[i])&0x1F); + toonTable[i].g = GFX3D_5TO6((gfx3d.u16ToonTable[i]>>5)&0x1F); + toonTable[i].b = GFX3D_5TO6((gfx3d.u16ToonTable[i]>>10)&0x1F); } //setup fog variables (but only if fog is enabled) diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index bb17ad6a2..cb22799cc 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -180,7 +180,7 @@ static void DebugDumpTexture(int which) static int lastTexture = -1; -#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO5555(color,alpha)) +#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO6665(color,alpha)) template void TexCache_SetTexture(u32 format, u32 texpal) @@ -360,7 +360,7 @@ REJECT: u16 c = pal[*adr&31]; u8 alpha = *adr>>5; if(TEXFORMAT == TexFormat_15bpp) - *dwdst++ = RGB15TO5555(c,material_3bit_to_5bit[alpha]); + *dwdst++ = RGB15TO6665(c,material_3bit_to_5bit[alpha]); else *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); adr++; @@ -537,8 +537,11 @@ REJECT: { for(int i=0;i<4;i++) { - tmp_col[i] >>= 3; - tmp_col[i] &= 0x1F1F1F1F; + tmp_col[i] >>= 2; + tmp_col[i] &= 0x3F3F3F3F; + u32 a = tmp_col[i]>>24; + tmp_col[i] &= 0x00FFFFFF; + tmp_col[i] |= (a>>1)<<24; } } @@ -573,7 +576,7 @@ REJECT: u16 c = pal[*adr&0x07]; u8 alpha = (*adr>>3); if(TEXFORMAT == TexFormat_15bpp) - *dwdst++ = RGB15TO5555(c,alpha); + *dwdst++ = RGB15TO6665(c,alpha); else *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); adr++;