diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index b2bc0a800..261f7c7c2 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -49,6 +49,7 @@ #include "GPU.h" #include "debug.h" #include "render3D.h" +#include "gfx3d.h" #include "GPU_osd.h" #include "debug.h" #include "NDSSystem.h" @@ -131,6 +132,397 @@ GraphicsInterface_struct *GFXCoreList[] = { NULL }; +static const CACHE_ALIGN u8 win_empty[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +static CACHE_ALIGN u16 fadeInColors[17][0x8000]; +static CACHE_ALIGN u16 fadeOutColors[17][0x8000]; + +//this should be public, because it gets used somewhere else +CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32]; + + +/*****************************************************************************/ +// PIXEL RENDERING - 3D +/*****************************************************************************/ + +#define DECL3D \ + int x = dstX; \ + int passing = dstX<<1; \ + u16 color = _3dColorLine[srcX]; \ + u8 alpha = _3dAlphaLine[srcX]; \ + u8* dst = currDst; + +FORCEINLINE void GPU::setFinal3DColorSpecialNone(int dstX, int srcX) +{ + DECL3D; + + // We must blend if the 3D layer has the highest prio + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[dstX]; + u16 final = color; + + // If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 
16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (color | 0x8000)); + bgPixels[x] = 0; + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialBlend(int dstX, int srcX) +{ + DECL3D; + + // We can blend if the 3D layer is selected as 1st target, + //but also if the 3D layer has the highest prio. + if((alpha < 16) && ((BLDCNT & 0x1) || bg0HasHighestPrio)) + { + int bg_under = bgPixels[x]; + u16 final = color; + + //If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (color | 0x8000)); + bgPixels[x] = 0; + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialIncrease(int dstX, int srcX) +{ + DECL3D; + u16 final = color; + + // We must blend if the 3D layer has the highest prio + // But it doesn't seem to have priority over fading, + // unlike semi-transparent sprites + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[x]; + + /* If the layer we are drawing on is selected as 2nd source, we can blend */ + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - 
alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + } + + if(BLDCNT & 0x1) + { + if (BLDY_EVY != 0x0) + { + final = fadeInColors[BLDY_EVY][final&0x7FFF]; + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialDecrease(int dstX, int srcX) +{ + DECL3D; + + u16 final = color; + + // We must blend if the 3D layer has the highest prio + // But it doesn't seem to have priority over fading + // unlike semi-transparent sprites + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[x]; + + // If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + } + + if(BLDCNT & 0x1) + { + if (BLDY_EVY != 0x0) + { + final = fadeOutColors[BLDY_EVY][final&0x7FFF]; + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialNoneWnd(int dstX, int srcX) +{ + DECL3D; + + bool windowDraw = true, windowEffect = true; + + renderline_checkWindows(x, windowDraw, windowEffect); + + if(windowDraw) + { + // We must blend if the 3D layer has the highest prio + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[x]; + u16 final = color; + + // If the layer we are drawing 
on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (color | 0x8000)); + bgPixels[x] = 0; + } + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialBlendWnd(int dstX, int srcX) +{ + DECL3D; + + bool windowDraw = true, windowEffect = true; + + renderline_checkWindows(x, windowDraw, windowEffect); + + if(windowDraw) + { + // We can blend if the 3D layer is selected as 1st target, + // but also if the 3D layer has the highest prio. + if((alpha < 16) && (((BLDCNT & 0x1) && windowEffect) || bg0HasHighestPrio)) + { + int bg_under = bgPixels[x]; + u16 final = color; + + // If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (color | 0x8000)); + bgPixels[x] = 0; + } + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX) +{ + DECL3D; + + bool windowDraw = true, windowEffect = true; + u16 final = color; + + renderline_checkWindows(x, windowDraw, windowEffect); + + if(windowDraw) + { 
+ // We must blend if the 3D layer has the highest prio + // But it doesn't seem to have priority over fading, + // unlike semi-transparent sprites + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[x]; + + // If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + } + + if((BLDCNT & 0x1) && windowEffect) + { + if (BLDY_EVY != 0x0) + { + final = fadeInColors[BLDY_EVY][final&0x7FFF]; + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + } +} + +FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX) +{ + DECL3D; + + bool windowDraw = true, windowEffect = true; + u16 final = color; + + renderline_checkWindows(x, windowDraw, windowEffect); + + if(windowDraw) + { + // We must blend if the 3D layer has the highest prio + // But it doesn't seem to have priority over fading, + // unlike semi-transparent sprites + if((alpha < 16) && bg0HasHighestPrio) + { + int bg_under = bgPixels[x]; + + // If the layer we are drawing on is selected as 2nd source, we can blend + if(BLDCNT & (0x100 << bg_under)) + { + { + COLOR c1, c2, cfinal; + + c1.val = color; + c2.val = T2ReadWord(dst, passing); + + cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); + cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); + cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); + + final = cfinal.val; + } + } + } + + if((BLDCNT & 
0x1) && windowEffect) + { + if (BLDY_EVY != 0x0) + { + final = fadeOutColors[BLDY_EVY][final&0x7FFF]; + } + + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + else + { + T2WriteWord(dst, passing, (final | 0x8000)); + bgPixels[x] = 0; + } + } +} + + static void setFinalOBJColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); static void setFinalOBJColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); static void setFinalOBJColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); @@ -140,16 +532,6 @@ static void setFinalOBJColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u1 static void setFinalOBJColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); static void setFinalOBJColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); -static void setFinal3DColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialDecrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialNoneWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); -static void setFinal3DColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); - - const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = { setFinalOBJColorSpecialNone, setFinalOBJColorSpecialBlend, @@ -160,24 +542,6 @@ const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = { 
setFinalOBJColorSpecialIncreaseWnd, setFinalOBJColorSpecialDecreaseWnd,}; -const GPU::Final3DColFunct pixelBlitters3D[8] = { - setFinal3DColorSpecialNone, - setFinal3DColorSpecialBlend, - setFinal3DColorSpecialIncrease, - setFinal3DColorSpecialDecrease, - setFinal3DColorSpecialNoneWnd, - setFinal3DColorSpecialBlendWnd, - setFinal3DColorSpecialIncreaseWnd, - setFinal3DColorSpecialDecreaseWnd}; - -static const CACHE_ALIGN u8 win_empty[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; -static CACHE_ALIGN u16 fadeInColors[17][0x8000]; -static CACHE_ALIGN u16 fadeOutColors[17][0x8000]; -CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32]; /*****************************************************************************/ // INITIALIZATION @@ -236,9 +600,6 @@ static void GPU_InitFadeColors() } } -static u16 line3Dcolor[512]; -static u8 line3Dalpha[512]; - GPU * GPU_Init(u8 l) { GPU * g; @@ -249,17 +610,13 @@ GPU * GPU_Init(u8 l) GPU_Reset(g, l); GPU_InitFadeColors(); - //clear out the excess line buffers (beyond x=255) - memset(line3Dcolor+256, 0, 256*sizeof(u16)); - memset(line3Dalpha+256, 0, 256*sizeof(u8)); - g->curr_win[0] = win_empty; g->curr_win[1] = win_empty; g->need_update_winh[0] = true; g->need_update_winh[1] = true; g->setFinalColorBck_funcNum = 0; + g->setFinalColor3d_funcNum = 0; g->setFinalColorSpr = setFinalOBJColorSpecialNone; - g->setFinalColor3D = setFinal3DColorSpecialNone; return g; } @@ -269,8 +626,8 @@ void GPU_Reset(GPU *g, u8 l) memset(g, 0, sizeof(GPU)); 
g->setFinalColorBck_funcNum = 0; + g->setFinalColor3d_funcNum = 0; g->setFinalColorSpr = setFinalOBJColorSpecialNone; - g->setFinalColor3D = setFinal3DColorSpecialNone; g->core = l; g->BGSize[0][0] = g->BGSize[1][0] = g->BGSize[2][0] = g->BGSize[3][0] = 256; g->BGSize[0][1] = g->BGSize[1][1] = g->BGSize[2][1] = g->BGSize[3][1] = 256; @@ -413,7 +770,7 @@ void SetupFinalPixelBlitter (GPU *gpu) gpu->setFinalColorSpr = pixelBlittersOBJ[windowUsed*4 + blendMode]; gpu->setFinalColorBck_funcNum = windowUsed*4 + blendMode; - gpu->setFinalColor3D = pixelBlitters3D[windowUsed*4 + blendMode]; + gpu->setFinalColor3d_funcNum = windowUsed*4 + blendMode; } @@ -1035,364 +1392,9 @@ static void setFinalOBJColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u } } -/*****************************************************************************/ -// PIXEL RENDERING - 3D -/*****************************************************************************/ - -static void setFinal3DColorSpecialNone(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - /* We must blend if the 3D layer has the highest prio */ - if((alpha < 16) && gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - u16 final = color; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - gpu->bgPixels[x] = 0; - } -} - -static void setFinal3DColorSpecialBlend(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - 
/* We can blend if the 3D layer is selected as 1st target, */ - /* but also if the 3D layer has the highest prio. */ - if((alpha < 16) && ((gpu->BLDCNT & 0x1) || gpu->bg0HasHighestPrio)) - { - int bg_under = gpu->bgPixels[x]; - u16 final = color; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - gpu->bgPixels[x] = 0; - } -} - -static void setFinal3DColorSpecialIncrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - u16 final = color; - - /* We must blend if the 3D layer has the highest prio */ - /* But it doesn't seem to have priority over fading, */ - /* unlike semi-transparent sprites */ - if((alpha < 16) && gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - } - - if(gpu->BLDCNT & 0x1) - { - if (gpu->BLDY_EVY != 0x0) - { - final = fadeInColors[gpu->BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] 
= 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } -} - -static void setFinal3DColorSpecialDecrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - u16 final = color; - - /* We must blend if the 3D layer has the highest prio */ - /* But it doesn't seem to have priority over fading, */ - /* unlike semi-transparent sprites */ - if((alpha < 16) && gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - } - - if(gpu->BLDCNT & 0x1) - { - if (gpu->BLDY_EVY != 0x0) - { - final = fadeOutColors[gpu->BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } -} - -static void setFinal3DColorSpecialNoneWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - bool windowDraw = true, windowEffect = true; - - gpu->renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - /* We must blend if the 3D layer has the highest prio */ - if((alpha < 16) && gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - u16 final = color; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = 
((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - gpu->bgPixels[x] = 0; - } - } -} - -static void setFinal3DColorSpecialBlendWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - bool windowDraw = true, windowEffect = true; - - gpu->renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - /* We can blend if the 3D layer is selected as 1st target, */ - /* but also if the 3D layer has the highest prio. */ - if((alpha < 16) && (((gpu->BLDCNT & 0x1) && windowEffect) || gpu->bg0HasHighestPrio)) - { - int bg_under = gpu->bgPixels[x]; - u16 final = color; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (color | 0x8000)); - gpu->bgPixels[x] = 0; - } - } -} - -static void setFinal3DColorSpecialIncreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - bool windowDraw = true, windowEffect = true; - u16 final = color; - - gpu->renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - /* We must blend if the 3D layer has the highest prio */ - /* But it doesn't seem to have priority over fading, */ - /* unlike semi-transparent sprites */ - if((alpha < 16) && 
gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - } - - if((gpu->BLDCNT & 0x1) && windowEffect) - { - if (gpu->BLDY_EVY != 0x0) - { - final = fadeInColors[gpu->BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - } -} - -static void setFinal3DColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x) -{ - bool windowDraw = true, windowEffect = true; - u16 final = color; - - gpu->renderline_checkWindows(x, windowDraw, windowEffect); - - if(windowDraw) - { - /* We must blend if the 3D layer has the highest prio */ - /* But it doesn't seem to have priority over fading, */ - /* unlike semi-transparent sprites */ - if((alpha < 16) && gpu->bg0HasHighestPrio) - { - int bg_under = gpu->bgPixels[x]; - - /* If the layer we are drawing on is selected as 2nd source, we can blend */ - if(gpu->BLDCNT & (0x100 << bg_under)) - { - { - COLOR c1, c2, cfinal; - - c1.val = color; - c2.val = T2ReadWord(dst, passing); - - cfinal.bits.red = ((c1.bits.red * alpha / 16) + (c2.bits.red * (16 - alpha) / 16)); - cfinal.bits.green = ((c1.bits.green * alpha / 16) + (c2.bits.green * (16 - alpha) / 16)); - cfinal.bits.blue = ((c1.bits.blue * alpha / 16) + (c2.bits.blue * (16 - alpha) / 16)); - - final = cfinal.val; - } - } - } - - if((gpu->BLDCNT & 0x1) && windowEffect) - { - if (gpu->BLDY_EVY != 0x0) - { - final = 
fadeOutColors[gpu->BLDY_EVY][final&0x7FFF]; - } - - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - else - { - T2WriteWord(dst, passing, (final | 0x8000)); - gpu->bgPixels[x] = 0; - } - } -} - FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x) { + //if someone disagrees with these, they could be reimplemented as a function pointer easily switch(setFinalColorBck_funcNum | (blend1?8:0)) { case 0x0: setFinalBGColorSpecialNone(color,x,false); break; @@ -1415,6 +1417,22 @@ FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x) } +FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX) +{ + //if someone disagrees with these, they could be reimplemented as a function pointer easily + switch(setFinalColor3d_funcNum) + { + case 0x0: setFinal3DColorSpecialNone(dstX,srcX); break; + case 0x1: setFinal3DColorSpecialBlend(dstX,srcX); break; + case 0x2: setFinal3DColorSpecialIncrease(dstX,srcX); break; + case 0x3: setFinal3DColorSpecialDecrease(dstX,srcX); break; + case 0x4: setFinal3DColorSpecialNoneWnd(dstX,srcX); break; + case 0x5: setFinal3DColorSpecialBlendWnd(dstX,srcX); break; + case 0x6: setFinal3DColorSpecialIncreaseWnd(dstX,srcX); break; + case 0x7: setFinal3DColorSpecialDecreaseWnd(dstX,srcX); break; + }; +} + //this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call //overhead was ridiculous and terrible FORCEINLINE void GPU::__setFinalColorBck(u16 color, u8 x, bool opaque) @@ -2687,17 +2705,15 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l) BGxOFS *bgofs = &gpu->dispx_st->dispx_BGxOFS[i16]; u16 hofs = (T1ReadWord((u8*)&bgofs->BGxHOFS, 0) & 0x1FF); - //line3Dcolor and line3Dalpha are left cleared by GPU initialization, - //and they always stay that way. 
- - gpu3D->NDS_3D_GetLine(l, line3Dcolor, line3Dalpha); + gfx3d_GetLineData(l, &gpu->_3dColorLine, &gpu->_3dAlphaLine); + u16* colorLine = gpu->_3dColorLine; for(int k = 0; k < 256; k++) { int q = ((k + hofs) & 0x1FF); - if(line3Dcolor[q] & 0x8000) - gpu->setFinalColor3D(gpu, (k << 1), gpu->currDst, line3Dcolor[q], line3Dalpha[q], k); + if((q < 256) && (colorLine[q] & 0x8000)) //q>=256 must stay transparent, as the old zeroed line3Dcolor[256..511] did; the converted 3D rows are only 256 pixels wide (assumes gfx3d_GetLineData points into gfx3d_convertedScreen - TODO confirm) + gpu->setFinalColor3d(k, q); } continue; @@ -2785,9 +2801,9 @@ static void GPU_ligne_DispCapture(u16 l) case 1: // Capture 3D { //INFO("Capture 3D\n"); - u16 cap3DLine[512]; - gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine); - CAPCOPY(((u8*)cap3DLine),cap_dst); + u16* colorLine; + gfx3d_GetLineData(l, &colorLine, NULL); + CAPCOPY(((u8*)colorLine),cap_dst); } break; } @@ -2818,7 +2834,6 @@ static void GPU_ligne_DispCapture(u16 l) //INFO("Capture source is SourceA+B blended\n"); u16 *srcA = NULL; u16 *srcB = NULL; - u16 cap3DLine[512]; if (gpu->dispCapCnt.srcA == 0) { @@ -2830,8 +2845,7 @@ static void GPU_ligne_DispCapture(u16 l) } else { - gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine); - srcA = (u16 *)cap3DLine; // 3D screen + gfx3d_GetLineData(l, &srcA, NULL); } if (gpu->dispCapCnt.srcB == 0) // VRAM screen diff --git a/desmume/src/GPU.h b/desmume/src/GPU.h index 936273531..9029cad68 100644 --- a/desmume/src/GPU.h +++ b/desmume/src/GPU.h @@ -695,6 +695,9 @@ struct GPU bool blend1; u8* currDst; + u16* _3dColorLine; + u8* _3dAlphaLine; + static struct MosaicLookup { @@ -721,11 +724,12 @@ struct GPU u16 blend(u16 colA, u16 colB); typedef void (*FinalOBJColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x); - typedef void (*Final3DColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x); + typedef void (*Final3DColFunct)(GPU *gpu, int dstX, int srcX); int setFinalColorBck_funcNum; + int setFinalColor3d_funcNum; FinalOBJColFunct setFinalColorSpr; - Final3DColFunct setFinalColor3D; + //Final3DColFunct setFinalColor3D; enum SpriteRenderMode { SPRITE_1D, SPRITE_2D } 
spriteRenderMode; @@ -735,6 +739,8 @@ struct GPU void spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab); void setFinalColorBG(u16 color, u8 x); + void setFinalColor3d(int dstX, int srcX); + FORCEINLINE void setFinalBGColorSpecialNone(u16 color, u8 x, bool blend1); FORCEINLINE void setFinalBGColorSpecialBlend(u16 color, u8 x, bool blend1); FORCEINLINE void setFinalBGColorSpecialIncrease(u16 color, u8 x, bool blend1); @@ -743,6 +749,16 @@ struct GPU FORCEINLINE void setFinalBGColorSpecialBlendWnd(u16 color, u8 x, bool blend1); FORCEINLINE void setFinalBGColorSpecialIncreaseWnd(u16 color, u8 x, bool blend1); FORCEINLINE void setFinalBGColorSpecialDecreaseWnd(u16 color, u8 x, bool blend1); + + FORCEINLINE void setFinal3DColorSpecialNone(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialBlend(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialIncrease(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialDecrease(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialNoneWnd(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialBlendWnd(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX); + FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX); + void __setFinalColorBck(u16 color, u8 x, bool opaque); void setAffineStart(int layer, int xy, u32 val); diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index dbf0c972c..1f2d9440c 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -75,19 +75,15 @@ static void ENDGL() { #define CTASSERT(x) typedef char __assert ## y[(x) ? 
1 : -1] #endif -static ALIGN(16) u8 GPU_screen3D [256*192*4]; -//static ALIGN(16) unsigned char GPU_screenStencil[256*256]; +static ALIGN(16) u8 GPU_screen3D [256*192*4]; + static const unsigned short map3d_cull[4] = {GL_FRONT_AND_BACK, GL_FRONT, GL_BACK, 0}; static const int texEnv[4] = { GL_MODULATE, GL_DECAL, GL_MODULATE, GL_MODULATE }; static const int depthFunc[2] = { GL_LESS, GL_EQUAL }; static bool needRefreshFramebuffer = false; - -float clearAlpha; - - - +static bool validFramebuffer = false; //derived values extracted from polyattr etc static bool wireframe=false, alpha31=false; @@ -474,11 +470,6 @@ static char OGLInit(void) glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP); //clamp so that we dont run off the edges due to 1.0 -> [0,31] math } - if(glBlendFuncSeparateEXT == NULL) - clearAlpha = 1; - else - clearAlpha = 0; - OGLReset(); ENDGL(); @@ -839,6 +830,7 @@ static void OGLVramReconfigureSignal() TexCache_Invalidate(); } + static void GL_ReadFramebuffer() { if(!BEGINGL()) return; @@ -849,141 +841,47 @@ static void GL_ReadFramebuffer() //convert the pixels to a different format which is more convenient //is it safe to modify the screen buffer? 
if not, we could make a temp copy - for(int i=0;i<256*192;i++) { - u32 &u32screen3D = ((u32*)GPU_screen3D)[i]; - u32screen3D>>=3; - u32screen3D &= 0x1F1F1F1F; - } - - -//debug: view depth buffer via color buffer for debugging - //int ctr=0; - //for(ctr=0;ctr<256*192;ctr++) { - // float zval = GPU_screen3Ddepth[ctr]; - // u8* colorPtr = GPU_screen3D+ctr*3; - // if(zval<0) { - // colorPtr[0] = 255; - // colorPtr[1] = 0; - // colorPtr[2] = 0; - // } else if(zval>1) { - // colorPtr[0] = 0; - // colorPtr[1] = 0; - // colorPtr[2] = 255; - // } else { - // colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255; - // //INFO("%f %f %d\n",zval, zval*255,colorPtr[0]); - // } - - //} -} - -static void OGLGetLineCaptured(int line, u16* dst) -{ - if(needRefreshFramebuffer) { - needRefreshFramebuffer = false; - GL_ReadFramebuffer(); - } - - u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10); -// u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8); - - for(int i = 0; i < 256; i++) + for(int i=0,y=191;y>=0;y--) { - /* u32 stencil = screenStencil[i]; + u16* dst = gfx3d_convertedScreen + (y<<8); + u8* dstAlpha = gfx3d_convertedAlpha + (y<<8); - if(!stencil) + #ifndef NOSSE + //I dont know much about this kind of stuff, but this seems to help + //for some reason I couldnt make the intrinsics work + u8* wanx = (u8*)&((u32*)GPU_screen3D)[i]; + #define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] } + #define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F) + PUNK(0); PUNK(1); + #endif + + for(int x=0;x<256;x++,i++) { - dst[i] = 0x0000; - continue; - }*/ + u32 &u32screen3D = ((u32*)GPU_screen3D)[i]; + u32screen3D>>=3; + u32screen3D &= 0x1F1F1F1F; - int t=i<<2; - /* u8 r = screen3D[t+2]; - u8 g = screen3D[t+1]; - u8 b = screen3D[t+0];*/ - - //if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed - //dst[i] = (b<<10) | 
(g<<5) | (r) | 0x8000; - dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 0x8000 : 0x0000)); + const int t = i<<2; + const u8 a = GPU_screen3D[t+3]; + const u8 r = GPU_screen3D[t+2]; + const u8 g = GPU_screen3D[t+1]; + const u8 b = GPU_screen3D[t+0]; + dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a]; + dstAlpha[x] = alpha_5bit_to_4bit[a]; + } } } - -static void OGLGetLine(int line, u16* dst, u8* dstAlpha) +static void OGLCheckFresh() { - assert(line<192 && line>=0); - - if(needRefreshFramebuffer) { + if(needRefreshFramebuffer) + { needRefreshFramebuffer = false; GL_ReadFramebuffer(); } - - u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10); - //u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8); - - //the renderer clears the stencil to 0 - //then it sets it to 1 whenever it renders a pixel that passes the alpha test - //(it also sets it to 2 under some circumstances when rendering shadow volumes) - //so, we COULD use a zero stencil value to indicate that nothing should get composited. - //in fact, we are going to do that to fix some problems. - //but beware that it i figure it might could CAUSE some problems - - //this alpha compositing blending logic isnt thought through very much - //someone needs to think about what bitdepth it should take place at and how to do it efficiently - - for(int i=0;i<256;i++) - { - // u32 stencil = screenStencil[i]; - - //you would use this if you wanted to use the stencil buffer to make decisions here - // if(!stencil) continue; - - // u16 oldcolor = dst[j]; - - int t=i<<2; - // u32 dstpixel; - - dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 
0x8000 : 0x0000)); - dstAlpha[i] = alpha_5bit_to_4bit[screen3D[t+3]]; - - //old debug reminder: display alpha channel - //u32 r = screen3D[t+3]; - //u32 g = screen3D[t+3]; - //u32 b = screen3D[t+3]; - - //if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed - - /* u32 a = screen3D[t+3]; - - typedef u8 mixtbl[32][32]; - mixtbl & mix = mixTable555[a]; - - //r - u32 newpix = screen3D[t+2]; - u32 oldpix = oldcolor&0x1F; - newpix = mix[newpix][oldpix]; - dstpixel = newpix; - - //g - newpix = screen3D[t+1]; - oldpix = (oldcolor>>5)&0x1F; - newpix = mix[newpix][oldpix]; - dstpixel |= (newpix<<5); - - //b - newpix = screen3D[t+0]; - oldpix = (oldcolor>>10)&0x1F; - newpix = mix[newpix][oldpix]; - dstpixel |= (newpix<<10); - - dst[j] = dstpixel;*/ - } } - - - GPU3DInterface gpu3Dgl = { "OpenGL", OGLInit, @@ -991,9 +889,5 @@ GPU3DInterface gpu3Dgl = { OGLClose, OGLRender, OGLVramReconfigureSignal, - OGLGetLine, - OGLGetLineCaptured + OGLCheckFresh, }; - - - diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index ca16ffb32..bb9f43adf 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -103,6 +103,13 @@ CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = { 0x10, 0x10 }; +CACHE_ALIGN static const u16 alpha_lookup[] = { + 0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, + 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}; + + //private acceleration tables static float float16table[65536]; static float float10Table[1024]; @@ -112,6 +119,9 @@ static float normalTable[1024]; #define fix2float(v) (((float)((s32)(v))) / (float)(1<<12)) #define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9)) +CACHE_ALIGN u16 gfx3d_convertedScreen[256*192]; +CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192]; + // Matrix stack handling static CACHE_ALIGN MatrixStack mtxStack[4] = { 
MatrixStack(1), // Projection stack @@ -2156,6 +2166,16 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest) *dest = lightColor[index]; } +void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha) +{ + gpu3D->NDS_3D_CheckFresh(); + *dst = gfx3d_convertedScreen+((line)<<8); + if(dstAlpha != NULL) + { + *dstAlpha = gfx3d_convertedAlpha+((line)<<8); + } +} + //http://www.opengl.org/documentation/specs/version1.1/glspec1.1/node17.html //talks about the state required to process verts in quadlists etc. helpful ideas. diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 050e36a4e..8fb50d14f 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -180,6 +180,7 @@ extern GFX3D gfx3d; //--------------------- +extern CACHE_ALIGN const u16 alpha_lookup[32]; extern CACHE_ALIGN u32 color_15bit_to_24bit[32768]; extern CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768]; extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768]; @@ -190,6 +191,11 @@ extern CACHE_ALIGN const u8 material_3bit_to_5bit[8]; extern CACHE_ALIGN const u8 material_3bit_to_8bit[8]; extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32]; +//these contain the 3d framebuffer converted into the most useful format +//they are stored here instead of in the renderers in order to consolidate the buffers +extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192]; +extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192]; + //GE commands: void gfx3d_glViewPort(u32 v); void gfx3d_glClearColor(u32 v); @@ -212,11 +218,11 @@ BOOL gfx3d_glMultMatrix4x4(s32 v); void gfx3d_glBegin(u32 v); void gfx3d_glEnd(void); void gfx3d_glColor3b(u32 v); -BOOL gfx3d_glVertex16b(unsigned int v); +BOOL gfx3d_glVertex16b(u32 v); void gfx3d_glVertex10b(u32 v); -void gfx3d_glVertex3_cord(unsigned int one, unsigned int two, unsigned int v); +void gfx3d_glVertex3_cord(u32 one, u32 two, u32 v); void gfx3d_glVertex_rel(u32 v); -void gfx3d_glSwapScreen(unsigned int screen); +void gfx3d_glSwapScreen(u32 screen); int 
gfx3d_GetNumPolys(); int gfx3d_GetNumVertex(); void gfx3d_glPolygonAttrib (u32 val); @@ -229,16 +235,16 @@ void gfx3d_glTexImage(u32 val); void gfx3d_glTexPalette(u32 val); void gfx3d_glTexCoord(u32 val); void gfx3d_glNormal(u32 v); -s32 gfx3d_GetClipMatrix (unsigned int index); -s32 gfx3d_GetDirectionalMatrix (unsigned int index); +s32 gfx3d_GetClipMatrix (u32 index); +s32 gfx3d_GetDirectionalMatrix (u32 index); void gfx3d_glLightDirection (u32 v); void gfx3d_glLightColor (u32 v); void gfx3d_glAlphaFunc(u32 v); BOOL gfx3d_glBoxTest(u32 v); BOOL gfx3d_glPosTest(u32 v); void gfx3d_glVecTest(u32 v); -unsigned int gfx3d_glGetPosRes(unsigned int index); -unsigned short gfx3d_glGetVecRes(unsigned int index); +u32 gfx3d_glGetPosRes(u32 index); +u16 gfx3d_glGetVecRes(u32 index); void gfx3d_glFlush(u32 v); void gfx3d_VBlankSignal(); void gfx3d_VBlankEndSignal(bool skipFrame); @@ -248,9 +254,11 @@ void gfx3d_sendCommandToFIFO(u32 val); void gfx3d_sendCommand(u32 cmd, u32 param); //other misc stuff -void gfx3d_glGetMatrix(unsigned int mode, int index, float* dest); -void gfx3d_glGetLightDirection(unsigned int index, unsigned int* dest); -void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest); +void gfx3d_glGetMatrix(u32 mode, int index, float* dest); +void gfx3d_glGetLightDirection(u32 index, u32* dest); +void gfx3d_glGetLightColor(u32 index, u32* dest); + +void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha); struct SFORMAT; extern SFORMAT SF_GFX3D[]; diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index f5d9b36ed..38467868e 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -63,6 +63,7 @@ template T _min(T a, T b, T c, T d) { return min(_min(a,b,d),c); } template T _max(T a, T b, T c, T d) { return max(_max(a,b,d),c); } static int polynum; +static bool validFramebuffer = false; static u8 modulate_table[32][32]; static u8 decal_table[32][32][32]; @@ -202,16 +203,16 @@ struct PolyAttr } polyAttr; +union 
FragmentColor { + u32 color; + struct { + //#ifdef WORDS_BIGENDIAN ? + u8 r,g,b,a; + }; +}; + struct Fragment { - union Color { - u32 color; - struct { - //#ifdef WORDS_BIGENDIAN ? - u8 r,g,b,a; - } components; - } color; - u32 depth; struct { @@ -220,7 +221,7 @@ struct Fragment u8 stencil; - u8 pad[5]; + u8 pad; }; static VERT* verts[MAX_CLIPPED_VERTS]; @@ -231,6 +232,8 @@ INLINE static void SubmitVertex(int vert_index, VERT& rawvert) } static Fragment screen[256*192]; +static FragmentColor screenColor[256*192]; + FORCEINLINE int iround(float f) { return (int)f; //lol @@ -300,7 +303,7 @@ static struct Sampler } } - FORCEINLINE Fragment::Color sample(float u, float v) + FORCEINLINE FragmentColor sample(float u, float v) { //finally, we can use floor here. but, it is slower than we want. //the best solution is probably to wait until the pipeline is full of fixed point @@ -308,7 +311,7 @@ static struct Sampler int iv = floorf(v); dowrap(iu,iv); - Fragment::Color color; + FragmentColor color; color.color = ((u32*)textures.currentData)[(iv<> 3); - toonColor.components.g = ((toonColorVal & 0x00FF00) >> 11); - toonColor.components.b = ((toonColorVal & 0xFF0000) >> 19); - dst.color.components.r = modulate_table[texColor.components.r][toonColor.components.r]; - dst.color.components.g = modulate_table[texColor.components.g][toonColor.components.g]; - dst.color.components.b = modulate_table[texColor.components.b][toonColor.components.b]; - dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a]; + u32 toonColorVal; toonColorVal = gfx3d.rgbToonTable[materialColor.r]; + FragmentColor toonColor; + toonColor.r = ((toonColorVal & 0x0000FF) >> 3); + toonColor.g = ((toonColorVal & 0x00FF00) >> 11); + toonColor.b = ((toonColorVal & 0xFF0000) >> 19); + dst.r = modulate_table[texColor.r][toonColor.r]; + dst.g = modulate_table[texColor.g][toonColor.g]; + dst.b = modulate_table[texColor.b][toonColor.b]; + dst.a = 
modulate_table[texColor.a][materialColor.a]; if(gfx3d.shading == GFX3D::HIGHLIGHT) { - dst.color.components.r = min(31, (dst.color.components.r + toonColor.components.r)); - dst.color.components.g = min(31, (dst.color.components.g + toonColor.components.g)); - dst.color.components.b = min(31, (dst.color.components.b + toonColor.components.b)); + dst.r = min(31, (dst.r + toonColor.r)); + dst.g = min(31, (dst.g + toonColor.g)); + dst.b = min(31, (dst.b + toonColor.b)); } break; case 3: //shadows //is this right? only with the material color? - dst.color = materialColor; + dst = materialColor; break; case 4: //our own special mode which only uses the material color (for when texturing is disabled) - dst.color = materialColor; + dst = materialColor; break; } @@ -399,44 +402,45 @@ struct Shader } shader; -static FORCEINLINE void alphaBlend(Fragment::Color & dst, const Fragment::Color & src) +static FORCEINLINE void alphaBlend(FragmentColor & dst, const FragmentColor & src) { if(gfx3d.enableAlphaBlending) { - if(src.components.a == 0) + if(src.a == 0) { - dst.components.a = max(src.components.a,dst.components.a); + dst.a = max(src.a,dst.a); } - else if(src.components.a == 31 || dst.components.a == 0) + else if(src.a == 31 || dst.a == 0) { - dst.color = src.color; - dst.components.a = max(src.components.a,dst.components.a); + dst = src; + dst.a = max(src.a,dst.a); } else { - u8 alpha = src.components.a+1; + u8 alpha = src.a+1; u8 invAlpha = 32 - alpha; - dst.components.r = (alpha*src.components.r + invAlpha*dst.components.r)>>5; - dst.components.g = (alpha*src.components.g + invAlpha*dst.components.g)>>5; - dst.components.b = (alpha*src.components.b + invAlpha*dst.components.b)>>5; - dst.components.a = max(src.components.a,dst.components.a); + dst.r = (alpha*src.r + invAlpha*dst.r)>>5; + dst.g = (alpha*src.g + invAlpha*dst.g)>>5; + dst.b = (alpha*src.b + invAlpha*dst.b)>>5; + dst.a = max(src.a,dst.a); } } else { - if(src.components.a == 0) + if(src.a == 0) { //do nothing; 
the fragment is totally transparent } else { - dst.color = src.color; + dst = src; } } } static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) { Fragment &destFragment = screen[adr]; + FragmentColor &destFragmentColor = screenColor[adr]; //depth test u32 depth; @@ -478,25 +482,25 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo //this is a HACK: //we are being very sloppy with our interpolation precision right now //and rather than fix it, i just want to clamp it - shader.materialColor.components.r = max(0,min(31,(int)r)); - shader.materialColor.components.g = max(0,min(31,(int)g)); - shader.materialColor.components.b = max(0,min(31,(int)b)); + shader.materialColor.r = max(0,min(31,(int)r)); + shader.materialColor.g = max(0,min(31,(int)g)); + shader.materialColor.b = max(0,min(31,(int)b)); - shader.materialColor.components.a = polyAttr.alpha; + shader.materialColor.a = polyAttr.alpha; //pixel shader - Fragment shaderOutput; + FragmentColor shaderOutput; shader.shade(shaderOutput); //alpha test if(gfx3d.enableAlphaTest) { - if(shaderOutput.color.components.a < gfx3d.alphaTestRef) + if(shaderOutput.a < gfx3d.alphaTestRef) goto rejected_fragment; } //we shouldnt do any of this if we generated a totally transparent pixel - if(shaderOutput.color.components.a != 0) + if(shaderOutput.a != 0) { //handle shadow polys if(shader.mode == 3) @@ -533,7 +537,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo } //handle polyids - bool isOpaquePixel = shaderOutput.color.components.a == 31; + bool isOpaquePixel = shaderOutput.a == 31; if(isOpaquePixel) { destFragment.polyid.opaque = polyAttr.polyid; @@ -561,7 +565,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo } //alpha blending and write color - alphaBlend(destFragment.color, shaderOutput.color); + alphaBlend(destFragmentColor, shaderOutput); //depth writing 
if(isOpaquePixel || polyAttr.translucentDepthWrite) @@ -925,7 +929,9 @@ static char SoftRastInit(void) return 1; } -static void SoftRastReset() {} +static void SoftRastReset() { + validFramebuffer = false; +} static void SoftRastClose() { @@ -935,50 +941,41 @@ static void SoftRastVramReconfigureSignal() { TexCache_Invalidate(); } -CACHE_ALIGN static const u16 alpha_lookup[] = { - 0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}; - -static void SoftRastGetLine(int line, u16* dst, u8* dstAlpha) +static void SoftRastConvertFramebuffer() { - Fragment* src = screen+((line)<<8); - for(int i=0;i<256;i++) + FragmentColor* src = screenColor; + u16* dst = gfx3d_convertedScreen; + u8* dstAlpha = gfx3d_convertedAlpha; + + //in an effort to speed this up, the misc pixel buffers and the color buffer were separated. + + for(int i=0,y=0;y<192;y++) { - const bool testRenderAlpha = false; - const u8 r = src->color.components.r; - const u8 g = src->color.components.g; - const u8 b = src->color.components.b; - *dst = R5G5B5TORGB15(r,g,b); + #ifndef NOSSE + u8* wanx = (u8*)&src[i]; + #define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] } + #define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F) + PUNK(0); PUNK(1); + #endif - *dst |= alpha_lookup[src->color.components.a]; - *dstAlpha = alpha_5bit_to_4bit[src->color.components.a]; - - if(testRenderAlpha) + for(int x=0;x<256;x++,i++) { - *dst = 0x8000 | R5G5B5TORGB15(src->color.components.a,src->color.components.a,src->color.components.a); - *dstAlpha = 16; + const u8 r = src[i].r; + const u8 g = src[i].g; + const u8 b = src[i].b; + const u8 a = src[i].a; + dst[i] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a]; + dstAlpha[i] = alpha_5bit_to_4bit[a]; } - - 
src++; - dst++; - dstAlpha++; } - + validFramebuffer = true; } -static void SoftRastGetLineCaptured(int line, u16* dst) { - Fragment* src = screen+((line)<<8); - for(int i=0;i<256;i++) +static void SoftRastCheckFresh() +{ + if(!validFramebuffer) { - const u8 r = src->color.components.r; - const u8 g = src->color.components.g; - const u8 b = src->color.components.b; - *dst = R5G5B5TORGB15(r,g,b); - *dst |= alpha_lookup[src->color.components.a]; - src++; - dst++; + SoftRastConvertFramebuffer(); } } @@ -1158,10 +1155,11 @@ static void clipPoly(POLY* poly) static void SoftRastRender() { Fragment clearFragment; - clearFragment.color.components.r = gfx3d.clearColor&0x1F; - clearFragment.color.components.g = (gfx3d.clearColor>>5)&0x1F; - clearFragment.color.components.b = (gfx3d.clearColor>>10)&0x1F; - clearFragment.color.components.a = (gfx3d.clearColor>>16)&0x1F; + FragmentColor clearFragmentColor; + clearFragmentColor.r = gfx3d.clearColor&0x1F; + clearFragmentColor.g = (gfx3d.clearColor>>5)&0x1F; + clearFragmentColor.b = (gfx3d.clearColor>>10)&0x1F; + clearFragmentColor.a = (gfx3d.clearColor>>16)&0x1F; clearFragment.polyid.opaque = (gfx3d.clearColor>>24)&0x3F; //special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display //I am not sure whether it is right, though. 
previously this was cleared to 0, as a guess, @@ -1171,6 +1169,8 @@ static void SoftRastRender() clearFragment.stencil = 0; for(int i=0;i<256*192;i++) screen[i] = clearFragment; + for(int i=0;i<256*192;i++) + screenColor[i] = clearFragmentColor; //convert colors to float to get more precision in case we need it for(int i=0;icount;i++) @@ -1292,8 +1292,9 @@ static void SoftRastRender() shape_engine(type,!polyAttr.backfacing); } - // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); + validFramebuffer = false; + // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); } GPU3DInterface gpu3DRasterize = { @@ -1303,6 +1304,5 @@ GPU3DInterface gpu3DRasterize = { SoftRastClose, SoftRastRender, SoftRastVramReconfigureSignal, - SoftRastGetLine, - SoftRastGetLineCaptured + SoftRastCheckFresh, }; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 8ef8c0c50..9c2bb7a17 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -24,8 +24,6 @@ int cur3DCore = GPU3D_NULL; static void NDS_nullFunc1 (void){} static char NDS_nullFunc2 (void){ return 1; } -static void NDS_nullFunc3 (int,unsigned short*) {} -static void NDS_nullFunc4 (int,unsigned short*,unsigned char*) {} GPU3DInterface gpu3DNull = { "None", @@ -34,8 +32,7 @@ GPU3DInterface gpu3DNull = { NDS_nullFunc1, //NDS_3D_Close NDS_nullFunc1, //NDS_3D_Render NDS_nullFunc1, //NDS_3D_VramReconfigureSignal - NDS_nullFunc4, //NDS_3D_GetLine - NDS_nullFunc3 //NDS_3D_GetLineCaptured + NDS_nullFunc1, //NDS_3D_CheckFresh }; GPU3DInterface *gpu3D = &gpu3DNull; diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index 8df9133c4..a6d32fbdc 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -21,6 +21,8 @@ #ifndef RENDER3D_H #define RENDER3D_H +#include "types.h" + //not using this right now #define CALL_CONVENTION @@ -44,13 +46,9 @@ typedef struct Render3DInterface 
//called when the emulator reconfigures its vram. you may need to invalidate your texture cache. void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) (); - //Retrieves a line of color buffer data - void (CALL_CONVENTION* NDS_3D_GetLine) (int line, unsigned short* dst, unsigned char* dstAlpha); + //ensures that the plugin's framebuffer generation is fresh + void (CALL_CONVENTION* NDS_3D_CheckFresh) (); - //Retrieves a line of color buffer data for capture - void (CALL_CONVENTION* NDS_3D_GetLineCaptured) (int line, unsigned short* dst); - - } GPU3DInterface; extern int cur3DCore;