A bunch of small performance optimizations in the 3D engines; this gains a couple of fps over the current trunk. It is still a little slower than 0.9.2 though, and I am not sure why.

This commit is contained in:
zeromus 2009-04-21 04:13:47 +00:00
parent e30b3fea70
commit df48d9a889
8 changed files with 614 additions and 667 deletions

View File

@ -49,6 +49,7 @@
#include "GPU.h"
#include "debug.h"
#include "render3D.h"
#include "gfx3d.h"
#include "GPU_osd.h"
#include "debug.h"
#include "NDSSystem.h"
@ -131,6 +132,397 @@ GraphicsInterface_struct *GFXCoreList[] = {
NULL
};
//all-zero 256 entry table; GPU_Init points curr_win[0] and curr_win[1] at it.
//the remaining 255 elements are zero-initialized by the language.
static const CACHE_ALIGN u8 win_empty[256] = {0};
static CACHE_ALIGN u16 fadeInColors[17][0x8000];
static CACHE_ALIGN u16 fadeOutColors[17][0x8000];
//this should be public, because it gets used somewhere else
CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
/*****************************************************************************/
// PIXEL RENDERING - 3D
/*****************************************************************************/
//DECL3D declares the locals shared by every setFinal3DColorSpecial* blitter below.
//it expects `dstX` and `srcX` to be in scope and expands to declarations only (no statements):
//  x       - copy of dstX, used to index bgPixels and for the window check
//  passing - dstX<<1, handed to T2ReadWord/T2WriteWord as the offset into dst
//  color   - _3dColorLine[srcX]
//  alpha   - _3dAlphaLine[srcX]
//  dst     - currDst
//the names are relied upon by the function bodies, so they must not be renamed here alone.
#define DECL3D \
int x = dstX; \
int passing = dstX<<1; \
u16 color = _3dColorLine[srcX]; \
u8 alpha = _3dAlphaLine[srcX]; \
u8* dst = currDst;
FORCEINLINE void GPU::setFinal3DColorSpecialNone(int dstX, int srcX)
{
	//3d pixel blitter for the "no special effect" mode without windows.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	u16 outColor = color;

	//alpha acts as a weight out of 16. we blend only when the pixel is below full weight,
	//the 3d layer has the highest prio, and the layer already drawn at this pixel
	//is selected as 2nd source in BLDCNT
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//the pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialBlend(int dstX, int srcX)
{
	//3d pixel blitter for the "blend" special effect mode without windows.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	u16 outColor = color;

	//blending is allowed when the 3d layer is selected as 1st target (BLDCNT bit 0)
	//or when it has the highest prio; in both cases alpha must be below 16
	const bool mayBlend = (alpha < 16) && ((BLDCNT & 0x1) || bg0HasHighestPrio);

	//the layer already drawn at this pixel must also be selected as 2nd source
	if(mayBlend && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//the pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialIncrease(int dstX, int srcX)
{
	//3d pixel blitter for the "brightness increase" special effect mode without windows.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	u16 outColor = color;

	//we must blend if the 3d layer has the highest prio and the layer underneath is a 2nd source.
	//this does not seem to take priority over fading (unlike semi-transparent sprites),
	//so the fade below is applied on top of the blended result
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//fade through the lookup table only when the 3d layer is a 1st target and the factor is nonzero
	if((BLDCNT & 0x1) && (BLDY_EVY != 0x0))
		outColor = fadeInColors[BLDY_EVY][outColor&0x7FFF];

	//the pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialDecrease(int dstX, int srcX)
{
	//3d pixel blitter for the "brightness decrease" special effect mode without windows.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	u16 outColor = color;

	//we must blend if the 3d layer has the highest prio and the layer underneath is a 2nd source.
	//this does not seem to take priority over fading (unlike semi-transparent sprites),
	//so the fade below is applied on top of the blended result
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//fade through the lookup table only when the 3d layer is a 1st target and the factor is nonzero
	if((BLDCNT & 0x1) && (BLDY_EVY != 0x0))
		outColor = fadeOutColors[BLDY_EVY][outColor&0x7FFF];

	//the pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialNoneWnd(int dstX, int srcX)
{
	//3d pixel blitter for the "no special effect" mode with windows in use.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	bool windowDraw = true, windowEffect = true;
	renderline_checkWindows(x, windowDraw, windowEffect);

	//nothing is written at all when the window check rejects this pixel
	if(!windowDraw)
		return;

	u16 outColor = color;

	//we must blend if the 3d layer has the highest prio, alpha is below 16,
	//and the layer already drawn at this pixel is selected as 2nd source
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//the pixel is written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialBlendWnd(int dstX, int srcX)
{
	//3d pixel blitter for the "blend" special effect mode with windows in use.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	bool windowDraw = true, windowEffect = true;
	renderline_checkWindows(x, windowDraw, windowEffect);

	//nothing is written at all when the window check rejects this pixel
	if(!windowDraw)
		return;

	u16 outColor = color;

	//blending is allowed when the 3d layer is selected as 1st target (and the window permits effects)
	//or when it has the highest prio; in both cases alpha must be below 16
	const bool mayBlend = (alpha < 16) && (((BLDCNT & 0x1) && windowEffect) || bg0HasHighestPrio);

	//the layer already drawn at this pixel must also be selected as 2nd source
	if(mayBlend && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//the pixel is written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX)
{
	//3d pixel blitter for the "brightness increase" special effect mode with windows in use.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	bool windowDraw = true, windowEffect = true;
	renderline_checkWindows(x, windowDraw, windowEffect);

	//nothing is written at all when the window check rejects this pixel
	if(!windowDraw)
		return;

	u16 outColor = color;

	//we must blend if the 3d layer has the highest prio and the layer underneath is a 2nd source.
	//this does not seem to take priority over fading (unlike semi-transparent sprites),
	//so the fade below is applied on top of the blended result
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//fade only when the 3d layer is a 1st target, the window permits effects, and the factor is nonzero
	if((BLDCNT & 0x1) && windowEffect && (BLDY_EVY != 0x0))
		outColor = fadeInColors[BLDY_EVY][outColor&0x7FFF];

	//the pixel is written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX)
{
	//3d pixel blitter for the "brightness decrease" special effect mode with windows in use.
	//dstX selects the destination pixel in currDst, srcX the entry of the 3d color/alpha lines (see DECL3D).
	DECL3D;

	bool windowDraw = true, windowEffect = true;
	renderline_checkWindows(x, windowDraw, windowEffect);

	//nothing is written at all when the window check rejects this pixel
	if(!windowDraw)
		return;

	u16 outColor = color;

	//we must blend if the 3d layer has the highest prio and the layer underneath is a 2nd source.
	//this does not seem to take priority over fading (unlike semi-transparent sprites),
	//so the fade below is applied on top of the blended result
	if((alpha < 16) && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	//fade only when the 3d layer is a 1st target, the window permits effects, and the factor is nonzero
	if((BLDCNT & 0x1) && windowEffect && (BLDY_EVY != 0x0))
		outColor = fadeOutColors[BLDY_EVY][outColor&0x7FFF];

	//the pixel is written with bit 15 set, and bgPixels is reset to 0 for this column
	T2WriteWord(dst, passing, (outColor | 0x8000));
	bgPixels[x] = 0;
}
static void setFinalOBJColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
static void setFinalOBJColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
static void setFinalOBJColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
@ -140,16 +532,6 @@ static void setFinalOBJColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u1
static void setFinalOBJColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
static void setFinalOBJColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
static void setFinal3DColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialDecrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialNoneWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
static void setFinal3DColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = {
setFinalOBJColorSpecialNone,
setFinalOBJColorSpecialBlend,
@ -160,24 +542,6 @@ const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = {
setFinalOBJColorSpecialIncreaseWnd,
setFinalOBJColorSpecialDecreaseWnd,};
//table of the free-function 3d pixel blitters.
//SetupFinalPixelBlitter indexes it with windowUsed*4 + blendMode, so entries 0-3 are the
//None/Blend/Increase/Decrease variants and entries 4-7 are the matching *Wnd variants.
const GPU::Final3DColFunct pixelBlitters3D[8] = {
setFinal3DColorSpecialNone,
setFinal3DColorSpecialBlend,
setFinal3DColorSpecialIncrease,
setFinal3DColorSpecialDecrease,
setFinal3DColorSpecialNoneWnd,
setFinal3DColorSpecialBlendWnd,
setFinal3DColorSpecialIncreaseWnd,
setFinal3DColorSpecialDecreaseWnd};
//all-zero 256 entry table; GPU_Init points curr_win[0] and curr_win[1] at it.
//the remaining 255 elements are zero-initialized by the language.
static const CACHE_ALIGN u8 win_empty[256] = {0};
static CACHE_ALIGN u16 fadeInColors[17][0x8000];
static CACHE_ALIGN u16 fadeOutColors[17][0x8000];
CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
/*****************************************************************************/
// INITIALIZATION
@ -236,9 +600,6 @@ static void GPU_InitFadeColors()
}
}
static u16 line3Dcolor[512];
static u8 line3Dalpha[512];
GPU * GPU_Init(u8 l)
{
GPU * g;
@ -249,17 +610,13 @@ GPU * GPU_Init(u8 l)
GPU_Reset(g, l);
GPU_InitFadeColors();
//clear out the excess line buffers (beyond x=255)
memset(line3Dcolor+256, 0, 256*sizeof(u16));
memset(line3Dalpha+256, 0, 256*sizeof(u8));
g->curr_win[0] = win_empty;
g->curr_win[1] = win_empty;
g->need_update_winh[0] = true;
g->need_update_winh[1] = true;
g->setFinalColorBck_funcNum = 0;
g->setFinalColor3d_funcNum = 0;
g->setFinalColorSpr = setFinalOBJColorSpecialNone;
g->setFinalColor3D = setFinal3DColorSpecialNone;
return g;
}
@ -269,8 +626,8 @@ void GPU_Reset(GPU *g, u8 l)
memset(g, 0, sizeof(GPU));
g->setFinalColorBck_funcNum = 0;
g->setFinalColor3d_funcNum = 0;
g->setFinalColorSpr = setFinalOBJColorSpecialNone;
g->setFinalColor3D = setFinal3DColorSpecialNone;
g->core = l;
g->BGSize[0][0] = g->BGSize[1][0] = g->BGSize[2][0] = g->BGSize[3][0] = 256;
g->BGSize[0][1] = g->BGSize[1][1] = g->BGSize[2][1] = g->BGSize[3][1] = 256;
@ -413,7 +770,7 @@ void SetupFinalPixelBlitter (GPU *gpu)
gpu->setFinalColorSpr = pixelBlittersOBJ[windowUsed*4 + blendMode];
gpu->setFinalColorBck_funcNum = windowUsed*4 + blendMode;
gpu->setFinalColor3D = pixelBlitters3D[windowUsed*4 + blendMode];
gpu->setFinalColor3d_funcNum = windowUsed*4 + blendMode;
}
@ -1035,364 +1392,9 @@ static void setFinalOBJColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u
}
}
/*****************************************************************************/
// PIXEL RENDERING - 3D
/*****************************************************************************/
/* 3D pixel blitter for the "no special effect" mode without windows. */
/* Writes `color` (possibly blended with the pixel already in dst at byte offset `passing`) for column x. */
static void setFinal3DColorSpecialNone(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio, alpha is below 16, */
	/* and the layer we are drawing on is selected as 2nd source */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* The pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "blend" special effect mode without windows. */
/* Writes `color` (possibly blended with the pixel already in dst at byte offset `passing`) for column x. */
static void setFinal3DColorSpecialBlend(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* We can blend if the 3D layer is selected as 1st target, */
	/* but also if the 3D layer has the highest prio. */
	const bool mayBlend = (alpha < 16) && ((gpu->BLDCNT & 0x1) || gpu->bg0HasHighestPrio);

	/* The layer we are drawing on must also be selected as 2nd source */
	if(mayBlend && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* The pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "brightness increase" special effect mode without windows. */
/* Writes `color` (possibly blended and/or faded) into dst at byte offset `passing` for column x. */
static void setFinal3DColorSpecialIncrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio and the layer underneath is a 2nd source. */
	/* But it doesn't seem to have priority over fading, unlike semi-transparent sprites, */
	/* so the fade below is applied on top of the blended result */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* Fade through the lookup table only when the 3D layer is a 1st target and the factor is nonzero */
	if((gpu->BLDCNT & 0x1) && (gpu->BLDY_EVY != 0x0))
		outColor = fadeInColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* The pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "brightness decrease" special effect mode without windows. */
/* Writes `color` (possibly blended and/or faded) into dst at byte offset `passing` for column x. */
static void setFinal3DColorSpecialDecrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio and the layer underneath is a 2nd source. */
	/* But it doesn't seem to have priority over fading, unlike semi-transparent sprites, */
	/* so the fade below is applied on top of the blended result */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* Fade through the lookup table only when the 3D layer is a 1st target and the factor is nonzero */
	if((gpu->BLDCNT & 0x1) && (gpu->BLDY_EVY != 0x0))
		outColor = fadeOutColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* The pixel is always written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "no special effect" mode with windows in use. */
/* Writes `color` (possibly blended) into dst at byte offset `passing` for column x, if the window allows it. */
static void setFinal3DColorSpecialNoneWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool windowDraw = true, windowEffect = true;
	gpu->renderline_checkWindows(x, windowDraw, windowEffect);

	/* Nothing is written at all when the window check rejects this pixel */
	if(!windowDraw)
		return;

	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio, alpha is below 16, */
	/* and the layer we are drawing on is selected as 2nd source */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* The pixel is written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "blend" special effect mode with windows in use. */
/* Writes `color` (possibly blended) into dst at byte offset `passing` for column x, if the window allows it. */
static void setFinal3DColorSpecialBlendWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool windowDraw = true, windowEffect = true;
	gpu->renderline_checkWindows(x, windowDraw, windowEffect);

	/* Nothing is written at all when the window check rejects this pixel */
	if(!windowDraw)
		return;

	u16 outColor = color;

	/* We can blend if the 3D layer is selected as 1st target (and the window permits effects), */
	/* but also if the 3D layer has the highest prio. */
	const bool mayBlend = (alpha < 16) && (((gpu->BLDCNT & 0x1) && windowEffect) || gpu->bg0HasHighestPrio);

	/* The layer we are drawing on must also be selected as 2nd source */
	if(mayBlend && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* The pixel is written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "brightness increase" special effect mode with windows in use. */
/* Writes `color` (possibly blended and/or faded) into dst at byte offset `passing` for column x, if the window allows it. */
static void setFinal3DColorSpecialIncreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool windowDraw = true, windowEffect = true;
	gpu->renderline_checkWindows(x, windowDraw, windowEffect);

	/* Nothing is written at all when the window check rejects this pixel */
	if(!windowDraw)
		return;

	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio and the layer underneath is a 2nd source. */
	/* But it doesn't seem to have priority over fading, unlike semi-transparent sprites, */
	/* so the fade below is applied on top of the blended result */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* Fade only when the 3D layer is a 1st target, the window permits effects, and the factor is nonzero */
	if((gpu->BLDCNT & 0x1) && windowEffect && (gpu->BLDY_EVY != 0x0))
		outColor = fadeInColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* The pixel is written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
/* 3D pixel blitter for the "brightness decrease" special effect mode with windows in use. */
/* Writes `color` (possibly blended and/or faded) into dst at byte offset `passing` for column x, if the window allows it. */
static void setFinal3DColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool windowDraw = true, windowEffect = true;
	gpu->renderline_checkWindows(x, windowDraw, windowEffect);

	/* Nothing is written at all when the window check rejects this pixel */
	if(!windowDraw)
		return;

	u16 outColor = color;

	/* We must blend if the 3D layer has the highest prio and the layer underneath is a 2nd source. */
	/* But it doesn't seem to have priority over fading, unlike semi-transparent sprites, */
	/* so the fade below is applied on top of the blended result */
	if((alpha < 16) && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		const int inv = 16 - alpha;
		COLOR top, under, mix;
		top.val = color;
		under.val = T2ReadWord(dst, passing);
		mix.bits.red = ((top.bits.red * alpha / 16) + (under.bits.red * inv / 16));
		mix.bits.green = ((top.bits.green * alpha / 16) + (under.bits.green * inv / 16));
		mix.bits.blue = ((top.bits.blue * alpha / 16) + (under.bits.blue * inv / 16));
		outColor = mix.val;
	}

	/* Fade only when the 3D layer is a 1st target, the window permits effects, and the factor is nonzero */
	if((gpu->BLDCNT & 0x1) && windowEffect && (gpu->BLDY_EVY != 0x0))
		outColor = fadeOutColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* The pixel is written with bit 15 set, and bgPixels is reset to 0 for this column */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x)
{
//if someone disagrees with these, they could be reimplemented as a function pointer easily
switch(setFinalColorBck_funcNum | (blend1?8:0))
{
case 0x0: setFinalBGColorSpecialNone(color,x,false); break;
@ -1415,6 +1417,22 @@ FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x)
}
FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX)
{
	//routes one 3d pixel to the blitter selected by setFinalColor3d_funcNum
	//(SetupFinalPixelBlitter stores windowUsed*4 + blendMode there).
	//a switch over inlined members is used on purpose; if someone disagrees,
	//this could be reimplemented as a function pointer easily
	switch(setFinalColor3d_funcNum)
	{
		case 0x0: setFinal3DColorSpecialNone(dstX,srcX); return;
		case 0x1: setFinal3DColorSpecialBlend(dstX,srcX); return;
		case 0x2: setFinal3DColorSpecialIncrease(dstX,srcX); return;
		case 0x3: setFinal3DColorSpecialDecrease(dstX,srcX); return;
		case 0x4: setFinal3DColorSpecialNoneWnd(dstX,srcX); return;
		case 0x5: setFinal3DColorSpecialBlendWnd(dstX,srcX); return;
		case 0x6: setFinal3DColorSpecialIncreaseWnd(dstX,srcX); return;
		case 0x7: setFinal3DColorSpecialDecreaseWnd(dstX,srcX); return;
		default: return; //any other value draws nothing, same as before
	}
}
//this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call
//overhead was ridiculous and terrible
FORCEINLINE void GPU::__setFinalColorBck(u16 color, u8 x, bool opaque)
@ -2687,17 +2705,15 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
BGxOFS *bgofs = &gpu->dispx_st->dispx_BGxOFS[i16];
u16 hofs = (T1ReadWord((u8*)&bgofs->BGxHOFS, 0) & 0x1FF);
//line3Dcolor and line3Dalpha are left cleared by GPU initialization,
//and they always stay that way.
gpu3D->NDS_3D_GetLine(l, line3Dcolor, line3Dalpha);
gfx3d_GetLineData(l, &gpu->_3dColorLine, &gpu->_3dAlphaLine);
u16* colorLine = gpu->_3dColorLine;
for(int k = 0; k < 256; k++)
{
int q = ((k + hofs) & 0x1FF);
if(line3Dcolor[q] & 0x8000)
gpu->setFinalColor3D(gpu, (k << 1), gpu->currDst, line3Dcolor[q], line3Dalpha[q], k);
if(colorLine[q] & 0x8000)
gpu->setFinalColor3d(k, q);
}
continue;
@ -2785,9 +2801,9 @@ static void GPU_ligne_DispCapture(u16 l)
case 1: // Capture 3D
{
//INFO("Capture 3D\n");
u16 cap3DLine[512];
gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine);
CAPCOPY(((u8*)cap3DLine),cap_dst);
u16* colorLine;
gfx3d_GetLineData(l, &colorLine, NULL);
CAPCOPY(((u8*)colorLine),cap_dst);
}
break;
}
@ -2818,7 +2834,6 @@ static void GPU_ligne_DispCapture(u16 l)
//INFO("Capture source is SourceA+B blended\n");
u16 *srcA = NULL;
u16 *srcB = NULL;
u16 cap3DLine[512];
if (gpu->dispCapCnt.srcA == 0)
{
@ -2830,8 +2845,7 @@ static void GPU_ligne_DispCapture(u16 l)
}
else
{
gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine);
srcA = (u16 *)cap3DLine; // 3D screen
gfx3d_GetLineData(l, &srcA, NULL);
}
if (gpu->dispCapCnt.srcB == 0) // VRAM screen

View File

@ -695,6 +695,9 @@ struct GPU
bool blend1;
u8* currDst;
u16* _3dColorLine;
u8* _3dAlphaLine;
static struct MosaicLookup {
@ -721,11 +724,12 @@ struct GPU
u16 blend(u16 colA, u16 colB);
typedef void (*FinalOBJColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
typedef void (*Final3DColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
typedef void (*Final3DColFunct)(GPU *gpu, int dstX, int srcX);
int setFinalColorBck_funcNum;
int setFinalColor3d_funcNum;
FinalOBJColFunct setFinalColorSpr;
Final3DColFunct setFinalColor3D;
//Final3DColFunct setFinalColor3D;
enum SpriteRenderMode {
SPRITE_1D, SPRITE_2D
} spriteRenderMode;
@ -735,6 +739,8 @@ struct GPU
void spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab);
void setFinalColorBG(u16 color, u8 x);
void setFinalColor3d(int dstX, int srcX);
FORCEINLINE void setFinalBGColorSpecialNone(u16 color, u8 x, bool blend1);
FORCEINLINE void setFinalBGColorSpecialBlend(u16 color, u8 x, bool blend1);
FORCEINLINE void setFinalBGColorSpecialIncrease(u16 color, u8 x, bool blend1);
@ -744,6 +750,16 @@ struct GPU
FORCEINLINE void setFinalBGColorSpecialIncreaseWnd(u16 color, u8 x, bool blend1);
FORCEINLINE void setFinalBGColorSpecialDecreaseWnd(u16 color, u8 x, bool blend1);
FORCEINLINE void setFinal3DColorSpecialNone(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialBlend(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialIncrease(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialDecrease(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialNoneWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialBlendWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX);
void __setFinalColorBck(u16 color, u8 x, bool opaque);
void setAffineStart(int layer, int xy, u32 val);

View File

@ -75,19 +75,15 @@ static void ENDGL() {
#define CTASSERT(x) typedef char __assert ## y[(x) ? 1 : -1]
#endif
static ALIGN(16) u8 GPU_screen3D [256*192*4];
//static ALIGN(16) unsigned char GPU_screenStencil[256*256];
static ALIGN(16) u8 GPU_screen3D [256*192*4];
static const unsigned short map3d_cull[4] = {GL_FRONT_AND_BACK, GL_FRONT, GL_BACK, 0};
static const int texEnv[4] = { GL_MODULATE, GL_DECAL, GL_MODULATE, GL_MODULATE };
static const int depthFunc[2] = { GL_LESS, GL_EQUAL };
static bool needRefreshFramebuffer = false;
float clearAlpha;
static bool validFramebuffer = false;
//derived values extracted from polyattr etc
static bool wireframe=false, alpha31=false;
@ -474,11 +470,6 @@ static char OGLInit(void)
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP); //clamp so that we dont run off the edges due to 1.0 -> [0,31] math
}
if(glBlendFuncSeparateEXT == NULL)
clearAlpha = 1;
else
clearAlpha = 0;
OGLReset();
ENDGL();
@ -839,6 +830,7 @@ static void OGLVramReconfigureSignal()
TexCache_Invalidate();
}
static void GL_ReadFramebuffer()
{
if(!BEGINGL()) return;
@ -849,141 +841,47 @@ static void GL_ReadFramebuffer()
//convert the pixels to a different format which is more convenient
//is it safe to modify the screen buffer? if not, we could make a temp copy
for(int i=0;i<256*192;i++) {
u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
u32screen3D>>=3;
u32screen3D &= 0x1F1F1F1F;
}
//debug: view depth buffer via color buffer for debugging
//int ctr=0;
//for(ctr=0;ctr<256*192;ctr++) {
// float zval = GPU_screen3Ddepth[ctr];
// u8* colorPtr = GPU_screen3D+ctr*3;
// if(zval<0) {
// colorPtr[0] = 255;
// colorPtr[1] = 0;
// colorPtr[2] = 0;
// } else if(zval>1) {
// colorPtr[0] = 0;
// colorPtr[1] = 0;
// colorPtr[2] = 255;
// } else {
// colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255;
// //INFO("%f %f %d\n",zval, zval*255,colorPtr[0]);
// }
//}
}
static void OGLGetLineCaptured(int line, u16* dst)
{
if(needRefreshFramebuffer) {
needRefreshFramebuffer = false;
GL_ReadFramebuffer();
}
u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10);
// u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8);
for(int i = 0; i < 256; i++)
for(int i=0,y=191;y>=0;y--)
{
/* u32 stencil = screenStencil[i];
u16* dst = gfx3d_convertedScreen + (y<<8);
u8* dstAlpha = gfx3d_convertedAlpha + (y<<8);
if(!stencil)
#ifndef NOSSE
//I dont know much about this kind of stuff, but this seems to help
//for some reason I couldnt make the intrinsics work
u8* wanx = (u8*)&((u32*)GPU_screen3D)[i];
#define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] }
#define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F)
PUNK(0); PUNK(1);
#endif
for(int x=0;x<256;x++,i++)
{
dst[i] = 0x0000;
continue;
}*/
u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
u32screen3D>>=3;
u32screen3D &= 0x1F1F1F1F;
int t=i<<2;
/* u8 r = screen3D[t+2];
u8 g = screen3D[t+1];
u8 b = screen3D[t+0];*/
//if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed
//dst[i] = (b<<10) | (g<<5) | (r) | 0x8000;
dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 0x8000 : 0x0000));
const int t = i<<2;
const u8 a = GPU_screen3D[t+3];
const u8 r = GPU_screen3D[t+2];
const u8 g = GPU_screen3D[t+1];
const u8 b = GPU_screen3D[t+0];
dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
dstAlpha[x] = alpha_5bit_to_4bit[a];
}
}
}
static void OGLGetLine(int line, u16* dst, u8* dstAlpha)
static void OGLCheckFresh()
{
assert(line<192 && line>=0);
if(needRefreshFramebuffer) {
if(needRefreshFramebuffer)
{
needRefreshFramebuffer = false;
GL_ReadFramebuffer();
}
u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10);
//u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8);
//the renderer clears the stencil to 0
//then it sets it to 1 whenever it renders a pixel that passes the alpha test
//(it also sets it to 2 under some circumstances when rendering shadow volumes)
//so, we COULD use a zero stencil value to indicate that nothing should get composited.
//in fact, we are going to do that to fix some problems.
//but beware that it i figure it might could CAUSE some problems
//this alpha compositing blending logic isnt thought through very much
//someone needs to think about what bitdepth it should take place at and how to do it efficiently
for(int i=0;i<256;i++)
{
// u32 stencil = screenStencil[i];
//you would use this if you wanted to use the stencil buffer to make decisions here
// if(!stencil) continue;
// u16 oldcolor = dst[j];
int t=i<<2;
// u32 dstpixel;
dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 0x8000 : 0x0000));
dstAlpha[i] = alpha_5bit_to_4bit[screen3D[t+3]];
//old debug reminder: display alpha channel
//u32 r = screen3D[t+3];
//u32 g = screen3D[t+3];
//u32 b = screen3D[t+3];
//if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed
/* u32 a = screen3D[t+3];
typedef u8 mixtbl[32][32];
mixtbl & mix = mixTable555[a];
//r
u32 newpix = screen3D[t+2];
u32 oldpix = oldcolor&0x1F;
newpix = mix[newpix][oldpix];
dstpixel = newpix;
//g
newpix = screen3D[t+1];
oldpix = (oldcolor>>5)&0x1F;
newpix = mix[newpix][oldpix];
dstpixel |= (newpix<<5);
//b
newpix = screen3D[t+0];
oldpix = (oldcolor>>10)&0x1F;
newpix = mix[newpix][oldpix];
dstpixel |= (newpix<<10);
dst[j] = dstpixel;*/
}
}
GPU3DInterface gpu3Dgl = {
"OpenGL",
OGLInit,
@ -991,9 +889,5 @@ GPU3DInterface gpu3Dgl = {
OGLClose,
OGLRender,
OGLVramReconfigureSignal,
OGLGetLine,
OGLGetLineCaptured
OGLCheckFresh,
};

View File

@ -103,6 +103,13 @@ CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = {
0x10, 0x10
};
//maps a 5-bit alpha value to the flag bit (0x8000) that gets OR'd into a converted 15-bit color:
//alpha 0 leaves the bit clear, every other alpha sets it.
//this table is declared extern in gfx3d.h and is shared by the renderers, so it must not be
//static here: internal linkage would conflict with that declaration and hide it from them.
CACHE_ALIGN const u16 alpha_lookup[] = {
	0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
	0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
	0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
	0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000};
//private acceleration tables
static float float16table[65536];
static float float10Table[1024];
@ -112,6 +119,9 @@ static float normalTable[1024];
#define fix2float(v) (((float)((s32)(v))) / (float)(1<<12))
#define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9))
CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
// Matrix stack handling
static CACHE_ALIGN MatrixStack mtxStack[4] = {
MatrixStack(1), // Projection stack
@ -2156,6 +2166,16 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest)
*dest = lightColor[index];
}
//hands back pointers to one scanline of the converted 3d framebuffer.
//line: scanline index; each scanline occupies 256 entries of the converted buffers
//dst: receives a pointer to the line's 16-bit color data
//dstAlpha: receives a pointer to the line's alpha data; may be NULL if the caller does not want it
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha)
{
	//give the active 3d core a chance to regenerate its framebuffer before we expose it
	gpu3D->NDS_3D_CheckFresh();

	const int rowStart = line << 8;
	*dst = &gfx3d_convertedScreen[rowStart];

	//alpha is optional
	if(dstAlpha == NULL)
		return;
	*dstAlpha = &gfx3d_convertedAlpha[rowStart];
}
//http://www.opengl.org/documentation/specs/version1.1/glspec1.1/node17.html
//talks about the state required to process verts in quadlists etc. helpful ideas.

View File

@ -180,6 +180,7 @@ extern GFX3D gfx3d;
//---------------------
extern CACHE_ALIGN const u16 alpha_lookup[32];
extern CACHE_ALIGN u32 color_15bit_to_24bit[32768];
extern CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768];
extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768];
@ -190,6 +191,11 @@ extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32];
//these contain the 3d framebuffer converted into the most useful format
//they are stored here instead of in the renderers in order to consolidate the buffers
extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
//GE commands:
void gfx3d_glViewPort(u32 v);
void gfx3d_glClearColor(u32 v);
@ -212,11 +218,11 @@ BOOL gfx3d_glMultMatrix4x4(s32 v);
void gfx3d_glBegin(u32 v);
void gfx3d_glEnd(void);
void gfx3d_glColor3b(u32 v);
BOOL gfx3d_glVertex16b(unsigned int v);
BOOL gfx3d_glVertex16b(u32 v);
void gfx3d_glVertex10b(u32 v);
void gfx3d_glVertex3_cord(unsigned int one, unsigned int two, unsigned int v);
void gfx3d_glVertex3_cord(u32 one, u32 two, u32 v);
void gfx3d_glVertex_rel(u32 v);
void gfx3d_glSwapScreen(unsigned int screen);
void gfx3d_glSwapScreen(u32 screen);
int gfx3d_GetNumPolys();
int gfx3d_GetNumVertex();
void gfx3d_glPolygonAttrib (u32 val);
@ -229,16 +235,16 @@ void gfx3d_glTexImage(u32 val);
void gfx3d_glTexPalette(u32 val);
void gfx3d_glTexCoord(u32 val);
void gfx3d_glNormal(u32 v);
s32 gfx3d_GetClipMatrix (unsigned int index);
s32 gfx3d_GetDirectionalMatrix (unsigned int index);
s32 gfx3d_GetClipMatrix (u32 index);
s32 gfx3d_GetDirectionalMatrix (u32 index);
void gfx3d_glLightDirection (u32 v);
void gfx3d_glLightColor (u32 v);
void gfx3d_glAlphaFunc(u32 v);
BOOL gfx3d_glBoxTest(u32 v);
BOOL gfx3d_glPosTest(u32 v);
void gfx3d_glVecTest(u32 v);
unsigned int gfx3d_glGetPosRes(unsigned int index);
unsigned short gfx3d_glGetVecRes(unsigned int index);
u32 gfx3d_glGetPosRes(u32 index);
u16 gfx3d_glGetVecRes(u32 index);
void gfx3d_glFlush(u32 v);
void gfx3d_VBlankSignal();
void gfx3d_VBlankEndSignal(bool skipFrame);
@ -248,9 +254,11 @@ void gfx3d_sendCommandToFIFO(u32 val);
void gfx3d_sendCommand(u32 cmd, u32 param);
//other misc stuff
void gfx3d_glGetMatrix(unsigned int mode, int index, float* dest);
void gfx3d_glGetLightDirection(unsigned int index, unsigned int* dest);
void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest);
void gfx3d_glGetMatrix(u32 mode, int index, float* dest);
void gfx3d_glGetLightDirection(u32 index, u32* dest);
void gfx3d_glGetLightColor(u32 index, u32* dest);
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha);
struct SFORMAT;
extern SFORMAT SF_GFX3D[];

View File

@ -63,6 +63,7 @@ template<typename T> T _min(T a, T b, T c, T d) { return min(_min(a,b,d),c); }
template<typename T> T _max(T a, T b, T c, T d) { return max(_max(a,b,d),c); }
static int polynum;
static bool validFramebuffer = false;
static u8 modulate_table[32][32];
static u8 decal_table[32][32][32];
@ -202,16 +203,16 @@ struct PolyAttr
} polyAttr;
//one color value of the rasterizer, viewable either as a single packed 32-bit word
//or as four separate 8-bit channels that alias the same storage
union FragmentColor {
//packed view; lets a whole color be read or written in one access (e.g. when sampling a texel)
u32 color;
struct {
//#ifdef WORDS_BIGENDIAN ?
//NOTE(review): which byte of `color` each channel aliases depends on host byte order;
//this r,g,b,a ordering presumably assumes a little-endian host -- confirm
u8 r,g,b,a;
};
};
struct Fragment
{
union Color {
u32 color;
struct {
//#ifdef WORDS_BIGENDIAN ?
u8 r,g,b,a;
} components;
} color;
u32 depth;
struct {
@ -220,7 +221,7 @@ struct Fragment
u8 stencil;
u8 pad[5];
u8 pad;
};
static VERT* verts[MAX_CLIPPED_VERTS];
@ -231,6 +232,8 @@ INLINE static void SubmitVertex(int vert_index, VERT& rawvert)
}
static Fragment screen[256*192];
static FragmentColor screenColor[256*192];
FORCEINLINE int iround(float f) {
return (int)f; //lol
@ -300,7 +303,7 @@ static struct Sampler
}
}
FORCEINLINE Fragment::Color sample(float u, float v)
FORCEINLINE FragmentColor sample(float u, float v)
{
//finally, we can use floor here. but, it is slower than we want.
//the best solution is probably to wait until the pipeline is full of fixed point
@ -308,7 +311,7 @@ static struct Sampler
int iv = floorf(v);
dowrap(iu,iv);
Fragment::Color color;
FragmentColor color;
color.color = ((u32*)textures.currentData)[(iv<<wshift)+iu];
return color;
}
@ -327,11 +330,11 @@ struct Shader
}
float invu, invv, w;
Fragment::Color materialColor;
FragmentColor materialColor;
FORCEINLINE void shade(Fragment& dst)
FORCEINLINE void shade(FragmentColor& dst)
{
Fragment::Color texColor;
FragmentColor texColor;
float u,v;
switch(mode)
@ -340,10 +343,10 @@ struct Shader
u = invu*w;
v = invv*w;
texColor = sampler.sample(u,v);
dst.color.components.r = modulate_table[texColor.components.r][materialColor.components.r];
dst.color.components.g = modulate_table[texColor.components.g][materialColor.components.g];
dst.color.components.b = modulate_table[texColor.components.b][materialColor.components.b];
dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a];
dst.r = modulate_table[texColor.r][materialColor.r];
dst.g = modulate_table[texColor.g][materialColor.g];
dst.b = modulate_table[texColor.b][materialColor.b];
dst.a = modulate_table[texColor.a][materialColor.a];
//dst.color.components.a = 31;
//#ifdef _MSC_VER
//if(GetAsyncKeyState(VK_SHIFT)) {
@ -361,37 +364,37 @@ struct Shader
u = invu*w;
v = invv*w;
texColor = sampler.sample(u,v);
dst.color.components.r = decal_table[texColor.components.a][texColor.components.r][materialColor.components.r];
dst.color.components.g = decal_table[texColor.components.a][texColor.components.g][materialColor.components.g];
dst.color.components.b = decal_table[texColor.components.a][texColor.components.b][materialColor.components.b];
dst.color.components.a = materialColor.components.a;
dst.r = decal_table[texColor.a][texColor.r][materialColor.r];
dst.g = decal_table[texColor.a][texColor.g][materialColor.g];
dst.b = decal_table[texColor.a][texColor.b][materialColor.b];
dst.a = materialColor.a;
break;
case 2: //toon/highlight shading
u = invu*w;
v = invv*w;
texColor = sampler.sample(u,v);
u32 toonColorVal; toonColorVal = gfx3d.rgbToonTable[materialColor.components.r];
Fragment::Color toonColor;
toonColor.components.r = ((toonColorVal & 0x0000FF) >> 3);
toonColor.components.g = ((toonColorVal & 0x00FF00) >> 11);
toonColor.components.b = ((toonColorVal & 0xFF0000) >> 19);
dst.color.components.r = modulate_table[texColor.components.r][toonColor.components.r];
dst.color.components.g = modulate_table[texColor.components.g][toonColor.components.g];
dst.color.components.b = modulate_table[texColor.components.b][toonColor.components.b];
dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a];
u32 toonColorVal; toonColorVal = gfx3d.rgbToonTable[materialColor.r];
FragmentColor toonColor;
toonColor.r = ((toonColorVal & 0x0000FF) >> 3);
toonColor.g = ((toonColorVal & 0x00FF00) >> 11);
toonColor.b = ((toonColorVal & 0xFF0000) >> 19);
dst.r = modulate_table[texColor.r][toonColor.r];
dst.g = modulate_table[texColor.g][toonColor.g];
dst.b = modulate_table[texColor.b][toonColor.b];
dst.a = modulate_table[texColor.a][materialColor.a];
if(gfx3d.shading == GFX3D::HIGHLIGHT)
{
dst.color.components.r = min<u8>(31, (dst.color.components.r + toonColor.components.r));
dst.color.components.g = min<u8>(31, (dst.color.components.g + toonColor.components.g));
dst.color.components.b = min<u8>(31, (dst.color.components.b + toonColor.components.b));
dst.r = min<u8>(31, (dst.r + toonColor.r));
dst.g = min<u8>(31, (dst.g + toonColor.g));
dst.b = min<u8>(31, (dst.b + toonColor.b));
}
break;
case 3: //shadows
//is this right? only with the material color?
dst.color = materialColor;
dst = materialColor;
break;
case 4: //our own special mode which only uses the material color (for when texturing is disabled)
dst.color = materialColor;
dst = materialColor;
break;
}
@ -399,44 +402,45 @@ struct Shader
} shader;
static FORCEINLINE void alphaBlend(Fragment::Color & dst, const Fragment::Color & src)
static FORCEINLINE void alphaBlend(FragmentColor & dst, const FragmentColor & src)
{
if(gfx3d.enableAlphaBlending)
{
if(src.components.a == 0)
if(src.a == 0)
{
dst.components.a = max(src.components.a,dst.components.a);
dst.a = max(src.a,dst.a);
}
else if(src.components.a == 31 || dst.components.a == 0)
else if(src.a == 31 || dst.a == 0)
{
dst.color = src.color;
dst.components.a = max(src.components.a,dst.components.a);
dst = src;
dst.a = max(src.a,dst.a);
}
else
{
u8 alpha = src.components.a+1;
u8 alpha = src.a+1;
u8 invAlpha = 32 - alpha;
dst.components.r = (alpha*src.components.r + invAlpha*dst.components.r)>>5;
dst.components.g = (alpha*src.components.g + invAlpha*dst.components.g)>>5;
dst.components.b = (alpha*src.components.b + invAlpha*dst.components.b)>>5;
dst.components.a = max(src.components.a,dst.components.a);
dst.r = (alpha*src.r + invAlpha*dst.r)>>5;
dst.g = (alpha*src.g + invAlpha*dst.g)>>5;
dst.b = (alpha*src.b + invAlpha*dst.b)>>5;
dst.a = max(src.a,dst.a);
}
}
else
{
if(src.components.a == 0)
if(src.a == 0)
{
//do nothing; the fragment is totally transparent
}
else
{
dst.color = src.color;
dst = src;
}
}
}
static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) {
Fragment &destFragment = screen[adr];
FragmentColor &destFragmentColor = screenColor[adr];
//depth test
u32 depth;
@ -478,25 +482,25 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
//this is a HACK:
//we are being very sloppy with our interpolation precision right now
//and rather than fix it, i just want to clamp it
shader.materialColor.components.r = max(0,min(31,(int)r));
shader.materialColor.components.g = max(0,min(31,(int)g));
shader.materialColor.components.b = max(0,min(31,(int)b));
shader.materialColor.r = max(0,min(31,(int)r));
shader.materialColor.g = max(0,min(31,(int)g));
shader.materialColor.b = max(0,min(31,(int)b));
shader.materialColor.components.a = polyAttr.alpha;
shader.materialColor.a = polyAttr.alpha;
//pixel shader
Fragment shaderOutput;
FragmentColor shaderOutput;
shader.shade(shaderOutput);
//alpha test
if(gfx3d.enableAlphaTest)
{
if(shaderOutput.color.components.a < gfx3d.alphaTestRef)
if(shaderOutput.a < gfx3d.alphaTestRef)
goto rejected_fragment;
}
//we shouldnt do any of this if we generated a totally transparent pixel
if(shaderOutput.color.components.a != 0)
if(shaderOutput.a != 0)
{
//handle shadow polys
if(shader.mode == 3)
@ -533,7 +537,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
}
//handle polyids
bool isOpaquePixel = shaderOutput.color.components.a == 31;
bool isOpaquePixel = shaderOutput.a == 31;
if(isOpaquePixel)
{
destFragment.polyid.opaque = polyAttr.polyid;
@ -561,7 +565,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
}
//alpha blending and write color
alphaBlend(destFragment.color, shaderOutput.color);
alphaBlend(destFragmentColor, shaderOutput);
//depth writing
if(isOpaquePixel || polyAttr.translucentDepthWrite)
@ -925,7 +929,9 @@ static char SoftRastInit(void)
return 1;
}
static void SoftRastReset() {}
//resets the software rasterizer's framebuffer state
static void SoftRastReset()
{
	//mark the converted framebuffer as stale so that it gets regenerated the next time it is checked for freshness
	validFramebuffer = false;
}
static void SoftRastClose()
{
@ -935,50 +941,41 @@ static void SoftRastVramReconfigureSignal() {
TexCache_Invalidate();
}
CACHE_ALIGN static const u16 alpha_lookup[] = {
0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000};
static void SoftRastGetLine(int line, u16* dst, u8* dstAlpha)
static void SoftRastConvertFramebuffer()
{
Fragment* src = screen+((line)<<8);
for(int i=0;i<256;i++)
FragmentColor* src = screenColor;
u16* dst = gfx3d_convertedScreen;
u8* dstAlpha = gfx3d_convertedAlpha;
//in an effort to speed this up, the misc pixel buffers and the color buffer were separated.
for(int i=0,y=0;y<192;y++)
{
const bool testRenderAlpha = false;
const u8 r = src->color.components.r;
const u8 g = src->color.components.g;
const u8 b = src->color.components.b;
*dst = R5G5B5TORGB15(r,g,b);
#ifndef NOSSE
u8* wanx = (u8*)&src[i];
#define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] }
#define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F)
PUNK(0); PUNK(1);
#endif
*dst |= alpha_lookup[src->color.components.a];
*dstAlpha = alpha_5bit_to_4bit[src->color.components.a];
if(testRenderAlpha)
for(int x=0;x<256;x++,i++)
{
*dst = 0x8000 | R5G5B5TORGB15(src->color.components.a,src->color.components.a,src->color.components.a);
*dstAlpha = 16;
const u8 r = src[i].r;
const u8 g = src[i].g;
const u8 b = src[i].b;
const u8 a = src[i].a;
dst[i] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
dstAlpha[i] = alpha_5bit_to_4bit[a];
}
src++;
dst++;
dstAlpha++;
}
validFramebuffer = true;
}
static void SoftRastGetLineCaptured(int line, u16* dst) {
Fragment* src = screen+((line)<<8);
for(int i=0;i<256;i++)
static void SoftRastCheckFresh()
{
if(!validFramebuffer)
{
const u8 r = src->color.components.r;
const u8 g = src->color.components.g;
const u8 b = src->color.components.b;
*dst = R5G5B5TORGB15(r,g,b);
*dst |= alpha_lookup[src->color.components.a];
src++;
dst++;
SoftRastConvertFramebuffer();
}
}
@ -1158,10 +1155,11 @@ static void clipPoly(POLY* poly)
static void SoftRastRender()
{
Fragment clearFragment;
clearFragment.color.components.r = gfx3d.clearColor&0x1F;
clearFragment.color.components.g = (gfx3d.clearColor>>5)&0x1F;
clearFragment.color.components.b = (gfx3d.clearColor>>10)&0x1F;
clearFragment.color.components.a = (gfx3d.clearColor>>16)&0x1F;
FragmentColor clearFragmentColor;
clearFragmentColor.r = gfx3d.clearColor&0x1F;
clearFragmentColor.g = (gfx3d.clearColor>>5)&0x1F;
clearFragmentColor.b = (gfx3d.clearColor>>10)&0x1F;
clearFragmentColor.a = (gfx3d.clearColor>>16)&0x1F;
clearFragment.polyid.opaque = (gfx3d.clearColor>>24)&0x3F;
//special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display
//I am not sure whether it is right, though. previously this was cleared to 0, as a guess,
@ -1171,6 +1169,8 @@ static void SoftRastRender()
clearFragment.stencil = 0;
for(int i=0;i<256*192;i++)
screen[i] = clearFragment;
for(int i=0;i<256*192;i++)
screenColor[i] = clearFragmentColor;
//convert colors to float to get more precision in case we need it
for(int i=0;i<gfx3d.vertlist->count;i++)
@ -1292,8 +1292,9 @@ static void SoftRastRender()
shape_engine(type,!polyAttr.backfacing);
}
// printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count);
validFramebuffer = false;
// printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count);
}
GPU3DInterface gpu3DRasterize = {
@ -1303,6 +1304,5 @@ GPU3DInterface gpu3DRasterize = {
SoftRastClose,
SoftRastRender,
SoftRastVramReconfigureSignal,
SoftRastGetLine,
SoftRastGetLineCaptured
SoftRastCheckFresh,
};

View File

@ -24,8 +24,6 @@ int cur3DCore = GPU3D_NULL;
static void NDS_nullFunc1 (void){}
static char NDS_nullFunc2 (void){ return 1; }
static void NDS_nullFunc3 (int,unsigned short*) {}
static void NDS_nullFunc4 (int,unsigned short*,unsigned char*) {}
GPU3DInterface gpu3DNull = {
"None",
@ -34,8 +32,7 @@ GPU3DInterface gpu3DNull = {
NDS_nullFunc1, //NDS_3D_Close
NDS_nullFunc1, //NDS_3D_Render
NDS_nullFunc1, //NDS_3D_VramReconfigureSignal
NDS_nullFunc4, //NDS_3D_GetLine
NDS_nullFunc3 //NDS_3D_GetLineCaptured
NDS_nullFunc1, //NDS_3D_CheckFresh
};
GPU3DInterface *gpu3D = &gpu3DNull;

View File

@ -21,6 +21,8 @@
#ifndef RENDER3D_H
#define RENDER3D_H
#include "types.h"
//not using this right now
#define CALL_CONVENTION
@ -44,12 +46,8 @@ typedef struct Render3DInterface
//called when the emulator reconfigures its vram. you may need to invalidate your texture cache.
void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) ();
//Retrieves a line of color buffer data
void (CALL_CONVENTION* NDS_3D_GetLine) (int line, unsigned short* dst, unsigned char* dstAlpha);
//Retrieves a line of color buffer data for capture
void (CALL_CONVENTION* NDS_3D_GetLineCaptured) (int line, unsigned short* dst);
//ensures that the plugin's framebuffer generation is fresh
void (CALL_CONVENTION* NDS_3D_CheckFresh) ();
} GPU3DInterface;