A bunch of small performance optimizations in the 3D engines; this gains a couple of fps over the current trunk. It is still a little slower than 0.9.2, though, and I am not sure why.
This commit is contained in:
parent
e30b3fea70
commit
df48d9a889
|
@ -49,6 +49,7 @@
|
|||
#include "GPU.h"
|
||||
#include "debug.h"
|
||||
#include "render3D.h"
|
||||
#include "gfx3d.h"
|
||||
#include "GPU_osd.h"
|
||||
#include "debug.h"
|
||||
#include "NDSSystem.h"
|
||||
|
@ -131,6 +132,397 @@ GraphicsInterface_struct *GFXCoreList[] = {
|
|||
NULL
|
||||
};
|
||||
|
||||
//256-entry table of zeros; GPU_Init installs it as the initial curr_win[0] and curr_win[1].
static const CACHE_ALIGN u8 win_empty[256] = {0};

//Brightness lookup tables, read by the increase/decrease pixel blitters as
//table[BLDY_EVY][color & 0x7FFF].
static CACHE_ALIGN u16 fadeInColors[17][0x8000];
static CACHE_ALIGN u16 fadeOutColors[17][0x8000];

//Deliberately not static: this table is also referenced from outside this file.
CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
// PIXEL RENDERING - 3D
|
||||
/*****************************************************************************/
|
||||
|
||||
//Shared prologue for the GPU::setFinal3DColorSpecial* members. Expects `dstX` and `srcX`
//to be in scope and introduces the locals those members work with:
//  dst     - destination line buffer (currDst)
//  color   - 3D color sample at srcX
//  alpha   - 3D alpha sample at srcX
//  x       - destination column (copy of dstX)
//  passing - offset handed to T2ReadWord/T2WriteWord (dstX * 2)
#define DECL3D \
	u8* dst = currDst; \
	u16 color = _3dColorLine[srcX]; \
	u8 alpha = _3dAlphaLine[srcX]; \
	int x = dstX; \
	int passing = x<<1;
|
||||
|
||||
//Composites one 3D pixel onto the current line when no color special effect is selected.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers.
FORCEINLINE void GPU::setFinal3DColorSpecialNone(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	//A translucent 3D pixel is mixed with the pixel underneath only when the 3D layer has
	//the highest priority and the layer underneath is selected as 2nd target in BLDCNT
	//(bit 8 shifted by that layer's id).
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Composites one 3D pixel onto the current line when the alpha-blend special effect is selected.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers.
FORCEINLINE void GPU::setFinal3DColorSpecialBlend(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	//Blending is allowed when the 3D layer is selected as 1st target (BLDCNT bit 0)
	//or when it has the highest priority.
	const bool layerMayBlend = (BLDCNT & 0x1) || bg0HasHighestPrio;

	//Additionally the pixel must be translucent and the layer underneath must be
	//selected as 2nd target (BLDCNT bit 8 shifted by that layer's id).
	if(srcAlpha < 16 && layerMayBlend && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Composites one 3D pixel onto the current line when the brightness-increase special effect
//is selected. dstX selects the destination column (and the bgPixels entry); srcX selects
//the sample in the 3D color/alpha line buffers.
FORCEINLINE void GPU::setFinal3DColorSpecialIncrease(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	//Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer
	//has the highest priority and the layer underneath is selected as 2nd target.
	//Unlike semi-transparent sprites, this does not take precedence over the fade below.
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0)
	//and the fade coefficient is non-zero.
	if((BLDCNT & 0x1) && BLDY_EVY != 0x0)
		outColor = fadeInColors[BLDY_EVY][outColor&0x7FFF];

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Composites one 3D pixel onto the current line when the brightness-decrease special effect
//is selected. dstX selects the destination column (and the bgPixels entry); srcX selects
//the sample in the 3D color/alpha line buffers.
FORCEINLINE void GPU::setFinal3DColorSpecialDecrease(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	//Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer
	//has the highest priority and the layer underneath is selected as 2nd target.
	//Unlike semi-transparent sprites, this does not take precedence over the fade below.
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0)
	//and the fade coefficient is non-zero.
	if((BLDCNT & 0x1) && BLDY_EVY != 0x0)
		outColor = fadeOutColors[BLDY_EVY][outColor&0x7FFF];

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Windowed variant of the "no special effect" 3D pixel compositor.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers. Nothing is written when the window check rejects the column.
FORCEINLINE void GPU::setFinal3DColorSpecialNoneWnd(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;

	bool drawAllowed = true, effectAllowed = true;
	renderline_checkWindows(dstX, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	u16 outColor = srcColor;

	//A translucent 3D pixel is mixed with the pixel underneath only when the 3D layer has
	//the highest priority and the layer underneath is selected as 2nd target in BLDCNT.
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Windowed variant of the alpha-blend 3D pixel compositor.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers. Nothing is written when the window check rejects the column.
FORCEINLINE void GPU::setFinal3DColorSpecialBlendWnd(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;

	bool drawAllowed = true, effectAllowed = true;
	renderline_checkWindows(dstX, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	u16 outColor = srcColor;

	//Blending is allowed when the 3D layer is selected as 1st target (BLDCNT bit 0) and the
	//window enables effects here, or when the 3D layer has the highest priority.
	const bool layerMayBlend = ((BLDCNT & 0x1) && effectAllowed) || bg0HasHighestPrio;

	//Additionally the pixel must be translucent and the layer underneath must be
	//selected as 2nd target (BLDCNT bit 8 shifted by that layer's id).
	if(srcAlpha < 16 && layerMayBlend && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Windowed variant of the brightness-increase 3D pixel compositor.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers. Nothing is written when the window check rejects the column.
FORCEINLINE void GPU::setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	bool drawAllowed = true, effectAllowed = true;
	renderline_checkWindows(dstX, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	//Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer
	//has the highest priority and the layer underneath is selected as 2nd target.
	//Unlike semi-transparent sprites, this does not take precedence over the fade below.
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0),
	//the window enables effects here, and the fade coefficient is non-zero.
	if((BLDCNT & 0x1) && effectAllowed && BLDY_EVY != 0x0)
		outColor = fadeInColors[BLDY_EVY][outColor&0x7FFF];

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
//Windowed variant of the brightness-decrease 3D pixel compositor.
//dstX selects the destination column (and the bgPixels entry); srcX selects the sample
//in the 3D color/alpha line buffers. Nothing is written when the window check rejects the column.
FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX)
{
	const u16 srcColor = _3dColorLine[srcX];
	const u8 srcAlpha = _3dAlphaLine[srcX];
	const int byteOffset = dstX << 1;
	u16 outColor = srcColor;

	bool drawAllowed = true, effectAllowed = true;
	renderline_checkWindows(dstX, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	//Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer
	//has the highest priority and the layer underneath is selected as 2nd target.
	//Unlike semi-transparent sprites, this does not take precedence over the fade below.
	if(srcAlpha < 16 && bg0HasHighestPrio && (BLDCNT & (0x100 << bgPixels[dstX])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - srcAlpha;

		top.val = srcColor;
		under.val = T2ReadWord(currDst, byteOffset);

		mixed.bits.red = (top.bits.red * srcAlpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * srcAlpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * srcAlpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	//Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0),
	//the window enables effects here, and the fade coefficient is non-zero.
	if((BLDCNT & 0x1) && effectAllowed && BLDY_EVY != 0x0)
		outColor = fadeOutColors[BLDY_EVY][outColor&0x7FFF];

	//Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0.
	T2WriteWord(currDst, byteOffset, (outColor | 0x8000));
	bgPixels[dstX] = 0;
}
|
||||
|
||||
|
||||
static void setFinalOBJColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
|
@ -140,16 +532,6 @@ static void setFinalOBJColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u1
|
|||
static void setFinalOBJColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
|
||||
static void setFinal3DColorSpecialNone (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialBlend (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialIncrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialDecrease (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialNoneWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
static void setFinal3DColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
|
||||
|
||||
const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = {
|
||||
setFinalOBJColorSpecialNone,
|
||||
setFinalOBJColorSpecialBlend,
|
||||
|
@ -160,24 +542,6 @@ const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = {
|
|||
setFinalOBJColorSpecialIncreaseWnd,
|
||||
setFinalOBJColorSpecialDecreaseWnd,};
|
||||
|
||||
/* 3D pixel blitters; SetupFinalPixelBlitter indexes this table with windowUsed*4 + blendMode. */
const GPU::Final3DColFunct pixelBlitters3D[8] = {
	/* window not used */
	setFinal3DColorSpecialNone,         /* 0 */
	setFinal3DColorSpecialBlend,        /* 1 */
	setFinal3DColorSpecialIncrease,     /* 2 */
	setFinal3DColorSpecialDecrease,     /* 3 */
	/* window used */
	setFinal3DColorSpecialNoneWnd,      /* 4 */
	setFinal3DColorSpecialBlendWnd,     /* 5 */
	setFinal3DColorSpecialIncreaseWnd,  /* 6 */
	setFinal3DColorSpecialDecreaseWnd   /* 7 */
};
|
||||
|
||||
/* 256-entry table of zeros; GPU_Init installs it as the initial curr_win[0] and curr_win[1]. */
static const CACHE_ALIGN u8 win_empty[256] = {0};

/* Brightness lookup tables, read by the increase/decrease pixel blitters as */
/* table[BLDY_EVY][color & 0x7FFF]. */
static CACHE_ALIGN u16 fadeInColors[17][0x8000];
static CACHE_ALIGN u16 fadeOutColors[17][0x8000];

CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
|
||||
|
||||
/*****************************************************************************/
|
||||
// INITIALIZATION
|
||||
|
@ -236,9 +600,6 @@ static void GPU_InitFadeColors()
|
|||
}
|
||||
}
|
||||
|
||||
static u16 line3Dcolor[512];
|
||||
static u8 line3Dalpha[512];
|
||||
|
||||
GPU * GPU_Init(u8 l)
|
||||
{
|
||||
GPU * g;
|
||||
|
@ -249,17 +610,13 @@ GPU * GPU_Init(u8 l)
|
|||
GPU_Reset(g, l);
|
||||
GPU_InitFadeColors();
|
||||
|
||||
//clear out the excess line buffers (beyond x=255)
|
||||
memset(line3Dcolor+256, 0, 256*sizeof(u16));
|
||||
memset(line3Dalpha+256, 0, 256*sizeof(u8));
|
||||
|
||||
g->curr_win[0] = win_empty;
|
||||
g->curr_win[1] = win_empty;
|
||||
g->need_update_winh[0] = true;
|
||||
g->need_update_winh[1] = true;
|
||||
g->setFinalColorBck_funcNum = 0;
|
||||
g->setFinalColor3d_funcNum = 0;
|
||||
g->setFinalColorSpr = setFinalOBJColorSpecialNone;
|
||||
g->setFinalColor3D = setFinal3DColorSpecialNone;
|
||||
|
||||
return g;
|
||||
}
|
||||
|
@ -269,8 +626,8 @@ void GPU_Reset(GPU *g, u8 l)
|
|||
memset(g, 0, sizeof(GPU));
|
||||
|
||||
g->setFinalColorBck_funcNum = 0;
|
||||
g->setFinalColor3d_funcNum = 0;
|
||||
g->setFinalColorSpr = setFinalOBJColorSpecialNone;
|
||||
g->setFinalColor3D = setFinal3DColorSpecialNone;
|
||||
g->core = l;
|
||||
g->BGSize[0][0] = g->BGSize[1][0] = g->BGSize[2][0] = g->BGSize[3][0] = 256;
|
||||
g->BGSize[0][1] = g->BGSize[1][1] = g->BGSize[2][1] = g->BGSize[3][1] = 256;
|
||||
|
@ -413,7 +770,7 @@ void SetupFinalPixelBlitter (GPU *gpu)
|
|||
|
||||
gpu->setFinalColorSpr = pixelBlittersOBJ[windowUsed*4 + blendMode];
|
||||
gpu->setFinalColorBck_funcNum = windowUsed*4 + blendMode;
|
||||
gpu->setFinalColor3D = pixelBlitters3D[windowUsed*4 + blendMode];
|
||||
gpu->setFinalColor3d_funcNum = windowUsed*4 + blendMode;
|
||||
|
||||
}
|
||||
|
||||
|
@ -1035,364 +1392,9 @@ static void setFinalOBJColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u
|
|||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
// PIXEL RENDERING - 3D
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Writes one 3D pixel into the line buffer when no color special effect is selected. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used to index gpu->bgPixels. */
static void setFinal3DColorSpecialNone(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* A translucent 3D pixel is mixed with the pixel underneath only when the 3D layer has */
	/* the highest priority and the layer underneath is selected as 2nd target in BLDCNT. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Writes one 3D pixel into the line buffer when the alpha-blend special effect is selected. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used to index gpu->bgPixels. */
static void setFinal3DColorSpecialBlend(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* Blending is allowed when the 3D layer is selected as 1st target (BLDCNT bit 0) */
	/* or when it has the highest priority. */
	const bool layerMayBlend = (gpu->BLDCNT & 0x1) || gpu->bg0HasHighestPrio;

	/* Additionally the pixel must be translucent and the layer underneath must be */
	/* selected as 2nd target (BLDCNT bit 8 shifted by that layer's id). */
	if(alpha < 16 && layerMayBlend && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Writes one 3D pixel into the line buffer when the brightness-increase effect is selected. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used to index gpu->bgPixels. */
static void setFinal3DColorSpecialIncrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer */
	/* has the highest priority and the layer underneath is selected as 2nd target. */
	/* Unlike semi-transparent sprites, this does not take precedence over the fade below. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0) */
	/* and the fade coefficient is non-zero. */
	if((gpu->BLDCNT & 0x1) && gpu->BLDY_EVY != 0x0)
		outColor = fadeInColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Writes one 3D pixel into the line buffer when the brightness-decrease effect is selected. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used to index gpu->bgPixels. */
static void setFinal3DColorSpecialDecrease(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	/* Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer */
	/* has the highest priority and the layer underneath is selected as 2nd target. */
	/* Unlike semi-transparent sprites, this does not take precedence over the fade below. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0) */
	/* and the fade coefficient is non-zero. */
	if((gpu->BLDCNT & 0x1) && gpu->BLDY_EVY != 0x0)
		outColor = fadeOutColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Windowed variant of the "no special effect" 3D pixel writer. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used for the window check and to index gpu->bgPixels. */
/* Nothing is written when the window check rejects the column. */
static void setFinal3DColorSpecialNoneWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool drawAllowed = true, effectAllowed = true;
	gpu->renderline_checkWindows(x, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	u16 outColor = color;

	/* A translucent 3D pixel is mixed with the pixel underneath only when the 3D layer has */
	/* the highest priority and the layer underneath is selected as 2nd target in BLDCNT. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Windowed variant of the alpha-blend 3D pixel writer. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used for the window check and to index gpu->bgPixels. */
/* Nothing is written when the window check rejects the column. */
static void setFinal3DColorSpecialBlendWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	bool drawAllowed = true, effectAllowed = true;
	gpu->renderline_checkWindows(x, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	u16 outColor = color;

	/* Blending is allowed when the 3D layer is selected as 1st target (BLDCNT bit 0) and the */
	/* window enables effects here, or when the 3D layer has the highest priority. */
	const bool layerMayBlend = ((gpu->BLDCNT & 0x1) && effectAllowed) || gpu->bg0HasHighestPrio;

	/* Additionally the pixel must be translucent and the layer underneath must be */
	/* selected as 2nd target (BLDCNT bit 8 shifted by that layer's id). */
	if(alpha < 16 && layerMayBlend && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Windowed variant of the brightness-increase 3D pixel writer. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used for the window check and to index gpu->bgPixels. */
/* Nothing is written when the window check rejects the column. */
static void setFinal3DColorSpecialIncreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	bool drawAllowed = true, effectAllowed = true;
	gpu->renderline_checkWindows(x, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	/* Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer */
	/* has the highest priority and the layer underneath is selected as 2nd target. */
	/* Unlike semi-transparent sprites, this does not take precedence over the fade below. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0), */
	/* the window enables effects here, and the fade coefficient is non-zero. */
	if((gpu->BLDCNT & 0x1) && effectAllowed && gpu->BLDY_EVY != 0x0)
		outColor = fadeInColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
/* Windowed variant of the brightness-decrease 3D pixel writer. */
/* passing: offset into dst handed to T2ReadWord/T2WriteWord; color/alpha: the 3D sample; */
/* x: column used for the window check and to index gpu->bgPixels. */
/* Nothing is written when the window check rejects the column. */
static void setFinal3DColorSpecialDecreaseWnd(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x)
{
	u16 outColor = color;

	bool drawAllowed = true, effectAllowed = true;
	gpu->renderline_checkWindows(x, drawAllowed, effectAllowed);

	if(!drawAllowed)
		return;

	/* Step 1: a translucent 3D pixel is mixed with the pixel underneath when the 3D layer */
	/* has the highest priority and the layer underneath is selected as 2nd target. */
	/* Unlike semi-transparent sprites, this does not take precedence over the fade below. */
	if(alpha < 16 && gpu->bg0HasHighestPrio && (gpu->BLDCNT & (0x100 << gpu->bgPixels[x])))
	{
		COLOR top, under, mixed;
		const int underWeight = 16 - alpha;

		top.val = color;
		under.val = T2ReadWord(dst, passing);

		mixed.bits.red = (top.bits.red * alpha / 16) + (under.bits.red * underWeight / 16);
		mixed.bits.green = (top.bits.green * alpha / 16) + (under.bits.green * underWeight / 16);
		mixed.bits.blue = (top.bits.blue * alpha / 16) + (under.bits.blue * underWeight / 16);

		outColor = mixed.val;
	}

	/* Step 2: fade through the lookup table when the 3D layer is a 1st target (BLDCNT bit 0), */
	/* the window enables effects here, and the fade coefficient is non-zero. */
	if((gpu->BLDCNT & 0x1) && effectAllowed && gpu->BLDY_EVY != 0x0)
		outColor = fadeOutColors[gpu->BLDY_EVY][outColor&0x7FFF];

	/* Bit 15 is always set on the stored pixel; the column is then tagged with layer id 0. */
	T2WriteWord(dst, passing, (outColor | 0x8000));
	gpu->bgPixels[x] = 0;
}
|
||||
|
||||
FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x)
|
||||
{
|
||||
//if someone disagrees with these, they could be reimplemented as a function pointer easily
|
||||
switch(setFinalColorBck_funcNum | (blend1?8:0))
|
||||
{
|
||||
case 0x0: setFinalBGColorSpecialNone(color,x,false); break;
|
||||
|
@ -1415,6 +1417,22 @@ FORCEINLINE void GPU::setFinalColorBG(u16 color, u8 x)
|
|||
}
|
||||
|
||||
|
||||
//Dispatches one 3D pixel to the compositor selected by setFinalColor3d_funcNum
//(SetupFinalPixelBlitter stores windowUsed*4 + blendMode there).
//A switch over inlinable members is used here instead of a function pointer;
//values outside 0..7 do nothing.
FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX)
{
	switch(setFinalColor3d_funcNum)
	{
	//window not used
	case 0x0:
		setFinal3DColorSpecialNone(dstX,srcX);
		break;
	case 0x1:
		setFinal3DColorSpecialBlend(dstX,srcX);
		break;
	case 0x2:
		setFinal3DColorSpecialIncrease(dstX,srcX);
		break;
	case 0x3:
		setFinal3DColorSpecialDecrease(dstX,srcX);
		break;

	//window used
	case 0x4:
		setFinal3DColorSpecialNoneWnd(dstX,srcX);
		break;
	case 0x5:
		setFinal3DColorSpecialBlendWnd(dstX,srcX);
		break;
	case 0x6:
		setFinal3DColorSpecialIncreaseWnd(dstX,srcX);
		break;
	case 0x7:
		setFinal3DColorSpecialDecreaseWnd(dstX,srcX);
		break;

	default:
		break;
	}
}
|
||||
|
||||
//this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call
|
||||
//overhead was ridiculous and terrible
|
||||
FORCEINLINE void GPU::__setFinalColorBck(u16 color, u8 x, bool opaque)
|
||||
|
@ -2687,17 +2705,15 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
BGxOFS *bgofs = &gpu->dispx_st->dispx_BGxOFS[i16];
|
||||
u16 hofs = (T1ReadWord((u8*)&bgofs->BGxHOFS, 0) & 0x1FF);
|
||||
|
||||
//line3Dcolor and line3Dalpha are left cleared by GPU initialization,
|
||||
//and they always stay that way.
|
||||
|
||||
gpu3D->NDS_3D_GetLine(l, line3Dcolor, line3Dalpha);
|
||||
gfx3d_GetLineData(l, &gpu->_3dColorLine, &gpu->_3dAlphaLine);
|
||||
u16* colorLine = gpu->_3dColorLine;
|
||||
|
||||
for(int k = 0; k < 256; k++)
|
||||
{
|
||||
int q = ((k + hofs) & 0x1FF);
|
||||
|
||||
if(line3Dcolor[q] & 0x8000)
|
||||
gpu->setFinalColor3D(gpu, (k << 1), gpu->currDst, line3Dcolor[q], line3Dalpha[q], k);
|
||||
if(colorLine[q] & 0x8000)
|
||||
gpu->setFinalColor3d(k, q);
|
||||
}
|
||||
|
||||
continue;
|
||||
|
@ -2785,9 +2801,9 @@ static void GPU_ligne_DispCapture(u16 l)
|
|||
case 1: // Capture 3D
|
||||
{
|
||||
//INFO("Capture 3D\n");
|
||||
u16 cap3DLine[512];
|
||||
gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine);
|
||||
CAPCOPY(((u8*)cap3DLine),cap_dst);
|
||||
u16* colorLine;
|
||||
gfx3d_GetLineData(l, &colorLine, NULL);
|
||||
CAPCOPY(((u8*)colorLine),cap_dst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2818,7 +2834,6 @@ static void GPU_ligne_DispCapture(u16 l)
|
|||
//INFO("Capture source is SourceA+B blended\n");
|
||||
u16 *srcA = NULL;
|
||||
u16 *srcB = NULL;
|
||||
u16 cap3DLine[512];
|
||||
|
||||
if (gpu->dispCapCnt.srcA == 0)
|
||||
{
|
||||
|
@ -2830,8 +2845,7 @@ static void GPU_ligne_DispCapture(u16 l)
|
|||
}
|
||||
else
|
||||
{
|
||||
gpu3D->NDS_3D_GetLineCaptured(l, (u16*)cap3DLine);
|
||||
srcA = (u16 *)cap3DLine; // 3D screen
|
||||
gfx3d_GetLineData(l, &srcA, NULL);
|
||||
}
|
||||
|
||||
if (gpu->dispCapCnt.srcB == 0) // VRAM screen
|
||||
|
|
|
@ -695,6 +695,9 @@ struct GPU
|
|||
bool blend1;
|
||||
u8* currDst;
|
||||
|
||||
u16* _3dColorLine;
|
||||
u8* _3dAlphaLine;
|
||||
|
||||
|
||||
static struct MosaicLookup {
|
||||
|
||||
|
@ -721,11 +724,12 @@ struct GPU
|
|||
u16 blend(u16 colA, u16 colB);
|
||||
|
||||
typedef void (*FinalOBJColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
typedef void (*Final3DColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u16 x);
|
||||
typedef void (*Final3DColFunct)(GPU *gpu, int dstX, int srcX);
|
||||
|
||||
int setFinalColorBck_funcNum;
|
||||
int setFinalColor3d_funcNum;
|
||||
FinalOBJColFunct setFinalColorSpr;
|
||||
Final3DColFunct setFinalColor3D;
|
||||
//Final3DColFunct setFinalColor3D;
|
||||
enum SpriteRenderMode {
|
||||
SPRITE_1D, SPRITE_2D
|
||||
} spriteRenderMode;
|
||||
|
@ -735,6 +739,8 @@ struct GPU
|
|||
void spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab);
|
||||
|
||||
void setFinalColorBG(u16 color, u8 x);
|
||||
void setFinalColor3d(int dstX, int srcX);
|
||||
|
||||
FORCEINLINE void setFinalBGColorSpecialNone(u16 color, u8 x, bool blend1);
|
||||
FORCEINLINE void setFinalBGColorSpecialBlend(u16 color, u8 x, bool blend1);
|
||||
FORCEINLINE void setFinalBGColorSpecialIncrease(u16 color, u8 x, bool blend1);
|
||||
|
@ -744,6 +750,16 @@ struct GPU
|
|||
FORCEINLINE void setFinalBGColorSpecialIncreaseWnd(u16 color, u8 x, bool blend1);
|
||||
FORCEINLINE void setFinalBGColorSpecialDecreaseWnd(u16 color, u8 x, bool blend1);
|
||||
|
||||
FORCEINLINE void setFinal3DColorSpecialNone(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialBlend(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialIncrease(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialDecrease(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialNoneWnd(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialBlendWnd(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX);
|
||||
FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX);
|
||||
|
||||
|
||||
void __setFinalColorBck(u16 color, u8 x, bool opaque);
|
||||
void setAffineStart(int layer, int xy, u32 val);
|
||||
|
||||
|
|
|
@ -75,19 +75,15 @@ static void ENDGL() {
|
|||
#define CTASSERT(x) typedef char __assert ## y[(x) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
static ALIGN(16) u8 GPU_screen3D [256*192*4];
|
||||
//static ALIGN(16) unsigned char GPU_screenStencil[256*256];
|
||||
static ALIGN(16) u8 GPU_screen3D [256*192*4];
|
||||
|
||||
|
||||
static const unsigned short map3d_cull[4] = {GL_FRONT_AND_BACK, GL_FRONT, GL_BACK, 0};
|
||||
static const int texEnv[4] = { GL_MODULATE, GL_DECAL, GL_MODULATE, GL_MODULATE };
|
||||
static const int depthFunc[2] = { GL_LESS, GL_EQUAL };
|
||||
static bool needRefreshFramebuffer = false;
|
||||
|
||||
|
||||
float clearAlpha;
|
||||
|
||||
|
||||
|
||||
static bool validFramebuffer = false;
|
||||
|
||||
//derived values extracted from polyattr etc
|
||||
static bool wireframe=false, alpha31=false;
|
||||
|
@ -474,11 +470,6 @@ static char OGLInit(void)
|
|||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP); //clamp so that we dont run off the edges due to 1.0 -> [0,31] math
|
||||
}
|
||||
|
||||
if(glBlendFuncSeparateEXT == NULL)
|
||||
clearAlpha = 1;
|
||||
else
|
||||
clearAlpha = 0;
|
||||
|
||||
OGLReset();
|
||||
|
||||
ENDGL();
|
||||
|
@ -839,6 +830,7 @@ static void OGLVramReconfigureSignal()
|
|||
TexCache_Invalidate();
|
||||
}
|
||||
|
||||
|
||||
static void GL_ReadFramebuffer()
|
||||
{
|
||||
if(!BEGINGL()) return;
|
||||
|
@ -849,141 +841,47 @@ static void GL_ReadFramebuffer()
|
|||
|
||||
//convert the pixels to a different format which is more convenient
|
||||
//is it safe to modify the screen buffer? if not, we could make a temp copy
|
||||
for(int i=0;i<256*192;i++) {
|
||||
u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
|
||||
u32screen3D>>=3;
|
||||
u32screen3D &= 0x1F1F1F1F;
|
||||
}
|
||||
|
||||
|
||||
//debug: view depth buffer via color buffer for debugging
|
||||
//int ctr=0;
|
||||
//for(ctr=0;ctr<256*192;ctr++) {
|
||||
// float zval = GPU_screen3Ddepth[ctr];
|
||||
// u8* colorPtr = GPU_screen3D+ctr*3;
|
||||
// if(zval<0) {
|
||||
// colorPtr[0] = 255;
|
||||
// colorPtr[1] = 0;
|
||||
// colorPtr[2] = 0;
|
||||
// } else if(zval>1) {
|
||||
// colorPtr[0] = 0;
|
||||
// colorPtr[1] = 0;
|
||||
// colorPtr[2] = 255;
|
||||
// } else {
|
||||
// colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255;
|
||||
// //INFO("%f %f %d\n",zval, zval*255,colorPtr[0]);
|
||||
// }
|
||||
|
||||
//}
|
||||
}
|
||||
|
||||
static void OGLGetLineCaptured(int line, u16* dst)
|
||||
{
|
||||
if(needRefreshFramebuffer) {
|
||||
needRefreshFramebuffer = false;
|
||||
GL_ReadFramebuffer();
|
||||
}
|
||||
|
||||
u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10);
|
||||
// u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8);
|
||||
|
||||
for(int i = 0; i < 256; i++)
|
||||
for(int i=0,y=191;y>=0;y--)
|
||||
{
|
||||
/* u32 stencil = screenStencil[i];
|
||||
u16* dst = gfx3d_convertedScreen + (y<<8);
|
||||
u8* dstAlpha = gfx3d_convertedAlpha + (y<<8);
|
||||
|
||||
if(!stencil)
|
||||
#ifndef NOSSE
|
||||
//I don't know much about this kind of stuff, but this seems to help
|
||||
//for some reason I couldn't make the intrinsics work
|
||||
u8* wanx = (u8*)&((u32*)GPU_screen3D)[i];
|
||||
#define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] }
|
||||
#define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F)
|
||||
PUNK(0); PUNK(1);
|
||||
#endif
|
||||
|
||||
for(int x=0;x<256;x++,i++)
|
||||
{
|
||||
dst[i] = 0x0000;
|
||||
continue;
|
||||
}*/
|
||||
u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
|
||||
u32screen3D>>=3;
|
||||
u32screen3D &= 0x1F1F1F1F;
|
||||
|
||||
int t=i<<2;
|
||||
/* u8 r = screen3D[t+2];
|
||||
u8 g = screen3D[t+1];
|
||||
u8 b = screen3D[t+0];*/
|
||||
|
||||
//if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed
|
||||
//dst[i] = (b<<10) | (g<<5) | (r) | 0x8000;
|
||||
dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 0x8000 : 0x0000));
|
||||
const int t = i<<2;
|
||||
const u8 a = GPU_screen3D[t+3];
|
||||
const u8 r = GPU_screen3D[t+2];
|
||||
const u8 g = GPU_screen3D[t+1];
|
||||
const u8 b = GPU_screen3D[t+0];
|
||||
dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
|
||||
dstAlpha[x] = alpha_5bit_to_4bit[a];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void OGLGetLine(int line, u16* dst, u8* dstAlpha)
|
||||
static void OGLCheckFresh()
|
||||
{
|
||||
assert(line<192 && line>=0);
|
||||
|
||||
if(needRefreshFramebuffer) {
|
||||
if(needRefreshFramebuffer)
|
||||
{
|
||||
needRefreshFramebuffer = false;
|
||||
GL_ReadFramebuffer();
|
||||
}
|
||||
|
||||
u8 *screen3D = (u8*)GPU_screen3D+((191-line)<<10);
|
||||
//u8 *screenStencil = (u8*)GPU_screenStencil+((191-line)<<8);
|
||||
|
||||
//the renderer clears the stencil to 0
|
||||
//then it sets it to 1 whenever it renders a pixel that passes the alpha test
|
||||
//(it also sets it to 2 under some circumstances when rendering shadow volumes)
|
||||
//so, we COULD use a zero stencil value to indicate that nothing should get composited.
|
||||
//in fact, we are going to do that to fix some problems.
|
||||
//but beware: I figure it could also CAUSE some problems
|
||||
|
||||
//this alpha compositing blending logic isn't thought through very much
|
||||
//someone needs to think about what bitdepth it should take place at and how to do it efficiently
|
||||
|
||||
for(int i=0;i<256;i++)
|
||||
{
|
||||
// u32 stencil = screenStencil[i];
|
||||
|
||||
//you would use this if you wanted to use the stencil buffer to make decisions here
|
||||
// if(!stencil) continue;
|
||||
|
||||
// u16 oldcolor = dst[j];
|
||||
|
||||
int t=i<<2;
|
||||
// u32 dstpixel;
|
||||
|
||||
dst[i] = (screen3D[t+2] | (screen3D[t+1] << 5) | (screen3D[t+0] << 10) | ((screen3D[t+3] > 0) ? 0x8000 : 0x0000));
|
||||
dstAlpha[i] = alpha_5bit_to_4bit[screen3D[t+3]];
|
||||
|
||||
//old debug reminder: display alpha channel
|
||||
//u32 r = screen3D[t+3];
|
||||
//u32 g = screen3D[t+3];
|
||||
//u32 b = screen3D[t+3];
|
||||
|
||||
//if this math strikes you as wrong, be sure to look at GL_ReadFramebuffer() where the pixel format in screen3D is changed
|
||||
|
||||
/* u32 a = screen3D[t+3];
|
||||
|
||||
typedef u8 mixtbl[32][32];
|
||||
mixtbl & mix = mixTable555[a];
|
||||
|
||||
//r
|
||||
u32 newpix = screen3D[t+2];
|
||||
u32 oldpix = oldcolor&0x1F;
|
||||
newpix = mix[newpix][oldpix];
|
||||
dstpixel = newpix;
|
||||
|
||||
//g
|
||||
newpix = screen3D[t+1];
|
||||
oldpix = (oldcolor>>5)&0x1F;
|
||||
newpix = mix[newpix][oldpix];
|
||||
dstpixel |= (newpix<<5);
|
||||
|
||||
//b
|
||||
newpix = screen3D[t+0];
|
||||
oldpix = (oldcolor>>10)&0x1F;
|
||||
newpix = mix[newpix][oldpix];
|
||||
dstpixel |= (newpix<<10);
|
||||
|
||||
dst[j] = dstpixel;*/
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
GPU3DInterface gpu3Dgl = {
|
||||
"OpenGL",
|
||||
OGLInit,
|
||||
|
@ -991,9 +889,5 @@ GPU3DInterface gpu3Dgl = {
|
|||
OGLClose,
|
||||
OGLRender,
|
||||
OGLVramReconfigureSignal,
|
||||
OGLGetLine,
|
||||
OGLGetLineCaptured
|
||||
OGLCheckFresh,
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -103,6 +103,13 @@ CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = {
|
|||
0x10, 0x10
|
||||
};
|
||||
|
||||
CACHE_ALIGN static const u16 alpha_lookup[] = {
|
||||
0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000};
|
||||
|
||||
|
||||
//private acceleration tables
|
||||
static float float16table[65536];
|
||||
static float float10Table[1024];
|
||||
|
@ -112,6 +119,9 @@ static float normalTable[1024];
|
|||
#define fix2float(v) (((float)((s32)(v))) / (float)(1<<12))
|
||||
#define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9))
|
||||
|
||||
CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
|
||||
CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
|
||||
|
||||
// Matrix stack handling
|
||||
static CACHE_ALIGN MatrixStack mtxStack[4] = {
|
||||
MatrixStack(1), // Projection stack
|
||||
|
@ -2156,6 +2166,16 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest)
|
|||
*dest = lightColor[index];
|
||||
}
|
||||
|
||||
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha)
|
||||
{
|
||||
gpu3D->NDS_3D_CheckFresh();
|
||||
*dst = gfx3d_convertedScreen+((line)<<8);
|
||||
if(dstAlpha != NULL)
|
||||
{
|
||||
*dstAlpha = gfx3d_convertedAlpha+((line)<<8);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//http://www.opengl.org/documentation/specs/version1.1/glspec1.1/node17.html
|
||||
//talks about the state required to process verts in quadlists etc. helpful ideas.
|
||||
|
|
|
@ -180,6 +180,7 @@ extern GFX3D gfx3d;
|
|||
|
||||
//---------------------
|
||||
|
||||
extern CACHE_ALIGN const u16 alpha_lookup[32];
|
||||
extern CACHE_ALIGN u32 color_15bit_to_24bit[32768];
|
||||
extern CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768];
|
||||
extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768];
|
||||
|
@ -190,6 +191,11 @@ extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
|
|||
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
|
||||
extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32];
|
||||
|
||||
//these contain the 3d framebuffer converted into the most useful format
|
||||
//they are stored here instead of in the renderers in order to consolidate the buffers
|
||||
extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
|
||||
extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192];
|
||||
|
||||
//GE commands:
|
||||
void gfx3d_glViewPort(u32 v);
|
||||
void gfx3d_glClearColor(u32 v);
|
||||
|
@ -212,11 +218,11 @@ BOOL gfx3d_glMultMatrix4x4(s32 v);
|
|||
void gfx3d_glBegin(u32 v);
|
||||
void gfx3d_glEnd(void);
|
||||
void gfx3d_glColor3b(u32 v);
|
||||
BOOL gfx3d_glVertex16b(unsigned int v);
|
||||
BOOL gfx3d_glVertex16b(u32 v);
|
||||
void gfx3d_glVertex10b(u32 v);
|
||||
void gfx3d_glVertex3_cord(unsigned int one, unsigned int two, unsigned int v);
|
||||
void gfx3d_glVertex3_cord(u32 one, u32 two, u32 v);
|
||||
void gfx3d_glVertex_rel(u32 v);
|
||||
void gfx3d_glSwapScreen(unsigned int screen);
|
||||
void gfx3d_glSwapScreen(u32 screen);
|
||||
int gfx3d_GetNumPolys();
|
||||
int gfx3d_GetNumVertex();
|
||||
void gfx3d_glPolygonAttrib (u32 val);
|
||||
|
@ -229,16 +235,16 @@ void gfx3d_glTexImage(u32 val);
|
|||
void gfx3d_glTexPalette(u32 val);
|
||||
void gfx3d_glTexCoord(u32 val);
|
||||
void gfx3d_glNormal(u32 v);
|
||||
s32 gfx3d_GetClipMatrix (unsigned int index);
|
||||
s32 gfx3d_GetDirectionalMatrix (unsigned int index);
|
||||
s32 gfx3d_GetClipMatrix (u32 index);
|
||||
s32 gfx3d_GetDirectionalMatrix (u32 index);
|
||||
void gfx3d_glLightDirection (u32 v);
|
||||
void gfx3d_glLightColor (u32 v);
|
||||
void gfx3d_glAlphaFunc(u32 v);
|
||||
BOOL gfx3d_glBoxTest(u32 v);
|
||||
BOOL gfx3d_glPosTest(u32 v);
|
||||
void gfx3d_glVecTest(u32 v);
|
||||
unsigned int gfx3d_glGetPosRes(unsigned int index);
|
||||
unsigned short gfx3d_glGetVecRes(unsigned int index);
|
||||
u32 gfx3d_glGetPosRes(u32 index);
|
||||
u16 gfx3d_glGetVecRes(u32 index);
|
||||
void gfx3d_glFlush(u32 v);
|
||||
void gfx3d_VBlankSignal();
|
||||
void gfx3d_VBlankEndSignal(bool skipFrame);
|
||||
|
@ -248,9 +254,11 @@ void gfx3d_sendCommandToFIFO(u32 val);
|
|||
void gfx3d_sendCommand(u32 cmd, u32 param);
|
||||
|
||||
//other misc stuff
|
||||
void gfx3d_glGetMatrix(unsigned int mode, int index, float* dest);
|
||||
void gfx3d_glGetLightDirection(unsigned int index, unsigned int* dest);
|
||||
void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest);
|
||||
void gfx3d_glGetMatrix(u32 mode, int index, float* dest);
|
||||
void gfx3d_glGetLightDirection(u32 index, u32* dest);
|
||||
void gfx3d_glGetLightColor(u32 index, u32* dest);
|
||||
|
||||
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha);
|
||||
|
||||
struct SFORMAT;
|
||||
extern SFORMAT SF_GFX3D[];
|
||||
|
|
|
@ -63,6 +63,7 @@ template<typename T> T _min(T a, T b, T c, T d) { return min(_min(a,b,d),c); }
|
|||
template<typename T> T _max(T a, T b, T c, T d) { return max(_max(a,b,d),c); }
|
||||
|
||||
static int polynum;
|
||||
static bool validFramebuffer = false;
|
||||
|
||||
static u8 modulate_table[32][32];
|
||||
static u8 decal_table[32][32][32];
|
||||
|
@ -202,16 +203,16 @@ struct PolyAttr
|
|||
|
||||
} polyAttr;
|
||||
|
||||
union FragmentColor {
|
||||
u32 color;
|
||||
struct {
|
||||
//#ifdef WORDS_BIGENDIAN ?
|
||||
u8 r,g,b,a;
|
||||
};
|
||||
};
|
||||
|
||||
struct Fragment
|
||||
{
|
||||
union Color {
|
||||
u32 color;
|
||||
struct {
|
||||
//#ifdef WORDS_BIGENDIAN ?
|
||||
u8 r,g,b,a;
|
||||
} components;
|
||||
} color;
|
||||
|
||||
u32 depth;
|
||||
|
||||
struct {
|
||||
|
@ -220,7 +221,7 @@ struct Fragment
|
|||
|
||||
u8 stencil;
|
||||
|
||||
u8 pad[5];
|
||||
u8 pad;
|
||||
};
|
||||
|
||||
static VERT* verts[MAX_CLIPPED_VERTS];
|
||||
|
@ -231,6 +232,8 @@ INLINE static void SubmitVertex(int vert_index, VERT& rawvert)
|
|||
}
|
||||
|
||||
static Fragment screen[256*192];
|
||||
static FragmentColor screenColor[256*192];
|
||||
|
||||
|
||||
FORCEINLINE int iround(float f) {
|
||||
return (int)f; //lol
|
||||
|
@ -300,7 +303,7 @@ static struct Sampler
|
|||
}
|
||||
}
|
||||
|
||||
FORCEINLINE Fragment::Color sample(float u, float v)
|
||||
FORCEINLINE FragmentColor sample(float u, float v)
|
||||
{
|
||||
//finally, we can use floor here. but, it is slower than we want.
|
||||
//the best solution is probably to wait until the pipeline is full of fixed point
|
||||
|
@ -308,7 +311,7 @@ static struct Sampler
|
|||
int iv = floorf(v);
|
||||
dowrap(iu,iv);
|
||||
|
||||
Fragment::Color color;
|
||||
FragmentColor color;
|
||||
color.color = ((u32*)textures.currentData)[(iv<<wshift)+iu];
|
||||
return color;
|
||||
}
|
||||
|
@ -327,11 +330,11 @@ struct Shader
|
|||
}
|
||||
|
||||
float invu, invv, w;
|
||||
Fragment::Color materialColor;
|
||||
FragmentColor materialColor;
|
||||
|
||||
FORCEINLINE void shade(Fragment& dst)
|
||||
FORCEINLINE void shade(FragmentColor& dst)
|
||||
{
|
||||
Fragment::Color texColor;
|
||||
FragmentColor texColor;
|
||||
float u,v;
|
||||
|
||||
switch(mode)
|
||||
|
@ -340,10 +343,10 @@ struct Shader
|
|||
u = invu*w;
|
||||
v = invv*w;
|
||||
texColor = sampler.sample(u,v);
|
||||
dst.color.components.r = modulate_table[texColor.components.r][materialColor.components.r];
|
||||
dst.color.components.g = modulate_table[texColor.components.g][materialColor.components.g];
|
||||
dst.color.components.b = modulate_table[texColor.components.b][materialColor.components.b];
|
||||
dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a];
|
||||
dst.r = modulate_table[texColor.r][materialColor.r];
|
||||
dst.g = modulate_table[texColor.g][materialColor.g];
|
||||
dst.b = modulate_table[texColor.b][materialColor.b];
|
||||
dst.a = modulate_table[texColor.a][materialColor.a];
|
||||
//dst.color.components.a = 31;
|
||||
//#ifdef _MSC_VER
|
||||
//if(GetAsyncKeyState(VK_SHIFT)) {
|
||||
|
@ -361,37 +364,37 @@ struct Shader
|
|||
u = invu*w;
|
||||
v = invv*w;
|
||||
texColor = sampler.sample(u,v);
|
||||
dst.color.components.r = decal_table[texColor.components.a][texColor.components.r][materialColor.components.r];
|
||||
dst.color.components.g = decal_table[texColor.components.a][texColor.components.g][materialColor.components.g];
|
||||
dst.color.components.b = decal_table[texColor.components.a][texColor.components.b][materialColor.components.b];
|
||||
dst.color.components.a = materialColor.components.a;
|
||||
dst.r = decal_table[texColor.a][texColor.r][materialColor.r];
|
||||
dst.g = decal_table[texColor.a][texColor.g][materialColor.g];
|
||||
dst.b = decal_table[texColor.a][texColor.b][materialColor.b];
|
||||
dst.a = materialColor.a;
|
||||
break;
|
||||
case 2: //toon/highlight shading
|
||||
u = invu*w;
|
||||
v = invv*w;
|
||||
texColor = sampler.sample(u,v);
|
||||
u32 toonColorVal; toonColorVal = gfx3d.rgbToonTable[materialColor.components.r];
|
||||
Fragment::Color toonColor;
|
||||
toonColor.components.r = ((toonColorVal & 0x0000FF) >> 3);
|
||||
toonColor.components.g = ((toonColorVal & 0x00FF00) >> 11);
|
||||
toonColor.components.b = ((toonColorVal & 0xFF0000) >> 19);
|
||||
dst.color.components.r = modulate_table[texColor.components.r][toonColor.components.r];
|
||||
dst.color.components.g = modulate_table[texColor.components.g][toonColor.components.g];
|
||||
dst.color.components.b = modulate_table[texColor.components.b][toonColor.components.b];
|
||||
dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a];
|
||||
u32 toonColorVal; toonColorVal = gfx3d.rgbToonTable[materialColor.r];
|
||||
FragmentColor toonColor;
|
||||
toonColor.r = ((toonColorVal & 0x0000FF) >> 3);
|
||||
toonColor.g = ((toonColorVal & 0x00FF00) >> 11);
|
||||
toonColor.b = ((toonColorVal & 0xFF0000) >> 19);
|
||||
dst.r = modulate_table[texColor.r][toonColor.r];
|
||||
dst.g = modulate_table[texColor.g][toonColor.g];
|
||||
dst.b = modulate_table[texColor.b][toonColor.b];
|
||||
dst.a = modulate_table[texColor.a][materialColor.a];
|
||||
if(gfx3d.shading == GFX3D::HIGHLIGHT)
|
||||
{
|
||||
dst.color.components.r = min<u8>(31, (dst.color.components.r + toonColor.components.r));
|
||||
dst.color.components.g = min<u8>(31, (dst.color.components.g + toonColor.components.g));
|
||||
dst.color.components.b = min<u8>(31, (dst.color.components.b + toonColor.components.b));
|
||||
dst.r = min<u8>(31, (dst.r + toonColor.r));
|
||||
dst.g = min<u8>(31, (dst.g + toonColor.g));
|
||||
dst.b = min<u8>(31, (dst.b + toonColor.b));
|
||||
}
|
||||
break;
|
||||
case 3: //shadows
|
||||
//is this right? only with the material color?
|
||||
dst.color = materialColor;
|
||||
dst = materialColor;
|
||||
break;
|
||||
case 4: //our own special mode which only uses the material color (for when texturing is disabled)
|
||||
dst.color = materialColor;
|
||||
dst = materialColor;
|
||||
break;
|
||||
|
||||
}
|
||||
|
@ -399,44 +402,45 @@ struct Shader
|
|||
|
||||
} shader;
|
||||
|
||||
static FORCEINLINE void alphaBlend(Fragment::Color & dst, const Fragment::Color & src)
|
||||
static FORCEINLINE void alphaBlend(FragmentColor & dst, const FragmentColor & src)
|
||||
{
|
||||
if(gfx3d.enableAlphaBlending)
|
||||
{
|
||||
if(src.components.a == 0)
|
||||
if(src.a == 0)
|
||||
{
|
||||
dst.components.a = max(src.components.a,dst.components.a);
|
||||
dst.a = max(src.a,dst.a);
|
||||
}
|
||||
else if(src.components.a == 31 || dst.components.a == 0)
|
||||
else if(src.a == 31 || dst.a == 0)
|
||||
{
|
||||
dst.color = src.color;
|
||||
dst.components.a = max(src.components.a,dst.components.a);
|
||||
dst = src;
|
||||
dst.a = max(src.a,dst.a);
|
||||
}
|
||||
else
|
||||
{
|
||||
u8 alpha = src.components.a+1;
|
||||
u8 alpha = src.a+1;
|
||||
u8 invAlpha = 32 - alpha;
|
||||
dst.components.r = (alpha*src.components.r + invAlpha*dst.components.r)>>5;
|
||||
dst.components.g = (alpha*src.components.g + invAlpha*dst.components.g)>>5;
|
||||
dst.components.b = (alpha*src.components.b + invAlpha*dst.components.b)>>5;
|
||||
dst.components.a = max(src.components.a,dst.components.a);
|
||||
dst.r = (alpha*src.r + invAlpha*dst.r)>>5;
|
||||
dst.g = (alpha*src.g + invAlpha*dst.g)>>5;
|
||||
dst.b = (alpha*src.b + invAlpha*dst.b)>>5;
|
||||
dst.a = max(src.a,dst.a);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(src.components.a == 0)
|
||||
if(src.a == 0)
|
||||
{
|
||||
//do nothing; the fragment is totally transparent
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.color = src.color;
|
||||
dst = src;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) {
|
||||
Fragment &destFragment = screen[adr];
|
||||
FragmentColor &destFragmentColor = screenColor[adr];
|
||||
|
||||
//depth test
|
||||
u32 depth;
|
||||
|
@ -478,25 +482,25 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
|
|||
//this is a HACK:
|
||||
//we are being very sloppy with our interpolation precision right now
|
||||
//and rather than fix it, i just want to clamp it
|
||||
shader.materialColor.components.r = max(0,min(31,(int)r));
|
||||
shader.materialColor.components.g = max(0,min(31,(int)g));
|
||||
shader.materialColor.components.b = max(0,min(31,(int)b));
|
||||
shader.materialColor.r = max(0,min(31,(int)r));
|
||||
shader.materialColor.g = max(0,min(31,(int)g));
|
||||
shader.materialColor.b = max(0,min(31,(int)b));
|
||||
|
||||
shader.materialColor.components.a = polyAttr.alpha;
|
||||
shader.materialColor.a = polyAttr.alpha;
|
||||
|
||||
//pixel shader
|
||||
Fragment shaderOutput;
|
||||
FragmentColor shaderOutput;
|
||||
shader.shade(shaderOutput);
|
||||
|
||||
//alpha test
|
||||
if(gfx3d.enableAlphaTest)
|
||||
{
|
||||
if(shaderOutput.color.components.a < gfx3d.alphaTestRef)
|
||||
if(shaderOutput.a < gfx3d.alphaTestRef)
|
||||
goto rejected_fragment;
|
||||
}
|
||||
|
||||
//we shouldn't do any of this if we generated a totally transparent pixel
|
||||
if(shaderOutput.color.components.a != 0)
|
||||
if(shaderOutput.a != 0)
|
||||
{
|
||||
//handle shadow polys
|
||||
if(shader.mode == 3)
|
||||
|
@ -533,7 +537,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
|
|||
}
|
||||
|
||||
//handle polyids
|
||||
bool isOpaquePixel = shaderOutput.color.components.a == 31;
|
||||
bool isOpaquePixel = shaderOutput.a == 31;
|
||||
if(isOpaquePixel)
|
||||
{
|
||||
destFragment.polyid.opaque = polyAttr.polyid;
|
||||
|
@ -561,7 +565,7 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
|
|||
}
|
||||
|
||||
//alpha blending and write color
|
||||
alphaBlend(destFragment.color, shaderOutput.color);
|
||||
alphaBlend(destFragmentColor, shaderOutput);
|
||||
|
||||
//depth writing
|
||||
if(isOpaquePixel || polyAttr.translucentDepthWrite)
|
||||
|
@ -925,7 +929,9 @@ static char SoftRastInit(void)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void SoftRastReset() {}
|
||||
static void SoftRastReset() {
|
||||
validFramebuffer = false;
|
||||
}
|
||||
|
||||
static void SoftRastClose()
|
||||
{
|
||||
|
@ -935,50 +941,41 @@ static void SoftRastVramReconfigureSignal() {
|
|||
TexCache_Invalidate();
|
||||
}
|
||||
|
||||
CACHE_ALIGN static const u16 alpha_lookup[] = {
|
||||
0x0000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
|
||||
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000};
|
||||
|
||||
static void SoftRastGetLine(int line, u16* dst, u8* dstAlpha)
|
||||
static void SoftRastConvertFramebuffer()
|
||||
{
|
||||
Fragment* src = screen+((line)<<8);
|
||||
for(int i=0;i<256;i++)
|
||||
FragmentColor* src = screenColor;
|
||||
u16* dst = gfx3d_convertedScreen;
|
||||
u8* dstAlpha = gfx3d_convertedAlpha;
|
||||
|
||||
//in an effort to speed this up, the misc pixel buffers and the color buffer were separated.
|
||||
|
||||
for(int i=0,y=0;y<192;y++)
|
||||
{
|
||||
const bool testRenderAlpha = false;
|
||||
const u8 r = src->color.components.r;
|
||||
const u8 g = src->color.components.g;
|
||||
const u8 b = src->color.components.b;
|
||||
*dst = R5G5B5TORGB15(r,g,b);
|
||||
#ifndef NOSSE
|
||||
u8* wanx = (u8*)&src[i];
|
||||
#define ASS(X,Y) __asm { prefetchnta [wanx+32*0x##X##Y] }
|
||||
#define PUNK(X) ASS(X,0) ASS(X,1) ASS(X,2) ASS(X,3) ASS(X,4) ASS(X,5) ASS(X,6) ASS(X,7) ASS(X,8) ASS(X,9) ASS(X,A) ASS(X,B) ASS(X,C) ASS(X,D) ASS(X,E) ASS(X,F)
|
||||
PUNK(0); PUNK(1);
|
||||
#endif
|
||||
|
||||
*dst |= alpha_lookup[src->color.components.a];
|
||||
*dstAlpha = alpha_5bit_to_4bit[src->color.components.a];
|
||||
|
||||
if(testRenderAlpha)
|
||||
for(int x=0;x<256;x++,i++)
|
||||
{
|
||||
*dst = 0x8000 | R5G5B5TORGB15(src->color.components.a,src->color.components.a,src->color.components.a);
|
||||
*dstAlpha = 16;
|
||||
const u8 r = src[i].r;
|
||||
const u8 g = src[i].g;
|
||||
const u8 b = src[i].b;
|
||||
const u8 a = src[i].a;
|
||||
dst[i] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
|
||||
dstAlpha[i] = alpha_5bit_to_4bit[a];
|
||||
}
|
||||
|
||||
src++;
|
||||
dst++;
|
||||
dstAlpha++;
|
||||
}
|
||||
|
||||
validFramebuffer = true;
|
||||
}
|
||||
|
||||
static void SoftRastGetLineCaptured(int line, u16* dst) {
|
||||
Fragment* src = screen+((line)<<8);
|
||||
for(int i=0;i<256;i++)
|
||||
static void SoftRastCheckFresh()
|
||||
{
|
||||
if(!validFramebuffer)
|
||||
{
|
||||
const u8 r = src->color.components.r;
|
||||
const u8 g = src->color.components.g;
|
||||
const u8 b = src->color.components.b;
|
||||
*dst = R5G5B5TORGB15(r,g,b);
|
||||
*dst |= alpha_lookup[src->color.components.a];
|
||||
src++;
|
||||
dst++;
|
||||
SoftRastConvertFramebuffer();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1158,10 +1155,11 @@ static void clipPoly(POLY* poly)
|
|||
static void SoftRastRender()
|
||||
{
|
||||
Fragment clearFragment;
|
||||
clearFragment.color.components.r = gfx3d.clearColor&0x1F;
|
||||
clearFragment.color.components.g = (gfx3d.clearColor>>5)&0x1F;
|
||||
clearFragment.color.components.b = (gfx3d.clearColor>>10)&0x1F;
|
||||
clearFragment.color.components.a = (gfx3d.clearColor>>16)&0x1F;
|
||||
FragmentColor clearFragmentColor;
|
||||
clearFragmentColor.r = gfx3d.clearColor&0x1F;
|
||||
clearFragmentColor.g = (gfx3d.clearColor>>5)&0x1F;
|
||||
clearFragmentColor.b = (gfx3d.clearColor>>10)&0x1F;
|
||||
clearFragmentColor.a = (gfx3d.clearColor>>16)&0x1F;
|
||||
clearFragment.polyid.opaque = (gfx3d.clearColor>>24)&0x3F;
|
||||
//special value for uninitialized translucent polyid. without this, fires in spiderman2 don't display
|
||||
//I am not sure whether it is right, though. previously this was cleared to 0, as a guess,
|
||||
|
@ -1171,6 +1169,8 @@ static void SoftRastRender()
|
|||
clearFragment.stencil = 0;
|
||||
for(int i=0;i<256*192;i++)
|
||||
screen[i] = clearFragment;
|
||||
for(int i=0;i<256*192;i++)
|
||||
screenColor[i] = clearFragmentColor;
|
||||
|
||||
//convert colors to float to get more precision in case we need it
|
||||
for(int i=0;i<gfx3d.vertlist->count;i++)
|
||||
|
@ -1292,8 +1292,9 @@ static void SoftRastRender()
|
|||
shape_engine(type,!polyAttr.backfacing);
|
||||
}
|
||||
|
||||
// printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count);
|
||||
validFramebuffer = false;
|
||||
|
||||
// printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count);
|
||||
}
|
||||
|
||||
GPU3DInterface gpu3DRasterize = {
|
||||
|
@ -1303,6 +1304,5 @@ GPU3DInterface gpu3DRasterize = {
|
|||
SoftRastClose,
|
||||
SoftRastRender,
|
||||
SoftRastVramReconfigureSignal,
|
||||
SoftRastGetLine,
|
||||
SoftRastGetLineCaptured
|
||||
SoftRastCheckFresh,
|
||||
};
|
||||
|
|
|
@ -24,8 +24,6 @@ int cur3DCore = GPU3D_NULL;
|
|||
|
||||
static void NDS_nullFunc1 (void){}
|
||||
static char NDS_nullFunc2 (void){ return 1; }
|
||||
static void NDS_nullFunc3 (int,unsigned short*) {}
|
||||
static void NDS_nullFunc4 (int,unsigned short*,unsigned char*) {}
|
||||
|
||||
GPU3DInterface gpu3DNull = {
|
||||
"None",
|
||||
|
@ -34,8 +32,7 @@ GPU3DInterface gpu3DNull = {
|
|||
NDS_nullFunc1, //NDS_3D_Close
|
||||
NDS_nullFunc1, //NDS_3D_Render
|
||||
NDS_nullFunc1, //NDS_3D_VramReconfigureSignal
|
||||
NDS_nullFunc4, //NDS_3D_GetLine
|
||||
NDS_nullFunc3 //NDS_3D_GetLineCaptured
|
||||
NDS_nullFunc1, //NDS_3D_CheckFresh
|
||||
};
|
||||
|
||||
GPU3DInterface *gpu3D = &gpu3DNull;
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
#ifndef RENDER3D_H
|
||||
#define RENDER3D_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
//not using this right now
|
||||
#define CALL_CONVENTION
|
||||
|
||||
|
@ -44,12 +46,8 @@ typedef struct Render3DInterface
|
|||
//called when the emulator reconfigures its vram. you may need to invalidate your texture cache.
|
||||
void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) ();
|
||||
|
||||
//Retrieves a line of color buffer data
|
||||
void (CALL_CONVENTION* NDS_3D_GetLine) (int line, unsigned short* dst, unsigned char* dstAlpha);
|
||||
|
||||
//Retrieves a line of color buffer data for capture
|
||||
void (CALL_CONVENTION* NDS_3D_GetLineCaptured) (int line, unsigned short* dst);
|
||||
|
||||
//ensures that the plugin's framebuffer generation is fresh
|
||||
void (CALL_CONVENTION* NDS_3D_CheckFresh) ();
|
||||
|
||||
} GPU3DInterface;
|
||||
|
||||
|
|
Loading…
Reference in New Issue