Change 3D rendering to use 18 bits of color instead of 15. Fix the Mario Kart star powerup rainbow effect regression. Merge all 3D pixel blitters into one function. Fix bugs in the new FIFO savestates. I probably just broke some stuff, but it's worth it.

This commit is contained in:
zeromus 2009-08-01 00:47:21 +00:00
parent 131603c25b
commit c0ea499806
7 changed files with 244 additions and 559 deletions

View File

@ -115,384 +115,6 @@ CACHE_ALIGN u16 fadeOutColors[17][0x8000];
CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
/*****************************************************************************/
// PIXEL RENDERING - 3D
/*****************************************************************************/
// Shared prologue for the setFinal3DColor* blitters. Expands to these locals:
//   x       - copy of dstX, the destination column
//   passing - dstX<<1, the offset handed to T2ReadWord/T2WriteWord
//   color   - 16-bit pixel fetched from _3dColorLine[srcX]
//   alpha   - alpha value fetched from _3dAlphaLine[srcX]
//   dst     - currDst, the line buffer being written
// The function using it must have dstX and srcX in scope.
#define DECL3D \
int x = dstX; \
int passing = dstX<<1; \
u16 color = _3dColorLine[srcX]; \
u8 alpha = _3dAlphaLine[srcX]; \
u8* dst = currDst;
// Blits one 3D pixel when no colour special effect is selected.
// A translucent pixel (alpha below 16) is alpha-blended with the pixel already in the
// line buffer when the layer underneath is selected as 2nd source in BLDCNT; in every
// other case the 3D colour is stored untouched. The pixel is always written with
// bit 15 set and bgPixels is reset to 0 for that column.
// (The former "bg0HasHighestPrio" requirement was dropped on 30-may-09: 3D is assumed to always blend.)
FORCEINLINE void GPU::setFinal3DColorSpecialNone(int dstX, int srcX)
{
    DECL3D;
    u16 final = color;

    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[dstX])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Blits one 3D pixel when the "blend" colour special effect is selected.
// A translucent pixel (alpha below 16) is alpha-blended with the pixel already in the
// line buffer when the layer underneath is selected as 2nd source in BLDCNT; otherwise
// the 3D colour is stored as is. The pixel is always written with bit 15 set and
// bgPixels is reset to 0 for that column.
// (The former "(BLDCNT & 0x1) || bg0HasHighestPrio" requirement was dropped on 30-may-09.)
FORCEINLINE void GPU::setFinal3DColorSpecialBlend(int dstX, int srcX)
{
    DECL3D;
    u16 final = color;

    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Blits one 3D pixel when the "brightness increase" special effect is selected.
// Step 1: a translucent pixel (alpha below 16) is alpha-blended with the pixel already
//         in the line buffer if the layer underneath is selected as 2nd source in BLDCNT.
// Step 2: if BLDCNT bit 0 is set and BLDY_EVY is non-zero, the result is passed through
//         the fadeInColors table. Fading is applied after blending, i.e. blending does
//         not take priority over the fade here.
// The pixel is always written with bit 15 set and bgPixels is reset to 0 for that column.
FORCEINLINE void GPU::setFinal3DColorSpecialIncrease(int dstX, int srcX)
{
    DECL3D;
    u16 final = color;

    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    if((BLDCNT & 0x1) && (BLDY_EVY != 0x0))
        final = fadeInColors[BLDY_EVY][final&0x7FFF];

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Blits one 3D pixel when the "brightness decrease" special effect is selected.
// Step 1: a translucent pixel (alpha below 16) is alpha-blended with the pixel already
//         in the line buffer if the layer underneath is selected as 2nd source in BLDCNT.
// Step 2: if BLDCNT bit 0 is set and BLDY_EVY is non-zero, the result is passed through
//         the fadeOutColors table. Fading is applied after blending, i.e. blending does
//         not take priority over the fade here.
// The pixel is always written with bit 15 set and bgPixels is reset to 0 for that column.
FORCEINLINE void GPU::setFinal3DColorSpecialDecrease(int dstX, int srcX)
{
    DECL3D;
    u16 final = color;

    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    if((BLDCNT & 0x1) && (BLDY_EVY != 0x0))
        final = fadeOutColors[BLDY_EVY][final&0x7FFF];

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Windowed variant of the "no special effect" 3D blitter.
// renderline_checkWindows decides whether this column may be drawn at all; when it
// may not, nothing is written. Otherwise a translucent pixel (alpha below 16) is
// alpha-blended with the pixel already in the line buffer if the layer underneath is
// selected as 2nd source in BLDCNT, and the result is written with bit 15 set while
// bgPixels is reset to 0 for that column.
FORCEINLINE void GPU::setFinal3DColorSpecialNoneWnd(int dstX, int srcX)
{
    DECL3D;
    bool windowDraw = true, windowEffect = true;
    renderline_checkWindows(x, windowDraw, windowEffect);
    if(!windowDraw)
        return;

    u16 final = color;
    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Windowed variant of the "blend" 3D blitter.
// renderline_checkWindows decides whether this column may be drawn at all; when it
// may not, nothing is written. Otherwise a translucent pixel (alpha below 16) is
// alpha-blended with the pixel already in the line buffer if the layer underneath is
// selected as 2nd source in BLDCNT, and the result is written with bit 15 set while
// bgPixels is reset to 0 for that column. The window's effect flag is not consulted
// (that part of the old condition was dropped on 30-may-09).
FORCEINLINE void GPU::setFinal3DColorSpecialBlendWnd(int dstX, int srcX)
{
    DECL3D;
    bool windowDraw = true, windowEffect = true;
    renderline_checkWindows(x, windowDraw, windowEffect);
    if(!windowDraw)
        return;

    u16 final = color;
    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Windowed variant of the "brightness increase" 3D blitter.
// Nothing is written when renderline_checkWindows rejects the column. Otherwise:
// Step 1: a translucent pixel (alpha below 16) is alpha-blended with the pixel already
//         in the line buffer if the layer underneath is selected as 2nd source in BLDCNT.
// Step 2: if BLDCNT bit 0 is set, the window allows effects and BLDY_EVY is non-zero,
//         the result is passed through the fadeInColors table.
// The pixel is written with bit 15 set and bgPixels is reset to 0 for that column.
FORCEINLINE void GPU::setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX)
{
    DECL3D;
    bool windowDraw = true, windowEffect = true;
    renderline_checkWindows(x, windowDraw, windowEffect);
    if(!windowDraw)
        return;

    u16 final = color;
    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    if((BLDCNT & 0x1) && windowEffect && (BLDY_EVY != 0x0))
        final = fadeInColors[BLDY_EVY][final&0x7FFF];

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
// Windowed variant of the "brightness decrease" 3D blitter.
// Nothing is written when renderline_checkWindows rejects the column. Otherwise:
// Step 1: a translucent pixel (alpha below 16) is alpha-blended with the pixel already
//         in the line buffer if the layer underneath is selected as 2nd source in BLDCNT.
// Step 2: if BLDCNT bit 0 is set, the window allows effects and BLDY_EVY is non-zero,
//         the result is passed through the fadeOutColors table.
// The pixel is written with bit 15 set and bgPixels is reset to 0 for that column.
FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX)
{
    DECL3D;
    bool windowDraw = true, windowEffect = true;
    renderline_checkWindows(x, windowDraw, windowEffect);
    if(!windowDraw)
        return;

    u16 final = color;
    if(alpha < 16 && (BLDCNT & (0x100 << bgPixels[x])))
    {
        COLOR top, under, mixed;
        const int inv = 16 - alpha;
        top.val = color;
        under.val = T2ReadWord(dst, passing);
        mixed.bits.red = ((top.bits.red * alpha) + (under.bits.red * inv)) / 16;
        mixed.bits.green = ((top.bits.green * alpha) + (under.bits.green * inv)) / 16;
        mixed.bits.blue = ((top.bits.blue * alpha) + (under.bits.blue * inv)) / 16;
        final = mixed.val;
    }

    if((BLDCNT & 0x1) && windowEffect && (BLDY_EVY != 0x0))
        final = fadeOutColors[BLDY_EVY][final&0x7FFF];

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[x] = 0;
}
/*****************************************************************************/
// INITIALIZATION
/*****************************************************************************/
@ -938,9 +560,69 @@ FORCEINLINE void GPU::renderline_checkWindows(u16 x, bool &draw, bool &effect) c
}
/*****************************************************************************/
// PIXEL RENDERING - BGS
// PIXEL RENDERING
/*****************************************************************************/
// Composites one pixel of the 3D layer onto the scanline being built.
//   dstX - destination column on the line
//   srcX - source column within the 3D line
// _3dColorLine holds 4 bytes per pixel: red, green, blue (6 bits each) followed by alpha.
// FUNC selects the colour special effect; WINDOW enables the window test.
// The pixel is written with bit 15 set and bgPixels is reset to 0 for that column.
template<BlendFunc FUNC, bool WINDOW>
FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(int dstX, int srcX)
{
    const int passing = dstX<<1;
    const u8* color = &_3dColorLine[srcX<<2];
    const u8 red = color[0];
    const u8 green = color[1];
    const u8 blue = color[2];
    u8 alpha = color[3];
    u8* dst = currDst;
    u16 final;

    bool windowEffect = true;
    if(WINDOW)
    {
        //start from "draw" so the flag is never read uninitialized
        bool windowDraw = true;
        renderline_checkWindows(dstX, windowDraw, windowEffect);

        //we never have anything more to do if the window rejected us
        if(!windowDraw) return;
    }

    const int bg_under = bgPixels[dstX];
    if(blend2[bg_under])
    {
        //Rescale alpha from 0..31 to 1..32 so that the maximum value means "fully opaque".
        //Without this, alpha<32 held for every 5-bit alpha, the opaque branch below was
        //unreachable, and opaque pixels let 1/32 of the layer underneath bleed through.
        //NOTE(review): assumes 5-bit alpha as the software rasterizer produces - confirm for other renderers
        alpha++;
        if(alpha<32)
        {
            //if the layer underneath is a blend bottom layer, then 3d always alpha blends with it
            COLOR c2, cfinal;

            c2.val = T2ReadWord(dst, passing);

            //the 5-bit destination channels are doubled to match the 6-bit source channels;
            //>>6 removes both the /32 alpha weight and the extra colour bit
            cfinal.bits.red = ((red * alpha) + ((c2.bits.red<<1) * (32 - alpha)))>>6;
            cfinal.bits.green = ((green * alpha) + ((c2.bits.green<<1) * (32 - alpha)))>>6;
            cfinal.bits.blue = ((blue * alpha) + ((c2.bits.blue<<1) * (32 - alpha)))>>6;

            final = cfinal.val;
        }
        else final = R6G6B6TORGB15(red,green,blue);
    }
    else
    {
        final = R6G6B6TORGB15(red,green,blue);

        //perform the special effect
        if(windowEffect)
            switch(FUNC) {
                case Increase: final = currentFadeInColors[final&0x7FFF]; break;
                case Decrease: final = currentFadeOutColors[final&0x7FFF]; break;
                case None:
                case Blend:
                    break;
            }
    }

    T2WriteWord(dst, passing, (final | 0x8000));
    bgPixels[dstX] = 0;
}
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(u16 &color, const u32 x)
@ -1066,14 +748,14 @@ FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX)
{
switch(setFinalColor3d_funcNum)
{
case 0x0: setFinal3DColorSpecialNone(dstX,srcX); break;
case 0x1: setFinal3DColorSpecialBlend(dstX,srcX); break;
case 0x2: setFinal3DColorSpecialIncrease(dstX,srcX); break;
case 0x3: setFinal3DColorSpecialDecrease(dstX,srcX); break;
case 0x4: setFinal3DColorSpecialNoneWnd(dstX,srcX); break;
case 0x5: setFinal3DColorSpecialBlendWnd(dstX,srcX); break;
case 0x6: setFinal3DColorSpecialIncreaseWnd(dstX,srcX); break;
case 0x7: setFinal3DColorSpecialDecreaseWnd(dstX,srcX); break;
case 0x0: _master_setFinal3dColor<None,false>(dstX,srcX); break;
case 0x1: _master_setFinal3dColor<Blend,false>(dstX,srcX); break;
case 0x2: _master_setFinal3dColor<Increase,false>(dstX,srcX); break;
case 0x3: _master_setFinal3dColor<Decrease,false>(dstX,srcX); break;
case 0x4: _master_setFinal3dColor<None,true>(dstX,srcX); break;
case 0x5: _master_setFinal3dColor<Blend,true>(dstX,srcX); break;
case 0x6: _master_setFinal3dColor<Increase,true>(dstX,srcX); break;
case 0x7: _master_setFinal3dColor<Decrease,true>(dstX,srcX); break;
};
}
@ -2403,8 +2085,8 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
BGxOFS *bgofs = &gpu->dispx_st->dispx_BGxOFS[i16];
u16 hofs = (T1ReadWord((u8*)&bgofs->BGxHOFS, 0) & 0x1FF);
gfx3d_GetLineData(l, &gpu->_3dColorLine, &gpu->_3dAlphaLine);
u16* colorLine = gpu->_3dColorLine;
gfx3d_GetLineData(l, &gpu->_3dColorLine);
u8* colorLine = gpu->_3dColorLine;
for(int k = 0; k < 256; k++)
{
@ -2413,7 +2095,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
if((q < 0) || (q > 255))
continue;
if(colorLine[q] & 0x8000)
if(colorLine[(q<<2)+3])
gpu->setFinalColor3d(k, q);
}
@ -2525,7 +2207,7 @@ template<bool SKIP> static void GPU_ligne_DispCapture(u16 l)
{
//INFO("Capture 3D\n");
u16* colorLine;
gfx3d_GetLineData(l, &colorLine, NULL);
gfx3d_GetLineData15bpp(l, &colorLine);
CAPCOPY(((u8*)colorLine),cap_dst);
}
break;
@ -2563,7 +2245,7 @@ template<bool SKIP> static void GPU_ligne_DispCapture(u16 l)
}
else
{
gfx3d_GetLineData(l, &srcA, NULL);
gfx3d_GetLineData15bpp(l, &srcA);
}
static u16 fifoLine[256];

View File

@ -746,8 +746,7 @@ struct GPU
bool blend1;
u8* currDst;
u16* _3dColorLine;
u8* _3dAlphaLine;
u8* _3dColorLine;
static struct MosaicLookup {
@ -777,6 +776,9 @@ struct GPU
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
FORCEINLINE FASTCALL bool _master_setFinalBGColor(u16 &color, const u32 x);
template<BlendFunc FUNC, bool WINDOW>
FORCEINLINE FASTCALL void _master_setFinal3dColor(int dstX, int srcX);
int setFinalColorBck_funcNum;
int bgFunc;
int setFinalColor3d_funcNum;
@ -798,30 +800,12 @@ struct GPU
}
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(u16 color, const u32 x);
void setFinalColor3d(int dstX, int srcX);
template<bool BACKDROP> FORCEINLINE void setFinalBGColorSpecialNone(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE void setFinalBGColorSpecialBlend(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE void setFinalBGColorSpecialIncrease(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE void setFinalBGColorSpecialDecrease(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE bool setFinalBGColorSpecialNoneWnd(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE bool setFinalBGColorSpecialBlendWnd(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE bool setFinalBGColorSpecialIncreaseWnd(u16 &color, const u32 x);
template<bool BACKDROP> FORCEINLINE bool setFinalBGColorSpecialDecreaseWnd(u16 &color, const u32 x);
FORCEINLINE void setFinal3DColorSpecialNone(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialBlend(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialIncrease(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialDecrease(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialNoneWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialBlendWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialIncreaseWnd(int dstX, int srcX);
FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX);
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(u16 color, const u32 x);
template<bool MOSAIC, bool BACKDROP> FORCEINLINE void __setFinalColorBck(u16 color, const u32 x, const int opaque);
template<bool MOSAIC, bool BACKDROP, int FUNCNUM> FORCEINLINE void ___setFinalColorBck(u16 color, const u32 x, const int opaque);
void setAffineStart(int layer, int xy, u32 val);
void setAffineStartWord(int layer, int xy, u16 val, int word);
u32 getAffineStart(int layer, int xy);

View File

@ -693,34 +693,58 @@ static void GL_ReadFramebuffer()
//is it safe to modify the screen buffer? if not, we could make a temp copy
for(int i=0,y=191;y>=0;y--)
{
u16* dst = gfx3d_convertedScreen + (y<<8);
u8* dstAlpha = gfx3d_convertedAlpha + (y<<8);
//I dont know much about this kind of stuff, but this seems to help
//for some reason I couldnt make the intrinsics work
//u8* u8screen3D = (u8*)&((u32*)GPU_screen3D)[i];
/*#define PREFETCH32(X,Y) __asm { prefetchnta [u8screen3D+32*0x##X##Y] }
#define PREFETCH128(X) PREFETCH32(X,0) PREFETCH32(X,1) PREFETCH32(X,2) PREFETCH32(X,3) \
PREFETCH32(X,4) PREFETCH32(X,5) PREFETCH32(X,6) PREFETCH32(X,7) \
PREFETCH32(X,8) PREFETCH32(X,9) PREFETCH32(X,A) PREFETCH32(X,B) \
PREFETCH32(X,C) PREFETCH32(X,D) PREFETCH32(X,E) PREFETCH32(X,F)
PREFETCH128(0); PREFETCH128(1);*/
u8* dst = gfx3d_convertedScreen + (y<<(8+2));
for(int x=0;x<256;x++,i++)
{
u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
u32screen3D>>=3;
u32screen3D &= 0x1F1F1F1F;
u32screen3D>>=2;
u32screen3D &= 0x3F3F3F3F;
const int t = i<<2;
const u8 a = GPU_screen3D[t+3];
const u8 r = GPU_screen3D[t+2];
const u8 g = GPU_screen3D[t+1];
const u8 b = GPU_screen3D[t+0];
dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
dstAlpha[x] = alpha_5bit_to_4bit[a];
*dst++ = r;
*dst++ = g;
*dst++ = b;
*dst++ = a;
}
}
////convert the pixels to a different format which is more convenient
////is it safe to modify the screen buffer? if not, we could make a temp copy
//for(int i=0,y=191;y>=0;y--)
//{
// u16* dst = gfx3d_convertedScreen + (y<<8);
// u8* dstAlpha = gfx3d_convertedAlpha + (y<<8);
// //I dont know much about this kind of stuff, but this seems to help
// //for some reason I couldnt make the intrinsics work
// //u8* u8screen3D = (u8*)&((u32*)GPU_screen3D)[i];
// /*#define PREFETCH32(X,Y) __asm { prefetchnta [u8screen3D+32*0x##X##Y] }
// #define PREFETCH128(X) PREFETCH32(X,0) PREFETCH32(X,1) PREFETCH32(X,2) PREFETCH32(X,3) \
// PREFETCH32(X,4) PREFETCH32(X,5) PREFETCH32(X,6) PREFETCH32(X,7) \
// PREFETCH32(X,8) PREFETCH32(X,9) PREFETCH32(X,A) PREFETCH32(X,B) \
// PREFETCH32(X,C) PREFETCH32(X,D) PREFETCH32(X,E) PREFETCH32(X,F)
// PREFETCH128(0); PREFETCH128(1);*/
// for(int x=0;x<256;x++,i++)
// {
// u32 &u32screen3D = ((u32*)GPU_screen3D)[i];
// u32screen3D>>=3;
// u32screen3D &= 0x1F1F1F1F;
// const int t = i<<2;
// const u8 a = GPU_screen3D[t+3];
// const u8 r = GPU_screen3D[t+2];
// const u8 g = GPU_screen3D[t+1];
// const u8 b = GPU_screen3D[t+0];
// dst[x] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
// dstAlpha[x] = a;
// }
//}
}
@ -831,9 +855,9 @@ static void OGLRender()
u8 alpha = material_5bit_to_8bit[poly->getAlpha()];
if(wireframe) alpha = 255;
u8 color0[4] = {
material_5bit_to_8bit[vert0->color[0]],
material_5bit_to_8bit[vert0->color[1]],
material_5bit_to_8bit[vert0->color[2]],
vert0->color[0]<<2,
vert0->color[1]<<2,
vert0->color[2]<<2,
alpha
};
@ -846,15 +870,15 @@ static void OGLRender()
VERT *vert2 = &gfx3d.vertlist->list[poly->vertIndexes[j+1]];
u8 color1[4] = {
material_5bit_to_8bit[vert1->color[0]],
material_5bit_to_8bit[vert1->color[1]],
material_5bit_to_8bit[vert1->color[2]],
vert1->color[0]<<2,
vert1->color[1]<<2,
vert1->color[2]<<2,
alpha
};
u8 color2[4] = {
material_5bit_to_8bit[vert2->color[0]],
material_5bit_to_8bit[vert2->color[1]],
material_5bit_to_8bit[vert2->color[2]],
vert2->color[0]<<2,
vert2->color[1]<<2,
vert2->color[2]<<2,
alpha
};

View File

@ -114,23 +114,9 @@ CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 8, 13, 17, 22, 26, 31
};
CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = {
0x00, 0x00,
0x01, 0x01,
0x02, 0x02,
0x03, 0x03,
0x04, 0x04,
0x05, 0x05,
0x06, 0x06,
0x07, 0x07,
0x08, 0x08,
0x09, 0x09,
0x0A, 0x0A,
0x0B, 0x0B,
0x0C, 0x0C,
0x0D, 0x0D,
0x0E, 0x0E,
0x10, 0x10
//Lookup table with 8 entries (index 0..7) whose values span 0..63;
//going by the name, it widens a 3bit value to 6 bits.
//TODO - generate this in the static init method more accurately
CACHE_ALIGN const u8 material_3bit_to_6bit[] = {
0, 8, 16, 26, 34, 44, 52, 63
};
CACHE_ALIGN const u16 alpha_lookup[] = {
@ -149,10 +135,7 @@ static float normalTable[1024];
#define fix2float(v) (((float)((s32)(v))) / (float)(1<<12))
#define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9))
CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
//this extra *2 is a HACK to salvage some savestates. remove me when the savestate format changes.
CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2];
CACHE_ALIGN u8 gfx3d_convertedScreen[256*192*4];
// Matrix stack handling
static CACHE_ALIGN MatrixStack mtxStack[4] = {
@ -196,6 +179,7 @@ static u32 clInd = 0;
static u32 clInd2 = 0;
static bool isSwapBuffers = false;
bool isVBlank = false;
bool bWaitForPolys = false;
#endif
static u32 BTind = 0;
@ -207,7 +191,7 @@ static CACHE_ALIGN float PTcoords[4] = {0.0, 0.0, 0.0, 1.0};
static u32 polyAttr=0,textureFormat=0, texturePalette=0, polyAttrPending=0;
//the current vertex color, 5bit values
static int colorRGB[4] = { 31,31,31,31 };
static u8 colorRGB[4] = { 31,31,31,31 };
u32 control = 0;
@ -342,8 +326,6 @@ void gfx3d_reset()
memset(vertlists, 0, sizeof(vertlists));
listTwiddle = 1;
twiddleLists();
gfx3d.polylist = polylist;
gfx3d.vertlist = vertlist;
MatrixInit (mtxCurrent[0]);
MatrixInit (mtxCurrent[1]);
@ -375,7 +357,6 @@ void gfx3d_reset()
viewport = 0xBFFF0000;
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen));
memset(gfx3d_convertedAlpha,0,sizeof(gfx3d_convertedAlpha));
gfx3d.clearDepth = gfx3d_extendDepth_15_to_24(0x7FFF);
@ -383,6 +364,7 @@ void gfx3d_reset()
clInd2 = 0;
isSwapBuffers = false;
isVBlank = false;
bWaitForPolys = false;
#endif
GFX_PIPEclear();
@ -445,9 +427,9 @@ static void SetVertex()
vert.coord[1] = coordTransformed[1];
vert.coord[2] = coordTransformed[2];
vert.coord[3] = coordTransformed[3];
vert.color[0] = colorRGB[0];
vert.color[1] = colorRGB[1];
vert.color[2] = colorRGB[2];
vert.color[0] = GFX3D_5TO6(colorRGB[0]);
vert.color[1] = GFX3D_5TO6(colorRGB[1]);
vert.color[2] = GFX3D_5TO6(colorRGB[2]);
tempVertInfo.map[tempVertInfo.count] = vertlist->count + tempVertInfo.count - continuation;
tempVertInfo.count++;
@ -1526,6 +1508,17 @@ void gfx3d_glFlush(u32 v)
#ifdef USE_GEOMETRY_FIFO_EMULATION
gfx3d.sortmode = BIT0(v);
gfx3d.wbuffer = BIT1(v);
#if 0
if (polygonListCompleted == 2)
{
//u32 gxstat = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600);
//gxstat |= 0x08000000; // set busy flag
//T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x600, gxstat);
bWaitForPolys = true;
return;
}
#endif
isSwapBuffers = true;
#else
if(!flushPending)
@ -1663,10 +1656,9 @@ void gfx3d_VBlankSignal()
isVBlank = true;
if (isSwapBuffers)
{
//if (bWaitForPolys) return;
gfx3d_doFlush();
isSwapBuffers = false;
GFX_DELAY(392);
NDS_RescheduleGXFIFO();
}
#else
//the 3d buffers are swapped when a vblank begins.
@ -1691,16 +1683,23 @@ void gfx3d_VBlankEndSignal(bool skipFrame)
if (!drawPending) return;
drawPending = FALSE;
if(skipFrame) return;
if(skipFrame)
{
GFX_DELAY(392);
NDS_RescheduleGXFIFO();
return;
}
//if the null 3d core is chosen, then we need to clear out the 3d buffers to keep old data from being rendered
if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main)
{
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen));
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedAlpha));
return;
}
gpu3D->NDS_3D_Render();
GFX_DELAY(392);
NDS_RescheduleGXFIFO();
#else
//if we are skipping 3d frames then the 3d rendering will get held up here.
//but, as soon as we quit skipping frames, the held-up 3d frame will render
@ -1716,7 +1715,6 @@ void gfx3d_VBlankEndSignal(bool skipFrame)
if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main)
{
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen));
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedAlpha));
}
#endif
}
@ -1761,13 +1759,6 @@ void gfx3d_sendCommandToFIFO(u32 val)
#ifdef _3D_LOG
INFO("gxFIFO: send 0x%02X: val=0x%08X, pipe %02i, fifo %03i\n", clCmd & 0xFF, val, gxPIPE.tail, gxFIFO.tail);
#endif
if (gxFIFO.size > 255)
{
gfx3d_execute3D();
gfx3d_execute3D();
gfx3d_execute3D();
gfx3d_execute3D();
}
switch (clCmd & 0xFF)
{
case 0x34: // SHININESS - Specular Reflection Shininess Table (W)
@ -1872,13 +1863,6 @@ void gfx3d_sendCommand(u32 cmd, u32 param)
#ifdef _3D_LOG
INFO("gxFIFO: send 0x%02X: val=0x%08X, pipe %02i, fifo %03i (direct)\n", cmd, param, gxPIPE.tail, gxFIFO.tail);
#endif
if (gxFIFO.size > 255)
{
gfx3d_execute3D();
gfx3d_execute3D();
gfx3d_execute3D();
gfx3d_execute3D();
}
switch (cmd)
{
@ -2334,12 +2318,26 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest)
*dest = lightColor[index];
}
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha)
void gfx3d_GetLineData(int line, u8** dst)
{
*dst = gfx3d_convertedScreen+((line)<<8);
if(dstAlpha != NULL)
*dst = gfx3d_convertedScreen+((line)<<(8+2));
}
void gfx3d_GetLineData15bpp(int line, u16** dst)
{
//TODO - this is not very thread safe!!!
u16 buf[256];
*dst = buf;
u8* lineData;
gfx3d_GetLineData(line, &lineData);
for(int i=0;i<256;i++)
{
*dstAlpha = gfx3d_convertedAlpha+((line)<<8);
const u8 r = lineData[i*4+0];
const u8 g = lineData[i*4+1];
const u8 b = lineData[i*4+2];
const u8 a = lineData[i*4+3];
buf[i] = R5G5B5TORGB15(r,g,b) | alpha_lookup[a];
}
}
@ -2382,17 +2380,17 @@ SFORMAT SF_GFX3D[]={
{ "GLBT", 4, 1, &BTind},
{ "GLPT", 4, 1, &PTind},
{ "GLPC", 4, 4, PTcoords},
{ "GFHD", 4, 1, &gxFIFO.head},
{ "GFTA", 4, 1, &gxFIFO.tail},
{ "GFSZ", 4, 1, &gxFIFO.size},
{ "GFHE", 2, 1, &gxFIFO.head},
{ "GFTA", 2, 1, &gxFIFO.tail},
{ "GFSZ", 2, 1, &gxFIFO.size},
{ "GFCM", 1, 257, &gxFIFO.cmd[0]},
{ "GFPM", 4, 257, &gxFIFO.param[0]},
{ "GPHD", 1, 1, &gxPIPE.head},
{ "GPHE", 1, 1, &gxPIPE.head},
{ "GPTA", 1, 1, &gxPIPE.tail},
{ "GPSZ", 1, 1, &gxPIPE.size},
{ "GPCM", 1, 5, &gxPIPE.cmd[0]},
{ "GPPM", 4, 5, &gxPIPE.param[0]},
{ "GCOL", 1, 4, colorRGB},
{ "GCOL", 1, 4, &colorRGB[0]},
{ "GLCO", 4, 4, lightColor},
{ "GLDI", 4, 4, lightDirection},
{ "GMDI", 2, 1, &dsDiffuse},
@ -2427,8 +2425,7 @@ SFORMAT SF_GFX3D[]={
{ "GTVC", 4, 1, &tempVertInfo.count},
{ "GTVM", 4, 4, tempVertInfo.map},
{ "GTVF", 4, 1, &tempVertInfo.first},
{ "G3CS", 2, 256*192, gfx3d_convertedScreen},
{ "G3CA", 2, 256*192, gfx3d_convertedAlpha},
{ "G3CX", 1, 4*256*192, gfx3d_convertedScreen},
{ 0 }
};

View File

@ -38,6 +38,19 @@
//produce a 5555 32bit color from a ds RGB15 plus an 5bit alpha
#define RGB15TO5555(col,alpha5) (((alpha5)<<24) | ((((col) & 0x7C00)>>10)<<16) | ((((col) & 0x3E0)>>5)<<8) | (((col) & 0x1F)))
//Expands a DS RGB15 colour plus a 5bit alpha into a packed 32bit "6665" value:
//red in bits 0-7, green in bits 8-15, blue in bits 16-23, alpha5 from bit 24 upwards.
//Each 5bit colour channel c is widened to 6 bits as 2*c+1, except that 0 stays 0.
inline u32 RGB15TO6665(u16 col, u8 alpha5)
{
    u32 packed = alpha5<<24;
    for(int srcShift=0, dstShift=0; srcShift<15; srcShift+=5, dstShift+=8)
    {
        const u16 channel = (col>>srcShift)&0x1F;
        if(channel) packed |= ((channel<<1)+1)<<dstShift;
    }
    return packed;
}
//produce a 24bpp color from a ds RGB15, using a table
#define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] )
@ -50,6 +63,11 @@
//produce a 15bpp color from individual 5bit components
#define R5G5B5TORGB15(r,g,b) ((r)|((g)<<5)|((b)<<10))
//produce a 15bpp color from individual 6bit components (the low bit of each component is dropped)
#define R6G6B6TORGB15(r,g,b) (((r)>>1)|(((g)&0x3E)<<4)|(((b)&0x3E)<<9))
//widen a 5bit component to 6 bits: 0 stays 0, any other value x becomes 2x+1 (so 31 maps to 63)
//note: x is evaluated more than once, so do not pass an expression with side effects
#define GFX3D_5TO6(x) ((x)?(((x)<<1)+1):0)
inline u32 gfx3d_extendDepth_15_to_24(u32 depth)
{
//formula from http://nocash.emubase.de/gbatek.htm#ds3drearplane
@ -247,12 +265,12 @@ extern CACHE_ALIGN u8 mixTable555[32][32][32];
extern CACHE_ALIGN const int material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_6bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32];
//these contain the 3d framebuffer converted into the most useful format
//they are stored here instead of in the renderers in order to consolidate the buffers
extern CACHE_ALIGN u16 gfx3d_convertedScreen[256*192];
extern CACHE_ALIGN u8 gfx3d_convertedScreen[256*192*4];
extern CACHE_ALIGN u8 gfx3d_convertedAlpha[256*192*2]; //see cpp for explanation of illogical *2
//GE commands:
@ -324,7 +342,8 @@ void gfx3d_glGetMatrix(u32 mode, int index, float* dest);
void gfx3d_glGetLightDirection(u32 index, u32* dest);
void gfx3d_glGetLightColor(u32 index, u32* dest);
void gfx3d_GetLineData(int line, u16** dst, u8** dstAlpha);
void gfx3d_GetLineData(int line, u8** dst);
void gfx3d_GetLineData15bpp(int line, u16** dst);
struct SFORMAT;
extern SFORMAT SF_GFX3D[];

View File

@ -68,8 +68,8 @@ static const int kUnsetTranslucentPolyID = 255;
static int polynum;
static u8 modulate_table[32][32];
static u8 decal_table[32][32][32];
static u8 modulate_table[64][64];
static u8 decal_table[32][64][64];
static u8 index_lookup_table[65];
static u8 index_start_table[8];
@ -223,11 +223,7 @@ struct PolyAttr
//One colour sample of the software rasterizer.
//The channel bytes are laid out in memory as r,g,b,a on every host: the buffer of
//these is memcpy'd straight into gfx3d_convertedScreen, whose readers pick the bytes
//out as r,g,b,a, so the order must not be swapped on big-endian builds.
//`color` aliases all four channels for whole-pixel copies; its numeric value
//therefore depends on the host byte order.
union FragmentColor {
    u32 color;
    struct {
        u8 r,g,b,a;
    };
};
};
@ -350,7 +346,8 @@ struct Shader
{
mode = (polyattr>>4)&0x3;
//if there is no texture set, then set to the mode which doesnt even use a texture
if(sampler.texFormat == 0 && mode != 3)
//(no texture makes sense for toon/highlight mode)
if(sampler.texFormat == 0 && (mode == 0 || mode == 1))
mode = 4;
}
@ -371,7 +368,7 @@ struct Shader
dst.r = modulate_table[texColor.r][materialColor.r];
dst.g = modulate_table[texColor.g][materialColor.g];
dst.b = modulate_table[texColor.b][materialColor.b];
dst.a = modulate_table[texColor.a][materialColor.a];
dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1;
//dst.color.components.a = 31;
//#ifdef _MSC_VER
//if(GetAsyncKeyState(VK_SHIFT)) {
@ -399,7 +396,7 @@ struct Shader
u = invu*w;
v = invv*w;
texColor = sampler.sample(u,v);
FragmentColor toonColor = toonTable[materialColor.r];
FragmentColor toonColor = toonTable[materialColor.r>>1];
if(sampler.texFormat == 0)
{
//if no texture is set then we dont need to modulate texture with toon
@ -414,20 +411,21 @@ struct Shader
dst.r = modulate_table[texColor.r][materialColor.r];
dst.g = modulate_table[texColor.g][materialColor.r];
dst.b = modulate_table[texColor.b][materialColor.r];
dst.a = modulate_table[texColor.a][materialColor.a];
dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1;
dst.r = min<u8>(31, (dst.r + toonColor.r));
dst.g = min<u8>(31, (dst.g + toonColor.g));
dst.b = min<u8>(31, (dst.b + toonColor.b));
dst.r = min<u8>(63, (dst.r + toonColor.r));
dst.g = min<u8>(63, (dst.g + toonColor.g));
dst.b = min<u8>(63, (dst.b + toonColor.b));
}
else
{
dst.r = modulate_table[texColor.r][toonColor.r];
dst.g = modulate_table[texColor.g][toonColor.g];
dst.b = modulate_table[texColor.b][toonColor.b];
dst.a = modulate_table[texColor.a][materialColor.a];
dst.a = modulate_table[GFX3D_5TO6(texColor.a)][GFX3D_5TO6(materialColor.a)]>>1;
}
}
}
break;
case 3: //shadows
@ -519,9 +517,9 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
//this is a HACK:
//we are being very sloppy with our interpolation precision right now
//and rather than fix it, i just want to clamp it
shader.materialColor.r = max(0U,min(31U,u32floor(r)));
shader.materialColor.g = max(0U,min(31U,u32floor(g)));
shader.materialColor.b = max(0U,min(31U,u32floor(b)));
shader.materialColor.r = max(0U,min(63U,u32floor(r)));
shader.materialColor.g = max(0U,min(63U,u32floor(g)));
shader.materialColor.b = max(0U,min(63U,u32floor(b)));
shader.materialColor.a = polyAttr.alpha;
@ -927,11 +925,11 @@ static char SoftRastInit(void)
clippedPolys = new TClippedPoly[POLYLIST_SIZE*2];
for(int i=0;i<32;i++)
for(int i=0;i<64;i++)
{
for(int j=0;j<32;j++)
for(int j=0;j<64;j++)
{
modulate_table[i][j] = ((i+1) * (j+1) - 1) >> 5;
modulate_table[i][j] = ((i+1) * (j+1) - 1) >> 6;
for(int a=0;a<32;a++)
decal_table[a][i][j] = ((i*a) + (j*(31-a))) >> 5;
}
@ -1053,9 +1051,9 @@ static void SoftRastFramebufferProcess()
if(gfx3d.enableFog)
{
u32 r = gfx3d.fogColor&0x1F;
u32 g = (gfx3d.fogColor>>5)&0x1F;
u32 b = (gfx3d.fogColor>>10)&0x1F;
u32 r = ((gfx3d.fogColor)&0x1F)<<1;
u32 g = ((gfx3d.fogColor>>5)&0x1F)<<1;
u32 b = ((gfx3d.fogColor>>10)&0x1F)<<1;
u32 a = (gfx3d.fogColor>>16)&0x1F;
for(int i=0;i<256*192;i++)
{
@ -1079,29 +1077,7 @@ static void SoftRastFramebufferProcess()
//Publishes the rasterizer's colour buffer to the shared 3D output buffer.
//screenColor holds one FragmentColor (4 bytes) per pixel, which is the per-pixel size
//gfx3d_convertedScreen uses, so the whole 256x192 frame is copied in one go.
//The former per-pixel repacking into 15bpp words plus a separate alpha plane is gone:
//it treated the byte buffer as u16 words and its colour output was overwritten by this copy.
static void SoftRastConvertFramebuffer()
{
    memcpy(gfx3d_convertedScreen,screenColor,256*192*4);
}
@ -1272,10 +1248,10 @@ static void SoftRastRender()
Fragment clearFragment;
FragmentColor clearFragmentColor;
clearFragment.isTranslucentPoly = 0;
clearFragmentColor.r = gfx3d.clearColor&0x1F;
clearFragmentColor.g = (gfx3d.clearColor>>5)&0x1F;
clearFragmentColor.b = (gfx3d.clearColor>>10)&0x1F;
clearFragmentColor.a = (gfx3d.clearColor>>16)&0x1F;
clearFragmentColor.r = GFX3D_5TO6(gfx3d.clearColor&0x1F);
clearFragmentColor.g = GFX3D_5TO6((gfx3d.clearColor>>5)&0x1F);
clearFragmentColor.b = GFX3D_5TO6((gfx3d.clearColor>>10)&0x1F);
clearFragmentColor.a = ((gfx3d.clearColor>>16)&0x1F);
clearFragment.polyid.opaque = (gfx3d.clearColor>>24)&0x3F;
//special value for uninitialized translucent polyid. without this, fires in spiderman2 dont display
//I am not sure whether it is right, though. previously this was cleared to 0, as a guess,
@ -1333,9 +1309,9 @@ static void SoftRastRender()
//convert the toon colors
//TODO for a slight speedup this could be cached in gfx3d (oglrenderer could benefit as well)
for(int i=0;i<32;i++) {
toonTable[i].r = gfx3d.u16ToonTable[i]&0x1F;
toonTable[i].g = (gfx3d.u16ToonTable[i]>>5)&0x1F;
toonTable[i].b = (gfx3d.u16ToonTable[i]>>10)&0x1F;
toonTable[i].r = GFX3D_5TO6((gfx3d.u16ToonTable[i])&0x1F);
toonTable[i].g = GFX3D_5TO6((gfx3d.u16ToonTable[i]>>5)&0x1F);
toonTable[i].b = GFX3D_5TO6((gfx3d.u16ToonTable[i]>>10)&0x1F);
}
//setup fog variables (but only if fog is enabled)

View File

@ -180,7 +180,7 @@ static void DebugDumpTexture(int which)
static int lastTexture = -1;
#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO5555(color,alpha))
#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO6665(color,alpha))
template<TexCache_TexFormat TEXFORMAT>
void TexCache_SetTexture(u32 format, u32 texpal)
@ -360,7 +360,7 @@ REJECT:
u16 c = pal[*adr&31];
u8 alpha = *adr>>5;
if(TEXFORMAT == TexFormat_15bpp)
*dwdst++ = RGB15TO5555(c,material_3bit_to_5bit[alpha]);
*dwdst++ = RGB15TO6665(c,material_3bit_to_5bit[alpha]);
else
*dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]);
adr++;
@ -537,8 +537,11 @@ REJECT:
{
for(int i=0;i<4;i++)
{
tmp_col[i] >>= 3;
tmp_col[i] &= 0x1F1F1F1F;
tmp_col[i] >>= 2;
tmp_col[i] &= 0x3F3F3F3F;
u32 a = tmp_col[i]>>24;
tmp_col[i] &= 0x00FFFFFF;
tmp_col[i] |= (a>>1)<<24;
}
}
@ -573,7 +576,7 @@ REJECT:
u16 c = pal[*adr&0x07];
u8 alpha = (*adr>>3);
if(TEXFORMAT == TexFormat_15bpp)
*dwdst++ = RGB15TO5555(c,alpha);
*dwdst++ = RGB15TO6665(c,alpha);
else
*dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]);
adr++;