Fix the 2d/3d compositing well enough for NSMB to fix bugs, but it is still bad

This commit is contained in:
zeromus 2008-09-08 03:01:34 +00:00
parent 2e7b135228
commit d3605aed43
3 changed files with 135 additions and 44 deletions

View File

@ -42,6 +42,7 @@
- Implement lighting model in software instead of using opengl; improves (potential?) compatibility [zeromus]
- Defer rendering until after flush. This was a necessary architectural change, as it permits savestate
for the display list, and allows us eventually to separate the GE emulation from the rendering [zeromus]
- Fix the 2d/3d compositing well enough for NSMB to fix bugs, but it is still bad [zeromus]
0.7.3 -> 0.8

View File

@ -642,6 +642,41 @@ int NDS_WriteBMP(const char *filename)
return 1;
}
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename)
{
bmpfileheader_struct fileheader;
bmpimgheader_struct imageheader;
FILE *file;
int i,j,k;
u16 * bmp = (u16 *)GPU_screen;
memset(&fileheader, 0, sizeof(fileheader));
fileheader.size = sizeof(fileheader);
fileheader.id = 'B' | ('M' << 8);
fileheader.imgoffset = sizeof(fileheader)+sizeof(imageheader);
memset(&imageheader, 0, sizeof(imageheader));
imageheader.size = sizeof(imageheader);
imageheader.width = width;
imageheader.height = height;
imageheader.planes = 1;
imageheader.bpp = 32;
imageheader.cmptype = 0; // None
imageheader.imgsize = imageheader.width * imageheader.height * 4;
if ((file = fopen(filename,"wb")) == NULL)
return 0;
fwrite(&fileheader, 1, sizeof(fileheader), file);
fwrite(&imageheader, 1, sizeof(imageheader), file);
fwrite(buf,1,imageheader.imgsize,file);
fclose(file);
return 1;
}
static void
fill_user_data_area( struct NDS_fw_config_data *user_settings,
u8 *data, int count) {

View File

@ -19,7 +19,10 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
//todo - why doesnt mario run behind the floor at the beginning of nsmb? is it using a depth clear buffer?
//problem - alpha-on-alpha texture rendering might work but the dest alpha buffer isnt tracked correctly
//due to zeromus not having any idea how to set dest alpha blending in opengl.
//so, it doesnt composite to 2d correctly.
//(re: new super mario brothers renders the stormclouds at the beginning)
#include <math.h>
#include <stdlib.h>
@ -46,9 +49,8 @@
#define fix2float(v) (((float)((s32)(v))) / (float)(1<<12))
#define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9))
static unsigned char GPU_screen3D [256*256*3]={0};
static float GPU_screen3Ddepth [256*256]={0};
static unsigned char GPU_screenAlpha[256*256]={0};
static unsigned char GPU_screen3D [256*256*4]={0};
static unsigned char GPU_screenStencil[256*256]={0};
// Acceleration tables
static float* float16table = NULL;
@ -103,6 +105,10 @@ static const u8 material_5bit_to_8bit[] = {
0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF
};
static const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
};
#define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3))
//make a table out of this:
#define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] )
@ -305,8 +311,9 @@ char NDS_glInit(void)
pfd.nVersion = 1;
pfd.dwFlags = PFD_DRAW_TO_BITMAP | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cColorBits = 24;
pfd.cDepthBits = 24;
pfd.cAlphaBits = 8;
pfd.cStencilBits = 8;
pfd.iLayerType = PFD_MAIN_PLANE ;
@ -752,6 +759,28 @@ __forceinline void* memcpy_fast(void* dest, const void* src, size_t count)
return dest;
}
static void DebugDumpTexture(int which)
{
int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename);
static int ctr = 0;
char fname[100];
FILE* outf;
sprintf(fname,"c:\\dump\\%d.bmp", ctr);
ctr++;
glBindTexture(GL_TEXTURE_2D,texcache[which].id);
glGetTexImage( GL_TEXTURE_2D ,
0,
GL_RGBA,
GL_UNSIGNED_BYTE,
texMAP);
NDS_WriteBMP_32bppBuffer(texcache[which].sizeX,texcache[which].sizeY,texMAP,fname);
}
//================================================================================
__forceinline void setTexture(unsigned int format, unsigned int texpal)
{
@ -798,9 +827,6 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
{
texcache_count=i;
glBindTexture(GL_TEXTURE_2D,texcache[i].id);
if(i==30) {
int zzz=9;
}
return;
}
}
@ -834,6 +860,10 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
texcache[i].frm=format;
if(i==62 || textureMode==1) {
int zzz=9;
}
//printlog("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY);
//============================================================================ Texture render
@ -847,11 +877,14 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4));
for(x = 0; x < imageSize; x++, dst += 4)
{
unsigned short c = pal[adr[x]&31], alpha = (adr[x]>>5);
unsigned short c = pal[adr[x]&31], alpha = adr[x]>>5;
dst[0] = (unsigned char)((c & 0x1F)<<3);
dst[1] = (unsigned char)((c & 0x3E0)>>2);
dst[2] = (unsigned char)((c & 0x7C00)>>7);
dst[3] = ((alpha<<2)+(alpha>>1))<<3;
dst[0] = material_3bit_to_8bit[alpha];
dst[1] = material_3bit_to_8bit[alpha];
dst[2] = material_3bit_to_8bit[alpha];
dst[3] = material_3bit_to_8bit[alpha];
CHECKSLOT;
}
break;
@ -1031,12 +1064,13 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4));
for(x = 0; x < imageSize; x++)
{
unsigned short c = pal[adr[x]&0x07];
unsigned short c = pal[adr[x]&0x07], alpha = (adr[x]>>3);
dst[0] = (unsigned char)((c & 0x1F)<<3);
dst[1] = (unsigned char)((c & 0x3E0)>>2);
dst[2] = (unsigned char)((c & 0x7C00)>>7);
dst[3] = (adr[x]&0xF8);
dst[3] = material_5bit_to_8bit[alpha];
dst += 4;
CHECKSLOT;
}
break;
@ -1067,21 +1101,13 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
}
}
////zero debug - dump tex to verify contents
//{
// int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename);
// static int ctr = 0;
// char fname[100];
// FILE* outf;
// sprintf(fname,"c:\\dump\\%d.bmp", ctr);
// ctr++;
// NDS_WriteBMP_32bppBuffer(sizeX,sizeY,texMAP,fname);
//}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
texcache[i].sizeX, texcache[i].sizeY, 0,
GL_RGBA, GL_UNSIGNED_BYTE, texMAP);
DebugDumpTexture(i);
//============================================================================================
texcache_count=i;
@ -1164,7 +1190,7 @@ static void BeginRenderPoly()
//when the polyID is zero, we are writing the shadow mask.
//set stencilbuf = 1 where the shadow volume is obstructed by geometry.
//do not write color or depth information.
glStencilFunc(GL_ALWAYS,1,1);
glStencilFunc(GL_ALWAYS,2,255);
glStencilOp(GL_KEEP,GL_REPLACE,GL_KEEP);
glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE);
enableDepthWrite = 1;
@ -1172,13 +1198,15 @@ static void BeginRenderPoly()
//when the polyid is nonzero, we are drawing the shadow poly.
//only draw the shadow poly where the stencilbuf==1.
//I am not sure whether to update the depth buffer here--so I chose not to.
glStencilFunc(GL_EQUAL,1,1);
glStencilFunc(GL_EQUAL,2,255);
glStencilOp(GL_KEEP,GL_KEEP,GL_KEEP);
glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
enableDepthWrite = 0;
}
} else {
glDisable(GL_STENCIL_TEST);
glEnable(GL_STENCIL_TEST);
glStencilFunc(GL_ALWAYS,1,255);
glStencilOp(GL_REPLACE,GL_REPLACE,GL_REPLACE);
glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
}
@ -1391,28 +1419,56 @@ __forceinline int NDS_glGetNumVertex (void)
}
//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
//zeromus post-mod3: fix even better
__forceinline void NDS_glGetLine (int line, unsigned short * dst)
{
int i, t;
u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768];
float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256];
u8 *screenAlpha = (u8*)&GPU_screenAlpha[(191-(line%192))*256];
u8 *screen3D = (u8 *)&GPU_screen3D [(191-(line%192))*1024];
u8 *screenStencil = (u8*)&GPU_screenStencil[(191-(line%192))*256];
u32 r,g,b,a;
//the renderer clears the stencil to 0
//then it sets it to 1 whenever it renders a pixel that passes the alpha test
//(it also sets it to 2 under some circumstances when rendering shadow volumes)
//so, we COULD use a zero stencil value to indicate that nothing should get composited.
//in fact, we are going to do that to fix some problems.
//but beware that it i figure it might could CAUSE some problems
//this alpha compositing blending logic isnt thought through at all
//someone needs to think about what bitdepth it should take place at and how to do it efficiently
u32 a,r,g,b,stencil,oldcolor,oldr,oldg,oldb;
for(i = 0, t=0; i < 256; i++)
{
if (screen3Ddepth[i] < 1.f)
{
t=i*3;
r = screen3D[t];
g = screen3D[t+1];
b = screen3D[t+2];
a = screenAlpha[i];
stencil = screenStencil[i];
if(a)
dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3);
//you would use this if you wanted to use the stencil buffer to make decisions here
if(!stencil) continue;
t=i*4;
r = screen3D[t+0];
g = screen3D[t+1];
b = screen3D[t+2];
a = screen3D[t+3];
if(a != 0xFF && a != 0) {
int zzz=9;
}
oldcolor = RGB15TO32(dst[i],0);
oldr = oldcolor&0xFF;
oldg = (oldcolor>>8)&0xFF;
oldb = (oldcolor>>16)&0xFF;
r = (r*a + oldr*(255-a)) / 255;
g = (g*a + oldg*(255-a)) / 255;
b = (b*a + oldb*(255-a)) / 255;
r=min(255,r);
g=min(255,g);
b=min(255,b);
dst[i] = ((b>>3)<<10) | ((g>>3)<<5) | (r>>3);
}
}
@ -1506,9 +1562,8 @@ __forceinline void NDS_glFlush(unsigned long v)
//capture rendering results
glFlush();
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha);
glReadPixels(0,0,256,192,GL_RGBA, GL_UNSIGNED_BYTE, GPU_screen3D);
glReadPixels(0,0,256,192,GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, GPU_screenStencil);
//debug: view depth buffer via color buffer for debugging
{
@ -1684,11 +1739,11 @@ __forceinline void NDS_glControl(unsigned long v)
if(v&(1<<2))
{
glAlphaFunc (GL_GREATER, alphaTestBase);
//glAlphaFunc (GL_GREATER, alphaTestBase);
}
else
{
glAlphaFunc (GL_GREATER, 0.1f);
//glAlphaFunc (GL_GREATER, 0.1f);
}
if(v&(1<<3))