diff --git a/desmume/ChangeLog b/desmume/ChangeLog index 5c6654331..d469fe1fc 100644 --- a/desmume/ChangeLog +++ b/desmume/ChangeLog @@ -42,6 +42,7 @@ - Implement lighting model in software instead of using opengl; improves (potential?) compatibility [zeromus] - Defer rendering until after flush. This was a necessary architectural change, as it permits savestate for the display list, and allows us eventually to separate the GE emulation from the rendering [zeromus] + - Fix the 2d/3d compositing well enough for NSMB to fix bugs, but it is still bad [zeromus] 0.7.3 -> 0.8 diff --git a/desmume/src/NDSSystem.c b/desmume/src/NDSSystem.c index 1879e02b1..533913650 100644 --- a/desmume/src/NDSSystem.c +++ b/desmume/src/NDSSystem.c @@ -642,6 +642,41 @@ int NDS_WriteBMP(const char *filename) return 1; } +int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename) +{ + bmpfileheader_struct fileheader; + bmpimgheader_struct imageheader; + FILE *file; + int i,j,k; + u16 * bmp = (u16 *)GPU_screen; + + memset(&fileheader, 0, sizeof(fileheader)); + fileheader.size = sizeof(fileheader); + fileheader.id = 'B' | ('M' << 8); + fileheader.imgoffset = sizeof(fileheader)+sizeof(imageheader); + + memset(&imageheader, 0, sizeof(imageheader)); + imageheader.size = sizeof(imageheader); + imageheader.width = width; + imageheader.height = height; + imageheader.planes = 1; + imageheader.bpp = 32; + imageheader.cmptype = 0; // None + imageheader.imgsize = imageheader.width * imageheader.height * 4; + + if ((file = fopen(filename,"wb")) == NULL) + return 0; + + fwrite(&fileheader, 1, sizeof(fileheader), file); + fwrite(&imageheader, 1, sizeof(imageheader), file); + + fwrite(buf,1,imageheader.imgsize,file); + fclose(file); + + return 1; +} + + static void fill_user_data_area( struct NDS_fw_config_data *user_settings, u8 *data, int count) { diff --git a/desmume/src/windows/OGLRender.c b/desmume/src/windows/OGLRender.c index 1b1ea0fc7..b52737e93 100644 --- 
a/desmume/src/windows/OGLRender.c +++ b/desmume/src/windows/OGLRender.c @@ -19,7 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -//todo - why doesnt mario run behind the floor at the beginning of nsmb? is it using a depth clear buffer? +//problem - alpha-on-alpha texture rendering might work but the dest alpha buffer isn't tracked correctly +//due to zeromus not having any idea how to set dest alpha blending in OpenGL. +//so, it doesn't composite to 2d correctly. +//(re: new super mario brothers renders the stormclouds at the beginning) #include #include @@ -46,9 +49,8 @@ #define fix2float(v) (((float)((s32)(v))) / (float)(1<<12)) #define fix10_2float(v) (((float)((s32)(v))) / (float)(1<<9)) -static unsigned char GPU_screen3D [256*256*3]={0}; -static float GPU_screen3Ddepth [256*256]={0}; -static unsigned char GPU_screenAlpha[256*256]={0}; +static unsigned char GPU_screen3D [256*256*4]={0}; +static unsigned char GPU_screenStencil[256*256]={0}; // Acceleration tables static float* float16table = NULL; @@ -103,6 +105,10 @@ static const u8 material_5bit_to_8bit[] = { 0xC6, 0xCE, 0xD6, 0xDE, 0xE7, 0xEF, 0xF7, 0xFF }; +static const u8 material_3bit_to_8bit[] = { + 0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF +}; + #define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3)) //make a table out of this: #define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] ) @@ -305,8 +311,9 @@ char NDS_glInit(void) pfd.nVersion = 1; pfd.dwFlags = PFD_DRAW_TO_BITMAP | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; pfd.iPixelType = PFD_TYPE_RGBA; - pfd.cColorBits = 32; + pfd.cColorBits = 24; pfd.cDepthBits = 24; + pfd.cAlphaBits = 8; pfd.cStencilBits = 8; pfd.iLayerType = PFD_MAIN_PLANE ; @@ -752,6 +759,28 @@ __forceinline void* memcpy_fast(void* dest, const void* src, size_t 
count) return dest; } + +static void DebugDumpTexture(int which) +{ + int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename); + static int ctr = 0; + char fname[100]; + FILE* outf; + sprintf(fname,"c:\\dump\\%d.bmp", ctr); + ctr++; + + glBindTexture(GL_TEXTURE_2D,texcache[which].id); + glGetTexImage( GL_TEXTURE_2D , + 0, + GL_RGBA, + GL_UNSIGNED_BYTE, + texMAP); + + NDS_WriteBMP_32bppBuffer(texcache[which].sizeX,texcache[which].sizeY,texMAP,fname); + + +} + //================================================================================ __forceinline void setTexture(unsigned int format, unsigned int texpal) { @@ -798,9 +827,6 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal) { texcache_count=i; glBindTexture(GL_TEXTURE_2D,texcache[i].id); - if(i==30) { - int zzz=9; - } return; } } @@ -834,6 +860,10 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal) texcache[i].frm=format; + if(i==62 || textureMode==1) { + int zzz=9; + } + //printlog("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY); //============================================================================ Texture render @@ -847,11 +877,14 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal) pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); for(x = 0; x < imageSize; x++, dst += 4) { - unsigned short c = pal[adr[x]&31], alpha = (adr[x]>>5); + unsigned short c = pal[adr[x]&31], alpha = adr[x]>>5; dst[0] = (unsigned char)((c & 0x1F)<<3); dst[1] = (unsigned char)((c & 0x3E0)>>2); dst[2] = (unsigned char)((c & 0x7C00)>>7); - dst[3] = ((alpha<<2)+(alpha>>1))<<3; + dst[0] = material_3bit_to_8bit[alpha]; + dst[1] = material_3bit_to_8bit[alpha]; + dst[2] = material_3bit_to_8bit[alpha]; + dst[3] = material_3bit_to_8bit[alpha]; CHECKSLOT; } break; @@ -1031,12 +1064,13 @@ __forceinline void 
setTexture(unsigned int format, unsigned int texpal) pal = (unsigned short *)(ARM9Mem.texPalSlot[0] + (texturePalette<<4)); for(x = 0; x < imageSize; x++) { - unsigned short c = pal[adr[x]&0x07]; + unsigned short c = pal[adr[x]&0x07], alpha = (adr[x]>>3); dst[0] = (unsigned char)((c & 0x1F)<<3); dst[1] = (unsigned char)((c & 0x3E0)>>2); dst[2] = (unsigned char)((c & 0x7C00)>>7); - dst[3] = (adr[x]&0xF8); + dst[3] = material_5bit_to_8bit[alpha]; dst += 4; + CHECKSLOT; } break; @@ -1067,21 +1101,13 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal) } } - ////zero debug - dump tex to verify contents - //{ - // int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename); - // static int ctr = 0; - // char fname[100]; - // FILE* outf; - // sprintf(fname,"c:\\dump\\%d.bmp", ctr); - // ctr++; - // NDS_WriteBMP_32bppBuffer(sizeX,sizeY,texMAP,fname); - //} - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texcache[i].sizeX, texcache[i].sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, texMAP); + + DebugDumpTexture(i); + //============================================================================================ texcache_count=i; @@ -1164,7 +1190,7 @@ static void BeginRenderPoly() //when the polyID is zero, we are writing the shadow mask. //set stencilbuf = 1 where the shadow volume is obstructed by geometry. //do not write color or depth information. - glStencilFunc(GL_ALWAYS,1,1); + glStencilFunc(GL_ALWAYS,2,255); glStencilOp(GL_KEEP,GL_REPLACE,GL_KEEP); glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE); enableDepthWrite = 1; @@ -1172,13 +1198,15 @@ static void BeginRenderPoly() //when the polyid is nonzero, we are drawing the shadow poly. //only draw the shadow poly where the stencilbuf==1. //I am not sure whether to update the depth buffer here--so I chose not to. 
- glStencilFunc(GL_EQUAL,1,1); + glStencilFunc(GL_EQUAL,2,255); glStencilOp(GL_KEEP,GL_KEEP,GL_KEEP); glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); enableDepthWrite = 0; } } else { - glDisable(GL_STENCIL_TEST); + glEnable(GL_STENCIL_TEST); + glStencilFunc(GL_ALWAYS,1,255); + glStencilOp(GL_REPLACE,GL_REPLACE,GL_REPLACE); glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); } @@ -1391,28 +1419,56 @@ __forceinline int NDS_glGetNumVertex (void) } //NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better) +//zeromus post-mod3: fix even better __forceinline void NDS_glGetLine (int line, unsigned short * dst) { int i, t; - u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768]; - float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256]; - u8 *screenAlpha = (u8*)&GPU_screenAlpha[(191-(line%192))*256]; + u8 *screen3D = (u8 *)&GPU_screen3D [(191-(line%192))*1024]; + u8 *screenStencil = (u8*)&GPU_screenStencil[(191-(line%192))*256]; - u32 r,g,b,a; + //the renderer clears the stencil to 0 + //then it sets it to 1 whenever it renders a pixel that passes the alpha test + //(it also sets it to 2 under some circumstances when rendering shadow volumes) + //so, we COULD use a zero stencil value to indicate that nothing should get composited. + //in fact, we are going to do that to fix some problems. 
+ //but beware that i figure it could also CAUSE some problems + + //this alpha compositing blending logic isn't thought through at all + //someone needs to think about what bitdepth it should take place at and how to do it efficiently + + u32 a,r,g,b,stencil,oldcolor,oldr,oldg,oldb; for(i = 0, t=0; i < 256; i++) { - if (screen3Ddepth[i] < 1.f) - { - t=i*3; - r = screen3D[t]; - g = screen3D[t+1]; - b = screen3D[t+2]; - a = screenAlpha[i]; + stencil = screenStencil[i]; - if(a) - dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3); + //you would use this if you wanted to use the stencil buffer to make decisions here + if(!stencil) continue; + + t=i*4; + r = screen3D[t+0]; + g = screen3D[t+1]; + b = screen3D[t+2]; + a = screen3D[t+3]; + + if(a != 0xFF && a != 0) { + int zzz=9; } + + oldcolor = RGB15TO32(dst[i],0); + oldr = oldcolor&0xFF; + oldg = (oldcolor>>8)&0xFF; + oldb = (oldcolor>>16)&0xFF; + + r = (r*a + oldr*(255-a)) / 255; + g = (g*a + oldg*(255-a)) / 255; + b = (b*a + oldb*(255-a)) / 255; + + r=min(255,r); + g=min(255,g); + b=min(255,b); + + dst[i] = ((b>>3)<<10) | ((g>>3)<<5) | (r>>3); } } @@ -1506,9 +1562,8 @@ __forceinline void NDS_glFlush(unsigned long v) //capture rendering results glFlush(); - glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth); - glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D); - glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha); + glReadPixels(0,0,256,192,GL_RGBA, GL_UNSIGNED_BYTE, GPU_screen3D); + glReadPixels(0,0,256,192,GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, GPU_screenStencil); //debug: view depth buffer via color buffer for debugging { @@ -1684,11 +1739,11 @@ __forceinline void NDS_glControl(unsigned long v) if(v&(1<<2)) { - glAlphaFunc (GL_GREATER, alphaTestBase); + //glAlphaFunc (GL_GREATER, alphaTestBase); } else { - glAlphaFunc (GL_GREATER, 0.1f); + //glAlphaFunc (GL_GREATER, 0.1f); } if(v&(1<<3))