diff --git a/desmume/ChangeLog b/desmume/ChangeLog
index af34a69c8..c168ed37f 100644
--- a/desmume/ChangeLog
+++ b/desmume/ChangeLog
@@ -44,6 +44,12 @@
- Defer rendering until after flush. This was a necessary architectural change, as it permits savestate
for the display list, and allows us eventually to separate the GE emulation from the rendering [zeromus]
- Fix the 2d/3d compositing well enough for NSMB to fix bugs, but it is still bad [zeromus]
+ - Reorganize 3d code to defer rendering to vblank. Eliminates tearing, and maybe some texturing artifacts.
+ Also possibly helps performance a bit by letting the hardware pipeline work some more before blocking for
+ the framebuffer read. [zeromus]
+ - Tweak optimization flags and change entire source code to use fastcall [zeromus]
+ - Add opengl state caching. This is of dubious performance assistance, but it is easy to take out so I am leaving it for now. [zeromus]
+ - Add MMU->GPU signal for when vram mappings change, which allows it to assume textures are unchanged unless vram has changed [zeromus]
0.7.3 -> 0.8
diff --git a/desmume/src/ARM9.h b/desmume/src/ARM9.h
index 4dae037d8..849f045c8 100644
--- a/desmume/src/ARM9.h
+++ b/desmume/src/ARM9.h
@@ -23,7 +23,7 @@ typedef struct {
u8 * ObjExtPal[2][2];
u8 * texPalSlot[4];
- const u8 *textureSlotAddr[4];
+ u8 *textureSlotAddr[4];
u8 *blank_memory[0x20000];
} ARM9_struct;
diff --git a/desmume/src/MMU.cpp b/desmume/src/MMU.cpp
index 70b22da8b..8ba794009 100644
--- a/desmume/src/MMU.cpp
+++ b/desmume/src/MMU.cpp
@@ -1060,6 +1060,8 @@ void FASTCALL MMU_write8(u32 proc, u32 adr, u8 val)
ARM9Mem.textureSlotAddr[slot_index] =
&ARM9Mem.ARM9_LCD[0x20000 * (adr - REG_VRAMCNTA)];
+
+ gpu3D->NDS_3D_VramReconfigureSignal();
}
}
MMU_VRAMReloadFromLCD(adr-REG_VRAMCNTA,val) ;
diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp
index eeb889b2e..31f350d70 100644
--- a/desmume/src/NDSSystem.cpp
+++ b/desmume/src/NDSSystem.cpp
@@ -1022,6 +1022,8 @@ NDS_exec(s32 nb, BOOL force)
nds.lignerendu = FALSE;
if(nds.VCount==192)
{
+ gpu3D->NDS_3D_VBlankSignal();
+
T1WriteWord(ARM9Mem.ARM9_REG, 4, T1ReadWord(ARM9Mem.ARM9_REG, 4) | 1);
T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 1);
NDS_ARM9VBlankInt();
diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h
index fac293c22..a598605d6 100644
--- a/desmume/src/render3D.h
+++ b/desmume/src/render3D.h
@@ -21,11 +21,8 @@
#ifndef GPU_3D
#define GPU_3D
-#ifdef _MSC_VER
-#define CALL_CONVENTION __cdecl
-#else
+//not using this right now
#define CALL_CONVENTION
-#endif
/*
enum DRIVER_3D
@@ -125,7 +122,9 @@ typedef struct GPU3DInterface
long (CALL_CONVENTION* NDS_3D_GetPosRes) (unsigned int index);
long (CALL_CONVENTION* NDS_3D_GetVecRes) (unsigned int index);
- void (CALL_CONVENTION* NDS_3D_UpdateToonTable) (void* toonTable);
+ void (CALL_CONVENTION* NDS_3D_UpdateToonTable) (void* toonTable);
+ void (CALL_CONVENTION* NDS_3D_VBlankSignal) ();
+ void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) ();
} GPU3DInterface;
diff --git a/desmume/src/windows/DeSmuME_2005.vcproj b/desmume/src/windows/DeSmuME_2005.vcproj
index 8f2a2f788..f8c3679a7 100644
--- a/desmume/src/windows/DeSmuME_2005.vcproj
+++ b/desmume/src/windows/DeSmuME_2005.vcproj
@@ -111,6 +111,7 @@
IntermediateDirectory="$(SolutionDir)\.VS2005\$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+ WholeProgramOptimization="1"
>
+
+
+
+
+
+
+
+
count = 0;
}
+//------------------------------------------------------------
+
+
+#define OGLEXT(x,y) x y;
+#define INITOGLEXT(x,y) y = (x)wglGetProcAddress(#y);
+
+OGLEXT(PFNGLCREATESHADERPROC,glCreateShader)
+//zero: i dont understand this at all. my glext.h has the wrong thing declared here... so I have to do it myself
+typedef void (APIENTRYP X_PFNGLGETSHADERSOURCEPROC) (GLuint shader, GLsizei bufSize, GLchar **source, GLsizei *length);
+OGLEXT(X_PFNGLGETSHADERSOURCEPROC,glShaderSource)
+OGLEXT(PFNGLCOMPILESHADERPROC,glCompileShader)
+OGLEXT(PFNGLCREATEPROGRAMPROC,glCreateProgram)
+OGLEXT(PFNGLATTACHSHADERPROC,glAttachShader)
+OGLEXT(PFNGLLINKPROGRAMPROC,glLinkProgram)
+OGLEXT(PFNGLUSEPROGRAMPROC,glUseProgram)
+OGLEXT(PFNGLGETSHADERINFOLOGPROC,glGetShaderInfoLog)
+
+//opengl state caching:
+//This is of dubious performance assistance, but it is easy to take out so I am leaving it for now.
+//every function that is xgl* can be replaced with gl* if we decide to rip this out or if anyone else
+//doesnt feel like sticking with it (or if it causes trouble)
+
+//Cached wrapper around glDepthFunc: the driver is only called when the requested
+//comparison differs from the last one sent through this wrapper.
+void xglDepthFunc(GLenum func) {
+    //(GLenum)-1 is the "nothing sent yet" marker
+    static GLenum cachedFunc = -1;
+    if(cachedFunc != func) {
+        cachedFunc = func;
+        glDepthFunc(func);
+    }
+}
+
+//Cached wrapper around glPolygonMode. The front and back modes are tracked
+//separately; a GL_FRONT_AND_BACK request is skipped only when both already match.
+//Any other face value is ignored.
+void xglPolygonMode(GLenum face,GLenum mode) {
+    //[0] = front faces, [1] = back faces; (GLenum)-1 means "nothing sent yet"
+    static GLenum cachedMode[2] = {-1,-1};
+
+    if(face == GL_FRONT) {
+        if(cachedMode[0] != mode) {
+            cachedMode[0] = mode;
+            glPolygonMode(GL_FRONT, mode);
+        }
+    } else if(face == GL_BACK) {
+        if(cachedMode[1] != mode) {
+            cachedMode[1] = mode;
+            glPolygonMode(GL_BACK, mode);
+        }
+    } else if(face == GL_FRONT_AND_BACK) {
+        if(cachedMode[0] != mode || cachedMode[1] != mode) {
+            cachedMode[0] = cachedMode[1] = mode;
+            glPolygonMode(GL_FRONT_AND_BACK, mode);
+        }
+    }
+}
+
+//Cached wrapper around glUseProgram. Does nothing when the glUseProgram entry
+//point is null; otherwise only forwards a program that differs from the
+//last one forwarded.
+void xglUseProgram(GLuint program) {
+    //(GLuint)-1 is the "nothing sent yet" marker
+    static GLuint cachedProgram = -1;
+    if(glUseProgram && cachedProgram != program) {
+        cachedProgram = program;
+        glUseProgram(program);
+    }
+}
+
+//Cached wrapper around glDepthMask: forwards the flag only when it differs from
+//the last value sent through this wrapper.
+void xglDepthMask (GLboolean flag) {
+    //initialised to a value that is neither GL_TRUE nor GL_FALSE
+    static GLboolean cachedFlag = -1;
+    if(cachedFlag != flag) {
+        cachedFlag = flag;
+        glDepthMask(flag);
+    }
+}
+
+//Backing store for the glEnable/glDisable cache.
+struct GLCaps {
+    //one slot per capability, indexed by (cap - 0x0B00):
+    //0 = disabled, 1 = enabled, 0xFF = not yet set through the cache
+    u8 caps[0x100];
+    GLCaps() {
+        //start with every capability in the "unknown" state
+        for(unsigned int slot = 0; slot < sizeof(caps); slot++)
+            caps[slot] = 0xFF;
+    }
+};
+static GLCaps glcaps;
+
+//Enables a GL capability unless the cache already records it as enabled.
+//cap is not range checked here; it must lie in 0x0B00..0x0BFF (the xglEnable
+//macro asserts this at compile time).
+void _xglEnable(GLenum cap) {
+    const GLenum slot = cap - 0x0B00;
+    const u8 state = glcaps.caps[slot];
+    //only "disabled" (0) and "unknown" (0xFF) need a driver call
+    if(state != 0 && state != 0xFF) return;
+    glEnable(cap);
+    glcaps.caps[slot] = 1;
+}
+
+//Disables a GL capability unless the cache already records it as disabled.
+//cap is not range checked here; it must lie in 0x0B00..0x0BFF (the xglDisable
+//macro asserts this at compile time).
+void _xglDisable(GLenum cap) {
+    const GLenum slot = cap - 0x0B00;
+    //a zero slot means it is already disabled; "unknown" (0xFF) still goes to the driver
+    if(glcaps.caps[slot] == 0) return;
+    glDisable(cap);
+    glcaps.caps[slot] = 0;
+}
+
+#define xglEnable(cap) { \
+ CTASSERT((cap-0x0B00)<0x100); \
+ _xglEnable(cap); }
+
+#define xglDisable(cap) {\
+ CTASSERT((cap-0x0B00)<0x100); \
+ _xglDisable(cap); }
+
+
//================================================= Textures
#define MAX_TEXTURE 500
-typedef struct
+struct TextureCache
{
+ TextureCache()
+ : suspectedInvalid(true)
+ {}
+
GLenum id;
unsigned int frm;
unsigned int mode;
@@ -217,7 +309,10 @@ typedef struct
float invSizeY;
unsigned char texture[128*1024]; // 128Kb texture slot
-} TextureCache;
+ //set if this texture is suspected to be invalid due to a vram reconfigure
+ bool suspectedInvalid;
+
+} ;
TextureCache texcache[MAX_TEXTURE+1];
u32 texcache_count;
@@ -283,20 +378,6 @@ static void NDS_3D_UpdateToonTable(void* toonTable) {
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgbToonTable);
}
-#define OGLEXT(x,y) x y;
-#define INITOGLEXT(x,y) y = (x)wglGetProcAddress(#y);
-
-OGLEXT(PFNGLCREATESHADERPROC,glCreateShader)
-//zero: i dont understand this at all. my glext.h has the wrong thing declared here... so I have to do it myself
-typedef void (APIENTRYP X_PFNGLGETSHADERSOURCEPROC) (GLuint shader, GLsizei bufSize, GLchar **source, GLsizei *length);
-OGLEXT(X_PFNGLGETSHADERSOURCEPROC,glShaderSource)
-OGLEXT(PFNGLCOMPILESHADERPROC,glCompileShader)
-OGLEXT(PFNGLCREATEPROGRAMPROC,glCreateProgram)
-OGLEXT(PFNGLATTACHSHADERPROC,glAttachShader)
-OGLEXT(PFNGLLINKPROGRAMPROC,glLinkProgram)
-OGLEXT(PFNGLUSEPROGRAMPROC,glUseProgram)
-OGLEXT(PFNGLGETSHADERINFOLOGPROC,glGetShaderInfoLog)
-
char NDS_glInit(void)
{
int i;
@@ -343,12 +424,14 @@ char NDS_glInit(void)
#endif
glClearColor (0.f, 0.f, 0.f, 1.f);
- glEnable (GL_NORMALIZE);
- glEnable (GL_DEPTH_TEST);
+ glPixelStorei(GL_PACK_ALIGNMENT,8);
+
+ xglEnable (GL_NORMALIZE);
+ xglEnable (GL_DEPTH_TEST);
glEnable (GL_TEXTURE_2D);
glAlphaFunc (GL_GREATER, 0);
- glEnable (GL_ALPHA_TEST);
+ xglEnable (GL_ALPHA_TEST);
glGenTextures (MAX_TEXTURE, &oglTempTextureID[0]);
@@ -710,6 +793,22 @@ __forceinline void NDS_glMultMatrix4x4(signed long v)
//todo - make all color conversions go through a properly spread table!!
+//Compares count bytes at src and dst. Returns 0 when they are identical and 1
+//otherwise; unlike memcmp the result carries no ordering information.
+//I think this is slower than the regular memcmp.. doesnt make sense to me, but my
+//asm optimization knowledge is 15 years old..
+__forceinline int memcmp_slow(const void* src, const void* dst, u32 count) {
+    int retval;
+    __asm {
+        mov [retval], 0;
+        mov ecx, [count];
+        shr ecx, 2;
+        mov esi, [src];
+        mov edi, [dst];
+        cmp ecx, ecx;               //preset ZF so a compare of zero dwords reports "equal"
+        repe cmpsd;
+        setne byte ptr [retval];    //ZF clear: the last dword pair differed (in either direction)
+    }
+    //the dword loop above does not look at the last count%4 bytes, so check them here
+    const u32 tail = count & 3;
+    if(!retval && tail) {
+        const u32 done = count - tail;
+        retval = memcmp((const u8*)src + done, (const u8*)dst + done, tail) ? 1 : 0;
+    }
+    return retval;
+}
+
__forceinline void* memcpy_fast(void* dest, const void* src, size_t count)
{
size_t blockCnt = count / 64;
@@ -781,6 +880,7 @@ static void DebugDumpTexture(int which)
}
//================================================================================
+static int lastTexture = -1;
__forceinline void setTexture(unsigned int format, unsigned int texpal)
{
int palSize[7]={32,4,16,256,0,8,32768};
@@ -817,17 +917,27 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
i=texcache_start;
- if(false)
+ //if(false)
while (TRUE)
{
if (texcache_stop==i) break;
if (texcache[i].frm==0) break;
if ((texcache[i].frm==format)&&(texcache[i].pal==texpal))
{
- if (!memcmp(adr,texcache[i].texture,imageSize))
+ //TODO - we need to compare the palette also.
+ //TODO - this doesnt correctly span bank boundaries. in fact, it seems quite dangerous.
+ if (!texcache[i].suspectedInvalid || !memcmp(adr,texcache[i].texture,min(imageSize,sizeof(texcache[i].texture))))
{
+ texcache[i].suspectedInvalid = false;
texcache_count=i;
- glBindTexture(GL_TEXTURE_2D,texcache[i].id);
+ if(i != lastTexture)
+ {
+ lastTexture = i;
+ glBindTexture(GL_TEXTURE_2D,texcache[i].id);
+ glMatrixMode (GL_TEXTURE);
+ glLoadIdentity ();
+ glScaled (texcache[i].invSizeX, texcache[i].invSizeY, 1.0f);
+ }
return;
}
}
@@ -846,7 +956,10 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
}
}
+ lastTexture = i;
glBindTexture(GL_TEXTURE_2D, texcache[i].id);
+
+ texcache[i].suspectedInvalid = false;
texcache[i].mode=textureMode;
texcache[i].pal=texpal;
texcache[i].sizeX=sizeX;
@@ -856,11 +969,15 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
texcache[i].invSizeY=1.0f/((float)sizeY*(1<<4));
texcache[i].texenv=envMode;
//memcpy(texcache[i].texture,adr,imageSize); //======================= copy
- memcpy_fast(texcache[i].texture,adr,imageSize); //======================= copy
+ memcpy_fast(texcache[i].texture,adr,min(imageSize,sizeof(texcache[i].texture))); //======================= copy
texcache[i].numcolors=palSize[texcache[i].mode];
texcache[i].frm=format;
+ glMatrixMode (GL_TEXTURE);
+ glLoadIdentity ();
+ glScaled (texcache[i].invSizeX, texcache[i].invSizeY, 1.0f);
+
if(i==62 || textureMode==1) {
int zzz=9;
}
@@ -1135,29 +1252,32 @@ __forceinline void NDS_glBegin(unsigned long v)
tempVertList.count = 0;
}
+//controls states:
+//glStencilFunc
+//glStencilOp
+//glColorMask
+static u32 stencilStateSet = -1;
+
static void BeginRenderPoly()
{
int enableDepthWrite = 1;
u32 tmp=0;
- tempVertList.count = 0;
-
- glDepthFunc (depthFuncMode);
+ xglDepthFunc (depthFuncMode);
// Cull face
if (cullingMask != 0xC0)
{
- glEnable(GL_CULL_FACE);
+ xglEnable(GL_CULL_FACE);
glCullFace(map3d_cull[cullingMask>>6]);
}
else
- glDisable(GL_CULL_FACE);
+ xglDisable(GL_CULL_FACE);
// Alpha value, actually not well handled, 0 should be wireframe
if (colorAlpha > 0)
{
- glPolygonMode (GL_FRONT, GL_FILL);
- glPolygonMode (GL_BACK, GL_FILL);
+ xglPolygonMode (GL_FRONT_AND_BACK, GL_FILL);
//non-31 alpha polys are translucent
if(colorAlpha != 0x7FFFFFFF)
@@ -1165,21 +1285,10 @@ static void BeginRenderPoly()
}
else
{
- glPolygonMode (GL_FRONT, GL_LINE);
- glPolygonMode (GL_BACK, GL_LINE);
+ xglPolygonMode (GL_FRONT_AND_BACK, GL_LINE);
}
- // texture environment
setTexture(textureFormat, texturePalette);
- //=================
- if (texcache_count!=-1)
- {
- texCoordinateTransform = texcache[texcache_count].coord;
-
- glMatrixMode (GL_TEXTURE);
- glLoadIdentity ();
- glScaled (texcache[texcache_count].invSizeX, texcache[texcache_count].invSizeY, 1.0f);
- }
//a5i3 or a3i5 textures are translucent
alphaDepthWrite = 0; //zero - as a hack, we are never going to write depth buffer for alpha values
@@ -1194,43 +1303,48 @@ static void BeginRenderPoly()
//handle shadow polys
if(envMode == 3)
{
- glEnable(GL_STENCIL_TEST);
+ xglEnable(GL_STENCIL_TEST);
if(polyID == 0) {
- //when the polyID is zero, we are writing the shadow mask.
- //set stencilbuf = 1 where the shadow volume is obstructed by geometry.
- //do not write color or depth information.
- glStencilFunc(GL_ALWAYS,2,255);
- glStencilOp(GL_KEEP,GL_REPLACE,GL_KEEP);
- glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE);
enableDepthWrite = 1;
+ if(stencilStateSet!=0) {
+ stencilStateSet = 0;
+ //when the polyID is zero, we are writing the shadow mask.
+ //set stencilbuf = 1 where the shadow volume is obstructed by geometry.
+ //do not write color or depth information.
+ glStencilFunc(GL_ALWAYS,2,255);
+ glStencilOp(GL_KEEP,GL_REPLACE,GL_KEEP);
+ glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE);
+ }
} else {
- //when the polyid is nonzero, we are drawing the shadow poly.
- //only draw the shadow poly where the stencilbuf==1.
- //I am not sure whether to update the depth buffer here--so I chose not to.
- glStencilFunc(GL_EQUAL,2,255);
- glStencilOp(GL_KEEP,GL_KEEP,GL_KEEP);
- glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
enableDepthWrite = 0;
+ if(stencilStateSet!=1) {
+ stencilStateSet = 1;
+ //when the polyid is nonzero, we are drawing the shadow poly.
+ //only draw the shadow poly where the stencilbuf==1.
+ //I am not sure whether to update the depth buffer here--so I chose not to.
+ glStencilFunc(GL_EQUAL,2,255);
+ glStencilOp(GL_KEEP,GL_KEEP,GL_KEEP);
+ glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
+ }
}
} else {
- glEnable(GL_STENCIL_TEST);
- glStencilFunc(GL_ALWAYS,1,255);
- glStencilOp(GL_REPLACE,GL_REPLACE,GL_REPLACE);
- glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
+ xglEnable(GL_STENCIL_TEST);
+ if(stencilStateSet!=2) {
+ stencilStateSet=2;
+ glStencilFunc(GL_ALWAYS,1,255);
+ glStencilOp(GL_REPLACE,GL_REPLACE,GL_REPLACE);
+ glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE);
+ }
}
//handle toon rendering
if(glUseProgram) {
if(envMode == 2) {
- glUseProgram(toonProgram);
- } else glUseProgram(0);
+ xglUseProgram(toonProgram);
+ } else xglUseProgram(0);
}
- glDepthMask(enableDepthWrite?GL_TRUE:GL_FALSE);
-
- //just to be sure
- glMatrixMode(GL_MODELVIEW);
- glLoadIdentity();
+ xglDepthMask(enableDepthWrite?GL_TRUE:GL_FALSE);
}
__forceinline void NDS_glEnd (void)
@@ -1427,60 +1541,6 @@ __forceinline int NDS_glGetNumVertex (void)
return 0;
}
-//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
-//zeromus post-mod3: fix even better
-__forceinline void NDS_glGetLine (int line, unsigned short * dst)
-{
- int i, t;
- u8 *screen3D = (u8 *)&GPU_screen3D [(191-(line%192))*1024];
- u8 *screenStencil = (u8*)&GPU_screenStencil[(191-(line%192))*256];
-
- //the renderer clears the stencil to 0
- //then it sets it to 1 whenever it renders a pixel that passes the alpha test
- //(it also sets it to 2 under some circumstances when rendering shadow volumes)
- //so, we COULD use a zero stencil value to indicate that nothing should get composited.
- //in fact, we are going to do that to fix some problems.
- //but beware that it i figure it might could CAUSE some problems
-
- //this alpha compositing blending logic isnt thought through at all
- //someone needs to think about what bitdepth it should take place at and how to do it efficiently
-
- u32 a,r,g,b,stencil,oldcolor,oldr,oldg,oldb;
-
- for(i = 0, t=0; i < 256; i++)
- {
- stencil = screenStencil[i];
-
- //you would use this if you wanted to use the stencil buffer to make decisions here
- if(!stencil) continue;
-
- t=i*4;
- r = screen3D[t+0];
- g = screen3D[t+1];
- b = screen3D[t+2];
- a = screen3D[t+3];
-
- if(a != 0xFF && a != 0) {
- int zzz=9;
- }
-
- oldcolor = RGB15TO32(dst[i],0);
- oldr = oldcolor&0xFF;
- oldg = (oldcolor>>8)&0xFF;
- oldb = (oldcolor>>16)&0xFF;
-
- r = (r*a + oldr*(255-a)) / 255;
- g = (g*a + oldg*(255-a)) / 255;
- b = (b*a + oldb*(255-a)) / 255;
-
- r=min(255,r);
- g=min(255,g);
- b=min(255,b);
-
- dst[i] = ((b>>3)<<10) | ((g>>3)<<5) | (r>>3);
- }
-}
-
static void InstallPolygonAttrib(unsigned long val)
{
// Light enable/disable
@@ -1512,92 +1572,6 @@ __forceinline void NDS_glPolygonAttrib (unsigned long val)
InstallPolygonAttrib(polyAttr);
}
-__forceinline void NDS_glFlush(unsigned long v)
-{
- u32 wbuffer = v&1;
- u32 sortmode = (v>>1)&1;
-
- // Set back some secure render states
- glPolygonMode (GL_BACK, GL_FILL);
- glPolygonMode (GL_FRONT, GL_FILL);
-
- glDepthMask (GL_TRUE);
- glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
-
- //render display list
- //TODO - properly doublebuffer the display lists
- {
- int i;
- for(i=0;icount;i++) {
- POLY *poly = &polylist->list[i];
- int type = poly->type;
- int j;
- InstallPolygonAttrib(poly->polyAttr);
- textureFormat = poly->texParam;
- texturePalette = poly->texPalette;
- BeginRenderPoly();
-
- //since we havent got the whole pipeline working yet, lets use opengl for the projection
- glMatrixMode(GL_PROJECTION);
- glLoadMatrixf(poly->projMatrix);
-
- glBegin(type==3?GL_TRIANGLES:GL_QUADS);
- for(j=0;jlist[poly->vertIndexes[j]];
-
- //float tempCoord[4];
- //Vector4Copy(tempCoord,vert->coord);
- //we havent got the whole pipeline working yet, so we cant do this
- ////convert from ds device coords to opengl
- //tempCoord[0] *= 2;
- //tempCoord[1] *= 2;
- //tempCoord[0] -= 1;
- //tempCoord[1] -= 1;
-
- //todo - edge flag?
- glTexCoord2fv(vert->texcoord);
- glColor4iv(vert->color);
- //glVertex3fv(tempCoord);
- glVertex3fv(vert->coord);
- }
- glEnd();
- }
- }
-
- twiddleLists();
-
- //reset gpu state
- clCmd = 0;
- clInd = 0;
-
- //capture rendering results
- glFlush();
- glReadPixels(0,0,256,192,GL_RGBA, GL_UNSIGNED_BYTE, GPU_screen3D);
- glReadPixels(0,0,256,192,GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, GPU_screenStencil);
-
- //debug: view depth buffer via color buffer for debugging
- {
- //int ctr=0;
- //for(ctr=0;ctr<256*192;ctr++) {
- // float zval = GPU_screen3Ddepth[ctr];
- // u8* colorPtr = GPU_screen3D+ctr*3;
- // if(zval<0) {
- // colorPtr[0] = 255;
- // colorPtr[1] = 0;
- // colorPtr[2] = 0;
- // } else if(zval>1) {
- // colorPtr[0] = 0;
- // colorPtr[1] = 0;
- // colorPtr[2] = 255;
- // } else {
- // colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255;
- // //printlog("%f %f %d\n",zval, zval*255,colorPtr[0]);
- // }
-
- //}
- }
-}
-
/*
0-4 Diffuse Reflection Red
5-9 Diffuse Reflection Green
@@ -1734,6 +1708,7 @@ __forceinline void NDS_glLightColor (unsigned long v)
__forceinline void NDS_glAlphaFunc(unsigned long v)
{
alphaTestRef = (v&31)/31.f;
+ glAlphaFunc (GL_GREATER, alphaTestBase);
}
__forceinline void NDS_glControl(unsigned long v)
@@ -1749,11 +1724,11 @@ __forceinline void NDS_glControl(unsigned long v)
if(v&(1<<2))
{
- //glAlphaFunc (GL_GREATER, alphaTestBase);
+ glAlphaFunc (GL_GREATER, alphaTestBase);
}
else
{
- //glAlphaFunc (GL_GREATER, 0.1f);
+ glAlphaFunc (GL_GREATER, 0);
}
if(v&(1<<3))
@@ -1878,6 +1853,181 @@ __forceinline void NDS_glNormal(unsigned long v)
}
}
+static bool flushPending = false;
+static u32 flush_wbuffer;
+static u32 flush_sortmode;
+
+//Latches a flush request instead of rendering immediately.
+//Bit 0 of v is stored in flush_wbuffer and bit 1 in flush_sortmode;
+//flushPending is raised so the pending redraw can be picked up later.
+void NDS_glFlush(unsigned long v)
+{
+    flush_sortmode = (v>>1)&1;
+    flush_wbuffer = v&1;
+    flushPending = true;
+}
+
+void GL_Draw() //Clears the GL buffers, replays every polygon of polylist through immediate-mode GL, then swaps the lists and resets the command state.
+{
+ xglDepthMask (GL_TRUE); //glClear honors the depth write mask, so depth writes must be on before clearing
+ glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
+
+ //render display list
+ //TODO - properly doublebuffer the display lists
+ {
+
+ u32 lastTextureFormat, lastTexturePalette, lastPolyAttr; //deliberately uninitialised: the i==0 test below short-circuits before they are read
+
+ for(int i=0;icount;i++) { //NOTE(review): this loop header looks truncated (text between '<' and '>' lost); presumably iterates over polylist - confirm against the real source
+ POLY *poly = &polylist->list[i];
+ int type = poly->type;
+
+ //a very macro-level state caching approach:
+ //these are the only things which control the GPU rendering state.
+ //render state is only reinstalled for the first polygon or when texParam, texPalette or polyAttr differ from the previous polygon.
+ if(i==0 || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette || lastPolyAttr != poly->polyAttr)
+ {
+ InstallPolygonAttrib(lastPolyAttr=poly->polyAttr);
+ lastTextureFormat = textureFormat = poly->texParam; //also updates the global textureFormat
+ lastTexturePalette = texturePalette = poly->texPalette; //also updates the global texturePalette
+ BeginRenderPoly();
+ }
+
+ //since we havent got the whole pipeline working yet, lets use opengl for the projection
+ glMatrixMode(GL_PROJECTION);
+ glLoadMatrixf(poly->projMatrix); //the projection matrix is reloaded for every polygon
+
+ glBegin(type==3?GL_TRIANGLES:GL_QUADS); //type 3 is drawn as a triangle, anything else as a quad
+ for(int j=0;jlist[poly->vertIndexes[j]]; //NOTE(review): truncated line; the loop bound and the declaration of vert are missing here - restore from the real source
+
+ //float tempCoord[4];
+ //Vector4Copy(tempCoord,vert->coord);
+ //we havent got the whole pipeline working yet, so we cant do this
+ ////convert from ds device coords to opengl
+ //tempCoord[0] *= 2;
+ //tempCoord[1] *= 2;
+ //tempCoord[0] -= 1;
+ //tempCoord[1] -= 1;
+
+ //todo - edge flag?
+ glTexCoord2fv(vert->texcoord);
+ glColor4iv(vert->color);
+ //glVertex3fv(tempCoord);
+ glVertex3fv(vert->coord);
+ }
+ glEnd();
+ }
+ }
+
+ //since we just redrew, we need to refresh the framebuffers
+ //(NDS_glGetLine checks this flag and performs the readback on demand)
+ needRefreshFramebuffer = true;
+
+ twiddleLists();
+
+ //reset GE state
+ clCmd = 0;
+ clInd = 0;
+}
+
+//Called when a vblank begins. The 3d buffers are swapped at that point, so it is
+//a safe moment to perform a redraw that NDS_glFlush requested earlier.
+void NDS_3D_VBlankSignal()
+{
+    if(flushPending) {
+        //consume the request before drawing
+        flushPending = false;
+        GL_Draw();
+    }
+}
+
+void NDS_3D_VramReconfigureSignal()
+{
+ //well, this is a very blunt instrument.
+ //lets just flag all the textures as invalid.
+ for(int i=0;i1) {
+ // colorPtr[0] = 0;
+ // colorPtr[1] = 0;
+ // colorPtr[2] = 255;
+ // } else {
+ // colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255;
+ // //printlog("%f %f %d\n",zval, zval*255,colorPtr[0]);
+ // }
+
+ //}
+}
+
+//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
+//zeromus post-mod3: fix even better
+//Composites one line of the rendered 3D frame onto dst (256 RGB15 pixels).
+//Each 3D pixel is alpha-blended over the colour already in dst; pixels whose
+//stencil value is zero are left untouched.
+__forceinline void NDS_glGetLine (int line, unsigned short * dst)
+{
+    //the framebuffer copy is refreshed lazily, the first time a line is requested
+    //after the flag has been raised
+    if(needRefreshFramebuffer) {
+        needRefreshFramebuffer = false;
+        GL_ReadFramebuffer();
+    }
+
+    //the row index is mirrored: line 0 reads row 191 of the captured buffers
+    const int row = 191-(line%192);
+    u8 *colorRow = (u8 *)&GPU_screen3D [row*1024];
+    u8 *stencilRow = (u8*)&GPU_screenStencil[row*256];
+
+    //the renderer clears the stencil to 0 and writes a nonzero value wherever it
+    //draws a pixel that passes the alpha test (shadow volumes may write 2), so a
+    //zero stencil is used to mean "nothing to composite here".
+    //beware: this might itself cause problems.
+
+    //this blending logic isnt thought through: the bit depth it should run at
+    //and an efficient way of doing it still need to be worked out
+    for(int x = 0; x < 256; x++)
+    {
+        const u32 stencil = stencilRow[x];
+        if(!stencil) continue;
+
+        //four bytes per pixel: r, g, b, a
+        const u8 *texel = &colorRow[x*4];
+        u32 r = texel[0];
+        u32 g = texel[1];
+        u32 b = texel[2];
+        const u32 a = texel[3];
+
+        //breakpoint anchor for partially transparent pixels
+        if(a != 0xFF && a != 0) {
+            int zzz=9;
+        }
+
+        //expand the pixel already in dst and blend the 3D pixel over it
+        const u32 under = RGB15TO32(dst[x],0);
+        const u32 underR = under&0xFF;
+        const u32 underG = (under>>8)&0xFF;
+        const u32 underB = (under>>16)&0xFF;
+        const u32 inv = 255-a;
+
+        r = (r*a + underR*inv) / 255;
+        g = (g*a + underG*inv) / 255;
+        b = (b*a + underB*inv) / 255;
+
+        r=min(255,r);
+        g=min(255,g);
+        b=min(255,b);
+
+        //pack back down to 15-bit colour
+        dst[x] = ((b>>3)<<10) | ((g>>3)<<5) | (r>>3);
+    }
+}
+
__forceinline void NDS_glBoxTest(unsigned long v)
{
}
@@ -2405,7 +2555,9 @@ GPU3DInterface gpu3Dgl = { NDS_glInit,
NDS_glVecTest,
NDS_glGetPosRes,
NDS_glGetVecRes,
- NDS_3D_UpdateToonTable
+ NDS_3D_UpdateToonTable,
+ NDS_3D_VBlankSignal,
+ NDS_3D_VramReconfigureSignal,
};
diff --git a/desmume/src/windows/zlib123/zconf.h b/desmume/src/windows/zlib123/zconf.h
index ccbf3751e..ec69ca940 100644
--- a/desmume/src/windows/zlib123/zconf.h
+++ b/desmume/src/windows/zlib123/zconf.h
@@ -247,7 +247,7 @@
# define ZEXTERN extern
#endif
#ifndef ZEXPORT
-# define ZEXPORT
+# define ZEXPORT __cdecl
#endif
#ifndef ZEXPORTVA
# define ZEXPORTVA
diff --git a/desmume/src/windows/zziplib/zzip/_msvc.h b/desmume/src/windows/zziplib/zzip/_msvc.h
index e85251344..81adb6a4a 100644
--- a/desmume/src/windows/zziplib/zzip/_msvc.h
+++ b/desmume/src/windows/zziplib/zzip/_msvc.h
@@ -1,5 +1,7 @@
#ifndef _ZZIP__MSVC_H
#define _ZZIP__MSVC_H 1
+
+#define _zzip_calltype __cdecl
/* zzip/_msvc.h. Generated automatically at end of configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
diff --git a/desmume/src/windows/zziplib/zzip/zzip.h b/desmume/src/windows/zziplib/zzip/zzip.h
index 6f2c51690..80cfc0cec 100644
--- a/desmume/src/windows/zziplib/zzip/zzip.h
+++ b/desmume/src/windows/zziplib/zzip/zzip.h
@@ -94,9 +94,9 @@ struct zzip_dirent
* zzip/err.c
*/
_zzip_export /* error in _opendir : */
-zzip_char_t* zzip_strerror(int errcode);
+zzip_char_t* _zzip_calltype zzip_strerror(int errcode);
_zzip_export /* error in other functions : */
-zzip_char_t* zzip_strerror_of(ZZIP_DIR * dir);
+zzip_char_t* _zzip_calltype zzip_strerror_of(ZZIP_DIR * dir);
_zzip_export /* error mapped to errno.h defines : */
int zzip_errno(int errcode);
@@ -107,46 +107,46 @@ int zzip_errno(int errcode);
* zzip/info.c
*/
_zzip_export
-int zzip_error(ZZIP_DIR * dir);
+int _zzip_calltype zzip_error(ZZIP_DIR * dir);
_zzip_export
-void zzip_seterror(ZZIP_DIR * dir, int errcode);
+void _zzip_calltype zzip_seterror(ZZIP_DIR * dir, int errcode);
_zzip_export
zzip_char_t* zzip_compr_str(int compr);
_zzip_export
-ZZIP_DIR * zzip_dirhandle(ZZIP_FILE * fp);
+ZZIP_DIR * _zzip_calltype zzip_dirhandle(ZZIP_FILE * fp);
_zzip_export
-int zzip_dirfd(ZZIP_DIR * dir);
+int _zzip_calltype zzip_dirfd(ZZIP_DIR * dir);
_zzip_export
-int zzip_dir_real(ZZIP_DIR * dir);
+int _zzip_calltype zzip_dir_real(ZZIP_DIR * dir);
_zzip_export
-int zzip_file_real(ZZIP_FILE * fp);
+int _zzip_calltype zzip_file_real(ZZIP_FILE * fp);
_zzip_export
-void* zzip_realdir(ZZIP_DIR * dir);
+void* _zzip_calltype zzip_realdir(ZZIP_DIR * dir);
_zzip_export
-int zzip_realfd(ZZIP_FILE * fp);
+int _zzip_calltype zzip_realfd(ZZIP_FILE * fp);
/*
* zip handle management
* zzip/zip.c
*/
_zzip_export
-ZZIP_DIR * zzip_dir_alloc(zzip_strings_t* fileext);
+ZZIP_DIR * _zzip_calltype zzip_dir_alloc(zzip_strings_t* fileext);
_zzip_export
-int zzip_dir_free(ZZIP_DIR *);
+int _zzip_calltype zzip_dir_free(ZZIP_DIR *);
/*
* Opening/closing a zip archive
* zzip-zip.c
*/
_zzip_export
-ZZIP_DIR * zzip_dir_fdopen(int fd, zzip_error_t * errcode_p);
+ZZIP_DIR * _zzip_calltype zzip_dir_fdopen(int fd, zzip_error_t * errcode_p);
_zzip_export
-ZZIP_DIR * zzip_dir_open(zzip_char_t* filename, zzip_error_t * errcode_p);
+ZZIP_DIR * _zzip_calltype zzip_dir_open(zzip_char_t* filename, zzip_error_t * errcode_p);
_zzip_export
-int zzip_dir_close(ZZIP_DIR * dir);
+int _zzip_calltype zzip_dir_close(ZZIP_DIR * dir);
_zzip_export
-int zzip_dir_read(ZZIP_DIR * dir, ZZIP_DIRENT * dirent);
+int _zzip_calltype zzip_dir_read(ZZIP_DIR * dir, ZZIP_DIRENT * dirent);
/*
@@ -155,46 +155,46 @@ int zzip_dir_read(ZZIP_DIR * dir, ZZIP_DIRENT * dirent);
* zzip/zip.c
*/
_zzip_export
-ZZIP_DIR * zzip_opendir(zzip_char_t* filename);
+ZZIP_DIR * _zzip_calltype zzip_opendir(zzip_char_t* filename);
_zzip_export
-int zzip_closedir(ZZIP_DIR * dir);
+int _zzip_calltype zzip_closedir(ZZIP_DIR * dir);
_zzip_export
-ZZIP_DIRENT * zzip_readdir(ZZIP_DIR * dir);
+ZZIP_DIRENT * _zzip_calltype zzip_readdir(ZZIP_DIR * dir);
_zzip_export
-void zzip_rewinddir(ZZIP_DIR * dir);
+void _zzip_calltype zzip_rewinddir(ZZIP_DIR * dir);
_zzip_export
-zzip_off_t zzip_telldir(ZZIP_DIR * dir);
+zzip_off_t _zzip_calltype zzip_telldir(ZZIP_DIR * dir);
_zzip_export
-void zzip_seekdir(ZZIP_DIR * dir, zzip_off_t offset);
+void _zzip_calltype zzip_seekdir(ZZIP_DIR * dir, zzip_off_t offset);
/*
* 'opening', 'closing' and reading invidual files in zip archive.
* zzip/file.c
*/
_zzip_export
-ZZIP_FILE * zzip_file_open(ZZIP_DIR * dir, zzip_char_t* name, int flags);
+ZZIP_FILE * _zzip_calltype zzip_file_open(ZZIP_DIR * dir, zzip_char_t* name, int flags);
_zzip_export
-int zzip_file_close(ZZIP_FILE * fp);
+int _zzip_calltype zzip_file_close(ZZIP_FILE * fp);
_zzip_export
-zzip_ssize_t zzip_file_read(ZZIP_FILE * fp, void* buf, zzip_size_t len);
+zzip_ssize_t _zzip_calltype zzip_file_read(ZZIP_FILE * fp, void* buf, zzip_size_t len);
_zzip_export
-ZZIP_FILE * zzip_open(zzip_char_t* name, int flags);
+ZZIP_FILE * _zzip_calltype zzip_open(zzip_char_t* name, int flags);
_zzip_export
-int zzip_close(ZZIP_FILE * fp);
+int _zzip_calltype zzip_close(ZZIP_FILE * fp);
_zzip_export
-zzip_ssize_t zzip_read(ZZIP_FILE * fp, void * buf, zzip_size_t len);
+zzip_ssize_t _zzip_calltype zzip_read(ZZIP_FILE * fp, void * buf, zzip_size_t len);
/*
* the stdc variant to open/read/close files. - Take note of the freopen()
* call as it may reuse an existing preparsed copy of a zip central directory
*/
_zzip_export
-ZZIP_FILE* zzip_freopen(zzip_char_t* name, zzip_char_t* mode, ZZIP_FILE*);
+ZZIP_FILE* _zzip_calltype zzip_freopen(zzip_char_t* name, zzip_char_t* mode, ZZIP_FILE*);
_zzip_export
-ZZIP_FILE* zzip_fopen(zzip_char_t* name, zzip_char_t* mode);
+ZZIP_FILE* _zzip_calltype zzip_fopen(zzip_char_t* name, zzip_char_t* mode);
_zzip_export
-zzip_size_t zzip_fread(void *ptr, zzip_size_t size, zzip_size_t nmemb,
+zzip_size_t _zzip_calltype zzip_fread(void *ptr, zzip_size_t size, zzip_size_t nmemb,
ZZIP_FILE * file);
_zzip_export
int zzip_fclose(ZZIP_FILE * fp);
@@ -203,23 +203,23 @@ int zzip_fclose(ZZIP_FILE * fp);
* seek and tell functions
*/
_zzip_export
-int zzip_rewind(ZZIP_FILE *fp);
+int _zzip_calltype zzip_rewind(ZZIP_FILE *fp);
_zzip_export
-zzip_off_t zzip_seek(ZZIP_FILE * fp, zzip_off_t offset, int whence);
+zzip_off_t _zzip_calltype zzip_seek(ZZIP_FILE * fp, zzip_off_t offset, int whence);
_zzip_export
-zzip_off_t zzip_tell(ZZIP_FILE * fp);
+zzip_off_t _zzip_calltype zzip_tell(ZZIP_FILE * fp);
/*
* reading info of a single file
* zzip/stat.c
*/
_zzip_export
-int zzip_dir_stat(ZZIP_DIR * dir, zzip_char_t* name,
+int _zzip_calltype zzip_dir_stat(ZZIP_DIR * dir, zzip_char_t* name,
ZZIP_STAT * zs, int flags);
_zzip_export
-int zzip_file_stat(ZZIP_FILE * fp, ZZIP_STAT * zs);
+int _zzip_calltype zzip_file_stat(ZZIP_FILE * fp, ZZIP_STAT * zs);
_zzip_export
-int zzip_fstat(ZZIP_FILE * fp, ZZIP_STAT * zs);
+int _zzip_calltype zzip_fstat(ZZIP_FILE * fp, ZZIP_STAT * zs);
#ifdef ZZIP_LARGEFILE_RENAME
#define zzip_open_shared_io zzip_open_shared_io64
@@ -236,20 +236,20 @@ int zzip_fstat(ZZIP_FILE * fp, ZZIP_STAT * zs);
typedef union _zzip_plugin_io _zzip_const * zzip_plugin_io_t;
_zzip_export
-ZZIP_FILE * zzip_open_shared_io(ZZIP_FILE* stream,
+ZZIP_FILE * _zzip_calltype zzip_open_shared_io(ZZIP_FILE* stream,
zzip_char_t* name, int o_flags, int o_modes,
zzip_strings_t* ext, zzip_plugin_io_t io);
_zzip_export
-ZZIP_FILE * zzip_open_ext_io(zzip_char_t* name, int o_flags, int o_modes,
+ZZIP_FILE * _zzip_calltype zzip_open_ext_io(zzip_char_t* name, int o_flags, int o_modes,
zzip_strings_t* ext, zzip_plugin_io_t io);
_zzip_export
-ZZIP_DIR * zzip_opendir_ext_io(zzip_char_t* name, int o_modes,
+ZZIP_DIR * _zzip_calltype zzip_opendir_ext_io(zzip_char_t* name, int o_modes,
zzip_strings_t* ext, zzip_plugin_io_t io);
_zzip_export
-ZZIP_DIR * zzip_dir_open_ext_io(zzip_char_t* filename,
+ZZIP_DIR * _zzip_calltype zzip_dir_open_ext_io(zzip_char_t* filename,
zzip_error_t* errcode_p,
zzip_strings_t* ext, zzip_plugin_io_t io);