Defer rendering until after flush. This was a necessary architectural change, as it permits savestate for the display list, and allows us eventually to separate the GE emulation from the rendering

This commit is contained in:
zeromus 2008-09-07 20:35:30 +00:00
parent de1b998454
commit 3daa24c932
4 changed files with 288 additions and 157 deletions

View File

@ -8,6 +8,8 @@
- Added SSE2 version for matrix [CrazyMax]
- Some fixes in core (New SMB don't freeze now) [CrazyMax]
- Some optimizations in code [CrazyMax]
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
- Add many matrix and vector functions to matrix.c [zeromus]
Mac OS X port:
- Fixed: Filenames and paths with unicode characters now work. [Jeff]
- Fixed: Load state from file button works again. [Jeff]
@ -27,8 +29,7 @@
- Some fixes in 3D core OGL (fixed textures) [CrazyMax]
- Added texture caching (speedup 3D core) [CrazyMax]
- Fixes clear depth (ex. Castlevania now don't flipping) [NHerve]
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly [zeromus]
- Make GE matrix mult and load commands clear out unused rows and cols to identity correctly [zeromus]
- carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Track polycount better. still worthless: at the very least, it doesnt account for clipping and culling [zeromus]
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
@ -39,6 +40,8 @@
- Add many of NHerve's improvements into OGLRender because I was trying to fix all the 3d issues. [zeromus]
- Toon shading infrastructure and a demo implementation [zeromus]
- Implement lighting model in software instead of using opengl; improves (potential?) compatibility [zeromus]
- Defer rendering until after flush. This was a necessary architectural change, as it permits savestate
for the display list, and allows us eventually to separate the GE emulation from the rendering [zeromus]
0.7.3 -> 0.8

View File

@ -292,4 +292,12 @@ void Vector3Normalize(float *dst)
{
float length = Vector3Length(dst);
Vector3Scale(dst,1.0f/length);
}
void Vector4Copy(float *dst, float *src)
{
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
}

View File

@ -85,4 +85,6 @@ void Vector3Scale(float *dst, float scale);
void Vector3Copy(float *dst, float *src);
void Vector3Normalize(float *dst);
void Vector4Copy(float *dst, float *src);
#endif

View File

@ -19,6 +19,8 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
//todo - why doesnt mario run behind the floor at the beginning of nsmb? is it using a depth clear buffer?
#include <math.h>
#include <stdlib.h>
#include <string.h>
@ -54,10 +56,6 @@ static float* float10Table = NULL;
static float* float10RelTable = NULL;
static float* normalTable = NULL;
static int numVertex = 0;
static int vertexCounter = 0;
static int numPolys = 0;
// Matrix stack handling
static __declspec(align(16)) MatrixStack mtxStack[4];
static __declspec(align(16)) float mtxCurrent [4][16];
@ -112,6 +110,9 @@ static const u8 material_5bit_to_8bit[] = {
static unsigned short matrixMode[2] = {GL_PROJECTION, GL_MODELVIEW};
static short mode = 0;
//raw ds format poly attributes
static u32 polyAttr=0,textureFormat=0, texturePalette=0;
static int colorAlpha=0;
static unsigned int polyID=0;
static unsigned int depthFuncMode=0;
@ -131,10 +132,10 @@ static int alphaDepthWrite = 0;
static int colorRGB[4] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
static int texCoordinateTransform = 0;
static int _t=0, _s=0;
static char beginCalled = 0;
static float last_t, last_s;
static unsigned int vtxFormat;
static unsigned int old_vtxFormat;
static unsigned int textureFormat=0, texturePalette=0;
static unsigned int textureMode=0;
static u16 dsDiffuse, dsAmbient, dsSpecular, dsEmission;
@ -143,6 +144,53 @@ static int diffuse[4] = {0},
specular[4] = {0},
emission[4] = {0};
typedef struct _POLY {
int type; //tri or quad
int vertIndexes[4]; //up to four verts can be referenced by this poly
u32 polyAttr, texParam, texPalette; //the hardware rendering params
float projMatrix[16];
} POLY;
#define POLYLIST_SIZE 6000
//#define POLYLIST_SIZE 2048
typedef struct _POLYLIST {
int count;
POLY list[POLYLIST_SIZE];
} POLYLIST;
POLYLIST polylists[2];
POLYLIST* polylist = &polylists[0];
typedef struct _VERT {
float coord[4];
float texcoord[2];
int color[4];
u32 depth;
} VERT;
#define VERTLIST_SIZE 30000
//#define VERTLIST_SIZE 10000
typedef struct _VERTLIST {
int count;
VERT list[VERTLIST_SIZE];
} VERTLIST;
VERTLIST vertlists[2];
VERTLIST* vertlist = &vertlists[0];
int listTwiddle = 1;
int triStripToggle;
//list-building state
VERTLIST tempVertList;
static void twiddleLists() {
listTwiddle++;
listTwiddle &= 1;
polylist = &polylists[listTwiddle];
vertlist = &vertlists[listTwiddle];
polylist->count = 0;
vertlist->count = 0;
}
//================================================= Textures
#define MAX_TEXTURE 500
typedef struct
@ -296,6 +344,8 @@ char NDS_glInit(void)
if (glGetError() != GL_NO_ERROR)
return 0;
twiddleLists();
// Precalculate some tables, to avoid pushing data to the FPU and back for conversion
float16table = (float*) malloc (sizeof(float)*65536);
for (i = 0; i < 65536; i++)
@ -388,29 +438,16 @@ __forceinline void NDS_3D_Close()
__forceinline void NDS_glViewPort(unsigned long v)
{
if(beginCalled)
glEnd();
//zero: NHerve messed with this in mod2 and mod3, but im still not sure its perfect. need to research this.
glViewport( (v&0xFF), ((v>>8)&0xFF), (((v>>16)&0xFF)+1)-(v&0xFF), ((v>>24)+1)-((v>>8)&0xFF));
if(beginCalled)
glBegin(vtxFormat);
}
__forceinline void NDS_glClearColor(unsigned long v)
{
if(beginCalled)
glEnd();
glClearColor( ((float)(v&0x1F))/31.0f,
((float)((v>>5)&0x1F))/31.0f,
((float)((v>>10)&0x1F))/31.0f,
((float)((v>>16)&0x1F))/31.0f);
if(beginCalled)
glBegin(vtxFormat);
}
__forceinline void NDS_glFogColor(unsigned long v)
@ -428,66 +465,22 @@ __forceinline void NDS_glFogOffset (unsigned long v)
__forceinline void NDS_glClDepth()
{
if(beginCalled)
glEnd();
glClear(GL_DEPTH_BUFFER_BIT);
if(beginCalled)
glBegin(vtxFormat);
}
__forceinline void NDS_glClearDepth(unsigned long v)
{
u32 depth24b;
if(beginCalled)
glEnd();
v &= 0x7FFFF;
// Thanks for NHerve
depth24b = (v*0x200)+((v+1)/0x8000)*0x01FF;
glClearDepth(depth24b / ((float)(1<<24)));
if(beginCalled)
glBegin(vtxFormat);
}
__forceinline void NDS_glMatrixMode(unsigned long v)
{
if (beginCalled)
{
glEnd();
}
mode = (short)(v&3);
if (beginCalled)
{
glBegin (vtxFormat);
}
}
__forceinline void SetMatrix (void)
{
if (mode < 2)
{
if (beginCalled)
glEnd();
glMatrixMode (matrixMode[mode]);
glLoadMatrixf(mtxCurrent[mode]);
glBegin (vtxFormat);
beginCalled = 1;
}
else if (mode == 2)
{
if (beginCalled)
glEnd();
glMatrixMode (matrixMode[1]);
glLoadMatrixf(mtxCurrent[1]);
glBegin (vtxFormat);
beginCalled = 1;
}
}
__forceinline void NDS_glLoadIdentity (void)
@ -561,7 +554,7 @@ __forceinline void NDS_glRestoreMatrix(unsigned long v)
__forceinline void NDS_glPushMatrix (void)
{
//this command always works on both pos and vector when either pos or pos-vector are the current mtx mode
//this command always works on both pos and vector when either pos or pos-vector are the current mtx mode
short mymode = (mode==1?2:mode);
MatrixStackPushMatrix (&mtxStack[mymode], mtxCurrent[mymode]);
@ -1100,14 +1093,17 @@ __forceinline void setTexture(unsigned int format, unsigned int texpal)
__forceinline void NDS_glBegin(unsigned long v)
{
int enableDepthWrite = 1;
vtxFormat = polyType[v&0x03];
triStripToggle = 0;
tempVertList.count = 0;
}
static void BeginRenderPoly()
{
int enableDepthWrite = 1;
u32 tmp=0;
if (beginCalled)
{
glEnd();
}
tempVertList.count = 0;
glDepthFunc (depthFuncMode);
@ -1193,24 +1189,14 @@ __forceinline void NDS_glBegin(unsigned long v)
glDepthMask(enableDepthWrite?GL_TRUE:GL_FALSE);
glMatrixMode(GL_PROJECTION);
glLoadMatrixf(mtxCurrent[0]);
//just to be sure
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
vertexCounter = 0;
beginCalled = 1;
vtxFormat = polyType[v&0x03];
glBegin(vtxFormat);
}
__forceinline void NDS_glEnd (void)
{
if (beginCalled)
{
glEnd();
beginCalled = 0;
}
tempVertList.count = 0;
}
__forceinline void NDS_glColor3b(unsigned long v)
@ -1220,6 +1206,7 @@ __forceinline void NDS_glColor3b(unsigned long v)
colorRGB[2] = material_5bit_to_31bit[((v>>10)&0x1F)];
}
//Submit a vertex to the GE
static __forceinline void SetVertex()
{
__declspec(align(16)) float coordTransformed[4] = { coord[0], coord[1], coord[2], 1 };
@ -1233,36 +1220,113 @@ static __forceinline void SetVertex()
coord[1]*mtxCurrent[3][5] +
coord[2]*mtxCurrent[3][9]) + _t);
glTexCoord2f (s2, t2);
glTexCoord2f (last_s=s2, last_t=t2);
}
//refuse to do anything if we have too many verts or polys
if(vertlist->count >= VERTLIST_SIZE)
return;
if(polylist->count >= POLYLIST_SIZE)
return;
//apply modelview matrix
MatrixMultVec4x4 (mtxCurrent[1], coordTransformed);
glColor4iv ((GLint*)colorRGB);
glVertex3fv (coordTransformed);
//deferred rendering:
//count the polys and verts
vertexCounter++;
numVertex++;
switch(vtxFormat) {
case GL_TRIANGLES:
if(vertexCounter%3 == 0)
numPolys++;
break;
case GL_QUADS:
if((vertexCounter&3) == 0)
numPolys++;
break;
case GL_TRIANGLE_STRIP:
if(vertexCounter>=3)
numPolys++;
break;
case GL_QUAD_STRIP:
if(vertexCounter==4)
numPolys++;
else if((vertexCounter&1)==0)
numPolys++;
//todo - we havent got the whole pipeline working yet, so lets save the projection matrix and let opengl do it
////apply projection matrix
//MatrixMultVec4x4 (mtxCurrent[0], coordTransformed);
////perspective division
//coordTransformed[0] = (coordTransformed[0] + coordTransformed[3]) / 2 / coordTransformed[3];
//coordTransformed[1] = (coordTransformed[1] + coordTransformed[3]) / 2 / coordTransformed[3];
//coordTransformed[2] = (coordTransformed[2] + coordTransformed[3]) / 2 / coordTransformed[3];
//coordTransformed[3] = 1;
//TODO - culling should be done here.
//TODO - viewport transform
//record the vertex
tempVertList.list[tempVertList.count].texcoord[0] = last_s;
tempVertList.list[tempVertList.count].texcoord[1] = last_t;
tempVertList.list[tempVertList.count].coord[0] = coordTransformed[0];
tempVertList.list[tempVertList.count].coord[1] = coordTransformed[1];
tempVertList.list[tempVertList.count].coord[2] = coordTransformed[2];
tempVertList.list[tempVertList.count].coord[3] = coordTransformed[3];
tempVertList.list[tempVertList.count].color[0] = colorRGB[0];
tempVertList.list[tempVertList.count].color[1] = colorRGB[1];
tempVertList.list[tempVertList.count].color[2] = colorRGB[2];
tempVertList.list[tempVertList.count].color[3] = colorRGB[3];
tempVertList.list[tempVertList.count].depth = 0x7FFF * coordTransformed[2];
tempVertList.count++;
//possibly complete a polygon
{
#define SUBMITVERTEX(i,n) vertlist->list[polylist->list[polylist->count].vertIndexes[i] = vertlist->count++] = tempVertList.list[n];
int completed=0;
switch(vtxFormat) {
case GL_TRIANGLES:
if(tempVertList.count!=3)
break;
completed = 1;
SUBMITVERTEX(0,0);
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,2);
polylist->list[polylist->count].type = 3;
tempVertList.count = 0;
break;
case GL_QUADS:
if(tempVertList.count!=4)
break;
completed = 1;
SUBMITVERTEX(0,0);
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,2);
SUBMITVERTEX(3,3);
polylist->list[polylist->count].type = 4;
tempVertList.count = 0;
break;
case GL_TRIANGLE_STRIP:
if(tempVertList.count!=3)
break;
completed = 1;
SUBMITVERTEX(0,0);
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,2);
polylist->list[polylist->count].type = 3;
if(triStripToggle)
tempVertList.list[1] = tempVertList.list[2];
else
tempVertList.list[0] = tempVertList.list[2];
triStripToggle ^= 1;
tempVertList.count = 2;
break;
case GL_QUAD_STRIP:
if(tempVertList.count!=4)
break;
completed = 1;
SUBMITVERTEX(0,0);
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,3);
SUBMITVERTEX(3,2);
polylist->list[polylist->count].type = 4;
tempVertList.list[0] = tempVertList.list[2];
tempVertList.list[1] = tempVertList.list[3];
tempVertList.count = 2;
break;
}
if(completed) {
MatrixCopy(polylist->list[polylist->count].projMatrix,mtxCurrent[0]);
polylist->list[polylist->count].polyAttr = polyAttr;
polylist->list[polylist->count].texParam = textureFormat;
polylist->list[polylist->count].texPalette = texturePalette;
polylist->count++;
}
}
}
__forceinline void NDS_glVertex16b(unsigned int v)
@ -1316,12 +1380,14 @@ __forceinline void NDS_glSwapScreen(unsigned int screen)
__forceinline int NDS_glGetNumPolys (void)
{
return numPolys;
//so is this in the currently-displayed or currently-built list?
return 0;
}
__forceinline int NDS_glGetNumVertex (void)
{
return numVertex;
//so is this in the currently-displayed or currently-built list?
return 0;
}
//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
@ -1350,37 +1416,8 @@ __forceinline void NDS_glGetLine (int line, unsigned short * dst)
}
}
__forceinline void NDS_glFlush(unsigned long v)
static void InstallPolygonAttrib(unsigned long val)
{
if (beginCalled)
{
glEnd();
beginCalled = 0;
}
clCmd = 0;
clInd = 0;
glFlush();
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha);
numVertex = 0;
numPolys = 0;
vertexCounter = 0;
// Set back some secure render states
glPolygonMode (GL_BACK, GL_FILL);
glPolygonMode (GL_FRONT, GL_FILL);
glDepthMask (GL_TRUE);
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
__forceinline void NDS_glPolygonAttrib (unsigned long val)
{
// Light enable/disable
lightMask = (val&0xF);
@ -1404,6 +1441,98 @@ __forceinline void NDS_glPolygonAttrib (unsigned long val)
polyID = (val>>24)&0x1F;
}
__forceinline void NDS_glPolygonAttrib (unsigned long val)
{
polyAttr = val;
InstallPolygonAttrib(polyAttr);
}
__forceinline void NDS_glFlush(unsigned long v)
{
u32 wbuffer = v&1;
u32 sortmode = (v>>1)&1;
// Set back some secure render states
glPolygonMode (GL_BACK, GL_FILL);
glPolygonMode (GL_FRONT, GL_FILL);
glDepthMask (GL_TRUE);
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
//render display list
//TODO - properly doublebuffer the display lists
{
int i;
for(i=0;i<polylist->count;i++) {
POLY *poly = &polylist->list[i];
int type = poly->type;
int j;
InstallPolygonAttrib(poly->polyAttr);
textureFormat = poly->texParam;
texturePalette = poly->texPalette;
BeginRenderPoly();
//since we havent got the whole pipeline working yet, lets use opengl for the projection
glMatrixMode(GL_PROJECTION);
glLoadMatrixf(poly->projMatrix);
glBegin(type==3?GL_TRIANGLES:GL_QUADS);
for(j=0;j<type;j++) {
VERT* vert = &vertlist->list[poly->vertIndexes[j]];
float tempCoord[4];
Vector4Copy(tempCoord,vert->coord);
//we havent got the whole pipeline working yet, so we cant do this
////convert from ds device coords to opengl
//tempCoord[0] *= 2;
//tempCoord[1] *= 2;
//tempCoord[0] -= 1;
//tempCoord[1] -= 1;
//todo - edge flag?
glTexCoord2fv(vert->texcoord);
glColor4iv(vert->color);
glVertex3fv(tempCoord);
}
glEnd();
}
}
twiddleLists();
//reset gpu state
clCmd = 0;
clInd = 0;
//capture rendering results
glFlush();
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha);
//debug: view depth buffer via color buffer for debugging
{
//int ctr=0;
//for(ctr=0;ctr<256*192;ctr++) {
// float zval = GPU_screen3Ddepth[ctr];
// u8* colorPtr = GPU_screen3D+ctr*3;
// if(zval<0) {
// colorPtr[0] = 255;
// colorPtr[1] = 0;
// colorPtr[2] = 0;
// } else if(zval>1) {
// colorPtr[0] = 0;
// colorPtr[1] = 0;
// colorPtr[2] = 255;
// } else {
// colorPtr[0] = colorPtr[1] = colorPtr[2] = zval*255;
// //printlog("%f %f %d\n",zval, zval*255,colorPtr[0]);
// }
//}
}
}
/*
0-4 Diffuse Reflection Red
5-9 Diffuse Reflection Green
@ -1450,9 +1579,6 @@ __forceinline void NDS_glMaterial1 (unsigned long val)
emission[1] = material_5bit_to_31bit[(val>>21)&0x1F];
emission[2] = material_5bit_to_31bit[(val>>26)&0x1F];
emission[3] = 0x7fffffff;
glMaterialiv (GL_FRONT_AND_BACK, GL_SPECULAR, (GLint*)specular);
glMaterialiv (GL_FRONT_AND_BACK, GL_EMISSION, (GLint*)emission);
}
__forceinline void NDS_glShininess (unsigned long val)
@ -1483,10 +1609,10 @@ __forceinline void NDS_glTexCoord(unsigned long val)
t2 =_s*mtxCurrent[3][1] + _t*mtxCurrent[3][5] +
0.0625f*mtxCurrent[3][9] + 0.0625f*mtxCurrent[3][13];
glTexCoord2f (s2, t2);
glTexCoord2f (last_s=s2, last_t=t2);
return;
}
glTexCoord2f (_s, _t);
glTexCoord2f (last_s=_s, last_t=_t);
}
__forceinline signed long NDS_glGetClipMatrix (unsigned int index)
@ -1524,7 +1650,7 @@ __forceinline void NDS_glLightDirection (unsigned long v)
g_lightInfo[index].floatDirection[2] = -normalTable[(v>>20)&1023];
g_lightInfo[index].floatDirection[3] = 0;
// Keep information for GetLightDirection function
// Keep information for fightDirection function
g_lightInfo[index].direction = v;
}
@ -1546,9 +1672,6 @@ __forceinline void NDS_glAlphaFunc(unsigned long v)
__forceinline void NDS_glControl(unsigned long v)
{
if(beginCalled)
glEnd();
if(v&1)
{
glEnable (GL_TEXTURE_2D);
@ -1581,9 +1704,6 @@ __forceinline void NDS_glControl(unsigned long v)
{
printlog("Enabled BITMAP background mode\n");
}
if(beginCalled)
glBegin(vtxFormat);
}
__forceinline void NDS_glNormal(unsigned long v)
@ -1605,7 +1725,7 @@ __forceinline void NDS_glNormal(unsigned long v)
float t2 =( (normal[0] *mtxCurrent[3][1] + normal[1] *mtxCurrent[3][5] +
normal[2] *mtxCurrent[3][9]) + _t);
glTexCoord2f (s2, t2);
glTexCoord2f (last_s=s2, last_t=t2);
}
//use the current normal transform matrix
@ -2133,11 +2253,6 @@ __forceinline void NDS_glCallList(unsigned long v)
}
if((clCmd&0xFF)==0x41)
{
if(beginCalled)
{
glEnd();
beginCalled = 0;
}
--clInd;
clCmd>>=8;
}
@ -2230,3 +2345,6 @@ GPU3DInterface gpu3Dgl = { NDS_glInit,
};
//http://www.opengl.org/documentation/specs/version1.1/glspec1.1/node17.html
//talks about the state required to process verts in quadlists etc. helpful ideas.
//consider building a little state structure that looks exactly like this describes