- Add many of NHerve's improvements into OGLRender because I was trying to fix all the 3d issues

- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling
- carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering)
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly
- Make matrix 4x4 multiply routines use W-coordinate.
This commit is contained in:
zeromus 2008-09-06 04:08:35 +00:00
parent 7d2fc8964e
commit 5278185e73
6 changed files with 2842 additions and 2749 deletions

View File

@ -27,6 +27,12 @@
- Some fixes in 3D core OGL (fixed textures) [CrazyMax] - Some fixes in 3D core OGL (fixed textures) [CrazyMax]
- Added texture caching (speedup 3D core) [CrazyMax] - Added texture caching (speedup 3D core) [CrazyMax]
- Fixes clear depth (ex. Castlevania now don't flipping) [NHerve] - Fixes clear depth (ex. Castlevania now don't flipping) [NHerve]
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly;
carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling [zeromus]
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
0.7.3 -> 0.8 0.7.3 -> 0.8
Cocoa: Cocoa:

View File

@ -59,10 +59,12 @@ void MatrixMultVec4x4 (float *matrix, float *vecPtr)
float x = vecPtr[0]; float x = vecPtr[0];
float y = vecPtr[1]; float y = vecPtr[1];
float z = vecPtr[2]; float z = vecPtr[2];
float w = vecPtr[3];
vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8] + matrix[12]; vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8] + w * matrix[12];
vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9] + matrix[13]; vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9] + w * matrix[13];
vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10] + matrix[14]; vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10] + w * matrix[14];
vecPtr[3] = x * matrix[3] + y * matrix[7] + z * matrix[11] + w * matrix[15];
} }
void MatrixMultVec3x3 (float *matrix, float *vecPtr) void MatrixMultVec3x3 (float *matrix, float *vecPtr)
@ -123,6 +125,31 @@ void MatrixSet (float *matrix, int x, int y, float value)
matrix [x+(y<<2)] = value; matrix [x+(y<<2)] = value;
} }
/*
 * Transpose a 4x4 float matrix in place.
 *
 * matrix points at 16 consecutive floats, read as four groups of four.
 * Every element above the diagonal is exchanged with its mirror image
 * below the diagonal; the diagonal entries (0, 5, A, F) are not touched.
 *
 * Index layout before and after the call:
 *
 *   0 1 2 3        0 4 8 C
 *   4 5 6 7  --->  1 5 9 D
 *   8 9 A B        2 6 A E
 *   C D E F        3 7 B F
 */
void MatrixTranspose(float *matrix)
{
	int row, col;

	for (row = 0; row < 3; row++)
	{
		/* Only walk the upper triangle so each pair is swapped exactly once. */
		for (col = row + 1; col < 4; col++)
		{
			const int upper = row * 4 + col;
			const int lower = col * 4 + row;
			const float held = matrix[upper];

			matrix[upper] = matrix[lower];
			matrix[lower] = held;
		}
	}
}
void MatrixCopy (float *matrixDST, float *matrixSRC) void MatrixCopy (float *matrixDST, float *matrixSRC)
{ {
memcpy (matrixDST, matrixSRC, sizeof(float)*16); memcpy (matrixDST, matrixSRC, sizeof(float)*16);

View File

@ -67,6 +67,8 @@ void MatrixTranslate (float *matrix, float *ptr);
void MatrixScale (float *matrix, float *ptr); void MatrixScale (float *matrix, float *ptr);
#endif #endif
void MatrixTranspose(float *matrix);
void MatrixStackInit (MatrixStack *stack); void MatrixStackInit (MatrixStack *stack);
void MatrixStackSetMaxSize (MatrixStack *stack, int size); void MatrixStackSetMaxSize (MatrixStack *stack, int size);
void MatrixStackSetStackPosition (MatrixStack *stack, int pos); void MatrixStackSetStackPosition (MatrixStack *stack, int pos);

View File

@ -33,12 +33,14 @@ MatrixMultVec4x4 PROC PUBLIC
shufps xmm4, xmm4, 00000000b shufps xmm4, xmm4, 00000000b
shufps xmm5, xmm5, 01010101b shufps xmm5, xmm5, 01010101b
shufps xmm6, xmm6, 10101010b shufps xmm6, xmm6, 10101010b
shufps xmm7, xmm7, 11111111b
mulps xmm4, xmm0 mulps xmm4, xmm0
mulps xmm5, xmm1 mulps xmm5, xmm1
mulps xmm6, xmm2 mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5 addps xmm4, xmm5
addps xmm4, xmm6 addps xmm4, xmm6
addps xmm4, xmm3 addps xmm4, xmm7
movaps XMMWORD PTR [rdx], xmm4 movaps XMMWORD PTR [rdx], xmm4
ret 0 ret 0
MatrixMultVec4x4 ENDP MatrixMultVec4x4 ENDP

View File

@ -36,12 +36,14 @@
shufps xmm4, xmm4, 00000000b shufps xmm4, xmm4, 00000000b
shufps xmm5, xmm5, 01010101b shufps xmm5, xmm5, 01010101b
shufps xmm6, xmm6, 10101010b shufps xmm6, xmm6, 10101010b
shufps xmm7, xmm7, 11111111b
mulps xmm4, xmm0 mulps xmm4, xmm0
mulps xmm5, xmm1 mulps xmm5, xmm1
mulps xmm6, xmm2 mulps xmm6, xmm2
mulps xmm7, xmm3
addps xmm4, xmm5 addps xmm4, xmm5
addps xmm4, xmm6 addps xmm4, xmm6
addps xmm4, xmm3 addps xmm4, xmm7
movaps XMMWORD PTR [edx], xmm4 movaps XMMWORD PTR [edx], xmm4
ret 0 ret 0
@MatrixMultVec4x4@8 ENDP @MatrixMultVec4x4@8 ENDP

View File

@ -46,13 +46,17 @@
static unsigned char GPU_screen3D [256*256*3]={0}; static unsigned char GPU_screen3D [256*256*3]={0};
static float GPU_screen3Ddepth [256*256]={0}; static float GPU_screen3Ddepth [256*256]={0};
static unsigned char GPU_screenAlpha[256*256]={0};
// Acceleration tables // Acceleration tables
static float* float16table = NULL; static float* float16table = NULL;
static float* float10Table = NULL; static float* float10Table = NULL;
static float* float10RelTable = NULL; static float* float10RelTable = NULL;
static float* normalTable = NULL; static float* normalTable = NULL;
static int numVertex = 0; static int numVertex = 0;
static int vertexCounter = 0;
static int numPolys = 0;
// Matrix stack handling // Matrix stack handling
static __declspec(align(16)) MatrixStack mtxStack[4]; static __declspec(align(16)) MatrixStack mtxStack[4];
@ -318,7 +322,9 @@ __forceinline void NDS_glViewPort(unsigned long v)
if(beginCalled) if(beginCalled)
glEnd(); glEnd();
glViewport( (v&0xFF), ((v>>8)&0xFF), ((v>>16)&0xFF), ((v>>24)&0xFF)); //zero: NHerve messed with this in mod2 and mod3, but im still not sure its perfect. need to research this.
glViewport( (v&0xFF), ((v>>8)&0xFF), (((v>>16)&0xFF)+1)-(v&0xFF), ((v>>24)+1)-((v>>8)&0xFF));
if(beginCalled) if(beginCalled)
glBegin(vtxFormat); glBegin(vtxFormat);
@ -448,6 +454,10 @@ __forceinline void NDS_glLoadMatrix4x3(signed long v)
if(ML4x3_l<4) return; if(ML4x3_l<4) return;
ML4x3_l = 0; ML4x3_l = 0;
//fill in the unusued matrix values
mtxCurrent[mode][3] = mtxCurrent[mode][7] = mtxCurrent[mode][11] = 0;
mtxCurrent[mode][15] = 1;
if (mode == 2) if (mode == 2)
MatrixCopy (mtxCurrent[1], mtxCurrent[2]); MatrixCopy (mtxCurrent[1], mtxCurrent[2]);
} }
@ -552,11 +562,17 @@ __forceinline void NDS_glMultMatrix3x3(signed long v)
if(MM3x3_l<3) return; if(MM3x3_l<3) return;
MM3x3_l = 0; MM3x3_l = 0;
//fill in the unusued matrix values
mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0;
mtxTemporal[15] = 1;
mtxTemporal[12] = mtxTemporal[13] = mtxTemporal[14] = 0;
MatrixMultiply (mtxCurrent[mode], mtxTemporal); MatrixMultiply (mtxCurrent[mode], mtxTemporal);
if (mode == 2) if (mode == 2)
MatrixMultiply (mtxCurrent[1], mtxTemporal); MatrixMultiply (mtxCurrent[1], mtxTemporal);
//does this really need to be done?
MatrixIdentity (mtxTemporal); MatrixIdentity (mtxTemporal);
} }
@ -572,10 +588,15 @@ __forceinline void NDS_glMultMatrix4x3(signed long v)
if(MM4x3_l<4) return; if(MM4x3_l<4) return;
MM4x3_l = 0; MM4x3_l = 0;
//fill in the unusued matrix values
mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0;
mtxTemporal[15] = 1;
MatrixMultiply (mtxCurrent[mode], mtxTemporal); MatrixMultiply (mtxCurrent[mode], mtxTemporal);
if (mode == 2) if (mode == 2)
MatrixMultiply (mtxCurrent[1], mtxTemporal); MatrixMultiply (mtxCurrent[1], mtxTemporal);
//does this really need to be done?
MatrixIdentity (mtxTemporal); MatrixIdentity (mtxTemporal);
} }
@ -1019,13 +1040,17 @@ __forceinline void NDS_glBegin(unsigned long v)
if (lightMask) if (lightMask)
{ {
glEnable (GL_LIGHTING); glEnable (GL_LIGHTING);
//glEnable(GL_COLOR_MATERIAL); //NHerve added this in mod2 but it doesnt do any good unless it gets setup
(lightMask&0x01)?glEnable (GL_LIGHT0):glDisable(GL_LIGHT0); (lightMask&0x01)?glEnable (GL_LIGHT0):glDisable(GL_LIGHT0);
(lightMask&0x02)?glEnable (GL_LIGHT1):glDisable(GL_LIGHT1); (lightMask&0x02)?glEnable (GL_LIGHT1):glDisable(GL_LIGHT1);
(lightMask&0x04)?glEnable (GL_LIGHT2):glDisable(GL_LIGHT2); (lightMask&0x04)?glEnable (GL_LIGHT2):glDisable(GL_LIGHT2);
(lightMask&0x08)?glEnable (GL_LIGHT3):glDisable(GL_LIGHT3); (lightMask&0x08)?glEnable (GL_LIGHT3):glDisable(GL_LIGHT3);
} }
else else
{
glDisable (GL_LIGHTING); glDisable (GL_LIGHTING);
//glDisable(GL_COLOR_MATERIAL); //NHerve added this in mod2 but it doesnt do any good unless it gets setup
}
glDepthFunc (depthFuncMode); glDepthFunc (depthFuncMode);
@ -1120,7 +1145,7 @@ __forceinline void NDS_glBegin(unsigned long v)
glMatrixMode(GL_MODELVIEW); glMatrixMode(GL_MODELVIEW);
glLoadIdentity(); glLoadIdentity();
vertexCounter = 0;
beginCalled = 1; beginCalled = 1;
vtxFormat = polyType[v&0x03]; vtxFormat = polyType[v&0x03];
glBegin(vtxFormat); glBegin(vtxFormat);
@ -1145,7 +1170,7 @@ __forceinline void NDS_glColor3b(unsigned long v)
static __forceinline void SetVertex() static __forceinline void SetVertex()
{ {
__declspec(align(16)) float coordTransformed[3] = { coord[0], coord[1], coord[2] }; __declspec(align(16)) float coordTransformed[4] = { coord[0], coord[1], coord[2], 1 };
if (texCoordinateTransform == 3) if (texCoordinateTransform == 3)
{ {
@ -1164,10 +1189,31 @@ static __forceinline void SetVertex()
glVertex3fv (coordTransformed); glVertex3fv (coordTransformed);
//count the polys and verts
vertexCounter++;
numVertex++;
switch(vtxFormat) {
case GL_TRIANGLES:
if(vertexCounter%3 == 0)
numPolys++;
break;
case GL_QUADS:
if((vertexCounter&3) == 0)
numPolys++;
break;
case GL_TRIANGLE_STRIP:
if(vertexCounter>=3)
numPolys++;
break;
case GL_QUAD_STRIP:
if(vertexCounter==4)
numPolys++;
else if((vertexCounter&1)==0)
numPolys++;
}
//zero - helpful in making sure vertex colors or lighting arent broken //zero - helpful in making sure vertex colors or lighting arent broken
//glColor3ub(rand()&255,rand()&255,rand()&255); //glColor3ub(rand()&255,rand()&255,rand()&255);
numVertex++;
} }
__forceinline void NDS_glVertex16b(unsigned int v) __forceinline void NDS_glVertex16b(unsigned int v)
@ -1219,10 +1265,9 @@ __forceinline void NDS_glSwapScreen(unsigned int screen)
} }
// THIS IS A HACK :D
__forceinline int NDS_glGetNumPolys (void) __forceinline int NDS_glGetNumPolys (void)
{ {
return numVertex/3; return numPolys;
} }
__forceinline int NDS_glGetNumVertex (void) __forceinline int NDS_glGetNumVertex (void)
@ -1230,12 +1275,15 @@ __forceinline int NDS_glGetNumVertex (void)
return numVertex; return numVertex;
} }
//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
__forceinline void NDS_glGetLine (int line, unsigned short * dst) __forceinline void NDS_glGetLine (int line, unsigned short * dst)
{ {
int i, t; int i, t;
u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768]; u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768];
float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256]; float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256];
u32 r,g,b; u8 *screenAlpha = (u8*)&GPU_screenAlpha[(191-(line%192))*256];
u32 r,g,b,a;
for(i = 0, t=0; i < 256; i++) for(i = 0, t=0; i < 256; i++)
{ {
@ -1245,7 +1293,9 @@ __forceinline void NDS_glGetLine (int line, unsigned short * dst)
r = screen3D[t]; r = screen3D[t];
g = screen3D[t+1]; g = screen3D[t+1];
b = screen3D[t+2]; b = screen3D[t+2];
a = screenAlpha[i];
if(a)
dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3); dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3);
} }
} }
@ -1265,8 +1315,11 @@ __forceinline void NDS_glFlush(unsigned long v)
glFlush(); glFlush();
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth); glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D); glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha);
numVertex = 0; numVertex = 0;
numPolys = 0;
vertexCounter = 0;
// Set back some secure render states // Set back some secure render states
glPolygonMode (GL_BACK, GL_FILL); glPolygonMode (GL_BACK, GL_FILL);
@ -2079,3 +2132,4 @@ GPU3DInterface gpu3Dgl = { NDS_glInit,
NDS_glGetVecRes NDS_glGetVecRes
}; };