- Add many of NHerve's improvements into OGLRender because I was trying to fix all the 3d issues
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling - Carry w=1 from vertex() through the pipeline (this will be necessary for software 3D rendering) - Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly - Make matrix 4x4 multiply routines use the W-coordinate.
This commit is contained in:
parent
7d2fc8964e
commit
5278185e73
|
@ -27,6 +27,12 @@
|
||||||
- Some fixes in 3D core OGL (fixed textures) [CrazyMax]
|
- Some fixes in 3D core OGL (fixed textures) [CrazyMax]
|
||||||
- Added texture caching (speedup 3D core) [CrazyMax]
|
- Added texture caching (speedup 3D core) [CrazyMax]
|
||||||
- Fixes clear depth (e.g. Castlevania no longer flips) [NHerve]
|
- Fixes clear depth (e.g. Castlevania no longer flips) [NHerve]
|
||||||
|
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
|
||||||
|
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly;
|
||||||
|
carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
|
||||||
|
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling [zeromus]
|
||||||
|
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
|
||||||
|
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
|
||||||
|
|
||||||
0.7.3 -> 0.8
|
0.7.3 -> 0.8
|
||||||
Cocoa:
|
Cocoa:
|
||||||
|
|
|
@ -59,10 +59,12 @@ void MatrixMultVec4x4 (float *matrix, float *vecPtr)
|
||||||
float x = vecPtr[0];
|
float x = vecPtr[0];
|
||||||
float y = vecPtr[1];
|
float y = vecPtr[1];
|
||||||
float z = vecPtr[2];
|
float z = vecPtr[2];
|
||||||
|
float w = vecPtr[3];
|
||||||
|
|
||||||
vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8] + matrix[12];
|
vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8] + w * matrix[12];
|
||||||
vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9] + matrix[13];
|
vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9] + w * matrix[13];
|
||||||
vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10] + matrix[14];
|
vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10] + w * matrix[14];
|
||||||
|
vecPtr[3] = x * matrix[3] + y * matrix[7] + z * matrix[11] + w * matrix[15];
|
||||||
}
|
}
|
||||||
|
|
||||||
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
|
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
|
||||||
|
@ -123,6 +125,31 @@ void MatrixSet (float *matrix, int x, int y, float value)
|
||||||
matrix [x+(y<<2)] = value;
|
matrix [x+(y<<2)] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MatrixTranspose(float *matrix)
|
||||||
|
{
|
||||||
|
float temp;
|
||||||
|
#define swap(A,B) temp = matrix[A];matrix[A] = matrix[B]; matrix[B] = temp;
|
||||||
|
swap(1,4);
|
||||||
|
swap(2,8);
|
||||||
|
swap(3,0xC);
|
||||||
|
swap(6,9);
|
||||||
|
swap(7,0xD);
|
||||||
|
swap(0xB,0xE);
|
||||||
|
#undef swap
|
||||||
|
|
||||||
|
/*
|
||||||
|
0 1 2 3
|
||||||
|
4 5 6 7
|
||||||
|
8 9 A B
|
||||||
|
C D E F
|
||||||
|
|
||||||
|
0 4 8 C
|
||||||
|
1 5 9 D
|
||||||
|
2 6 A E
|
||||||
|
3 7 B F
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
void MatrixCopy (float *matrixDST, float *matrixSRC)
|
void MatrixCopy (float *matrixDST, float *matrixSRC)
|
||||||
{
|
{
|
||||||
memcpy (matrixDST, matrixSRC, sizeof(float)*16);
|
memcpy (matrixDST, matrixSRC, sizeof(float)*16);
|
||||||
|
|
|
@ -67,6 +67,8 @@ void MatrixTranslate (float *matrix, float *ptr);
|
||||||
void MatrixScale (float *matrix, float *ptr);
|
void MatrixScale (float *matrix, float *ptr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void MatrixTranspose(float *matrix);
|
||||||
|
|
||||||
void MatrixStackInit (MatrixStack *stack);
|
void MatrixStackInit (MatrixStack *stack);
|
||||||
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
|
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
|
||||||
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
|
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
|
||||||
|
|
|
@ -33,12 +33,14 @@ MatrixMultVec4x4 PROC PUBLIC
|
||||||
shufps xmm4, xmm4, 00000000b
|
shufps xmm4, xmm4, 00000000b
|
||||||
shufps xmm5, xmm5, 01010101b
|
shufps xmm5, xmm5, 01010101b
|
||||||
shufps xmm6, xmm6, 10101010b
|
shufps xmm6, xmm6, 10101010b
|
||||||
|
shufps xmm7, xmm7, 11111111b
|
||||||
mulps xmm4, xmm0
|
mulps xmm4, xmm0
|
||||||
mulps xmm5, xmm1
|
mulps xmm5, xmm1
|
||||||
mulps xmm6, xmm2
|
mulps xmm6, xmm2
|
||||||
|
mulps xmm7, xmm3
|
||||||
addps xmm4, xmm5
|
addps xmm4, xmm5
|
||||||
addps xmm4, xmm6
|
addps xmm4, xmm6
|
||||||
addps xmm4, xmm3
|
addps xmm4, xmm7
|
||||||
movaps XMMWORD PTR [rdx], xmm4
|
movaps XMMWORD PTR [rdx], xmm4
|
||||||
ret 0
|
ret 0
|
||||||
MatrixMultVec4x4 ENDP
|
MatrixMultVec4x4 ENDP
|
||||||
|
|
|
@ -36,12 +36,14 @@
|
||||||
shufps xmm4, xmm4, 00000000b
|
shufps xmm4, xmm4, 00000000b
|
||||||
shufps xmm5, xmm5, 01010101b
|
shufps xmm5, xmm5, 01010101b
|
||||||
shufps xmm6, xmm6, 10101010b
|
shufps xmm6, xmm6, 10101010b
|
||||||
|
shufps xmm7, xmm7, 11111111b
|
||||||
mulps xmm4, xmm0
|
mulps xmm4, xmm0
|
||||||
mulps xmm5, xmm1
|
mulps xmm5, xmm1
|
||||||
mulps xmm6, xmm2
|
mulps xmm6, xmm2
|
||||||
|
mulps xmm7, xmm3
|
||||||
addps xmm4, xmm5
|
addps xmm4, xmm5
|
||||||
addps xmm4, xmm6
|
addps xmm4, xmm6
|
||||||
addps xmm4, xmm3
|
addps xmm4, xmm7
|
||||||
movaps XMMWORD PTR [edx], xmm4
|
movaps XMMWORD PTR [edx], xmm4
|
||||||
ret 0
|
ret 0
|
||||||
@MatrixMultVec4x4@8 ENDP
|
@MatrixMultVec4x4@8 ENDP
|
||||||
|
|
|
@ -46,13 +46,17 @@
|
||||||
|
|
||||||
static unsigned char GPU_screen3D [256*256*3]={0};
|
static unsigned char GPU_screen3D [256*256*3]={0};
|
||||||
static float GPU_screen3Ddepth [256*256]={0};
|
static float GPU_screen3Ddepth [256*256]={0};
|
||||||
|
static unsigned char GPU_screenAlpha[256*256]={0};
|
||||||
|
|
||||||
// Acceleration tables
|
// Acceleration tables
|
||||||
static float* float16table = NULL;
|
static float* float16table = NULL;
|
||||||
static float* float10Table = NULL;
|
static float* float10Table = NULL;
|
||||||
static float* float10RelTable = NULL;
|
static float* float10RelTable = NULL;
|
||||||
static float* normalTable = NULL;
|
static float* normalTable = NULL;
|
||||||
|
|
||||||
static int numVertex = 0;
|
static int numVertex = 0;
|
||||||
|
static int vertexCounter = 0;
|
||||||
|
static int numPolys = 0;
|
||||||
|
|
||||||
// Matrix stack handling
|
// Matrix stack handling
|
||||||
static __declspec(align(16)) MatrixStack mtxStack[4];
|
static __declspec(align(16)) MatrixStack mtxStack[4];
|
||||||
|
@ -318,7 +322,9 @@ __forceinline void NDS_glViewPort(unsigned long v)
|
||||||
if(beginCalled)
|
if(beginCalled)
|
||||||
glEnd();
|
glEnd();
|
||||||
|
|
||||||
glViewport( (v&0xFF), ((v>>8)&0xFF), ((v>>16)&0xFF), ((v>>24)&0xFF));
|
//zero: NHerve messed with this in mod2 and mod3, but I'm still not sure it's perfect. Need to research this.
|
||||||
|
glViewport( (v&0xFF), ((v>>8)&0xFF), (((v>>16)&0xFF)+1)-(v&0xFF), ((v>>24)+1)-((v>>8)&0xFF));
|
||||||
|
|
||||||
|
|
||||||
if(beginCalled)
|
if(beginCalled)
|
||||||
glBegin(vtxFormat);
|
glBegin(vtxFormat);
|
||||||
|
@ -448,6 +454,10 @@ __forceinline void NDS_glLoadMatrix4x3(signed long v)
|
||||||
if(ML4x3_l<4) return;
|
if(ML4x3_l<4) return;
|
||||||
ML4x3_l = 0;
|
ML4x3_l = 0;
|
||||||
|
|
||||||
|
//fill in the unused matrix values
|
||||||
|
mtxCurrent[mode][3] = mtxCurrent[mode][7] = mtxCurrent[mode][11] = 0;
|
||||||
|
mtxCurrent[mode][15] = 1;
|
||||||
|
|
||||||
if (mode == 2)
|
if (mode == 2)
|
||||||
MatrixCopy (mtxCurrent[1], mtxCurrent[2]);
|
MatrixCopy (mtxCurrent[1], mtxCurrent[2]);
|
||||||
}
|
}
|
||||||
|
@ -552,11 +562,17 @@ __forceinline void NDS_glMultMatrix3x3(signed long v)
|
||||||
if(MM3x3_l<3) return;
|
if(MM3x3_l<3) return;
|
||||||
MM3x3_l = 0;
|
MM3x3_l = 0;
|
||||||
|
|
||||||
|
//fill in the unused matrix values
|
||||||
|
mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0;
|
||||||
|
mtxTemporal[15] = 1;
|
||||||
|
mtxTemporal[12] = mtxTemporal[13] = mtxTemporal[14] = 0;
|
||||||
|
|
||||||
MatrixMultiply (mtxCurrent[mode], mtxTemporal);
|
MatrixMultiply (mtxCurrent[mode], mtxTemporal);
|
||||||
|
|
||||||
if (mode == 2)
|
if (mode == 2)
|
||||||
MatrixMultiply (mtxCurrent[1], mtxTemporal);
|
MatrixMultiply (mtxCurrent[1], mtxTemporal);
|
||||||
|
|
||||||
|
//does this really need to be done?
|
||||||
MatrixIdentity (mtxTemporal);
|
MatrixIdentity (mtxTemporal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -572,10 +588,15 @@ __forceinline void NDS_glMultMatrix4x3(signed long v)
|
||||||
if(MM4x3_l<4) return;
|
if(MM4x3_l<4) return;
|
||||||
MM4x3_l = 0;
|
MM4x3_l = 0;
|
||||||
|
|
||||||
|
//fill in the unused matrix values
|
||||||
|
mtxTemporal[3] = mtxTemporal[7] = mtxTemporal[11] = 0;
|
||||||
|
mtxTemporal[15] = 1;
|
||||||
|
|
||||||
MatrixMultiply (mtxCurrent[mode], mtxTemporal);
|
MatrixMultiply (mtxCurrent[mode], mtxTemporal);
|
||||||
if (mode == 2)
|
if (mode == 2)
|
||||||
MatrixMultiply (mtxCurrent[1], mtxTemporal);
|
MatrixMultiply (mtxCurrent[1], mtxTemporal);
|
||||||
|
|
||||||
|
//does this really need to be done?
|
||||||
MatrixIdentity (mtxTemporal);
|
MatrixIdentity (mtxTemporal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1019,13 +1040,17 @@ __forceinline void NDS_glBegin(unsigned long v)
|
||||||
if (lightMask)
|
if (lightMask)
|
||||||
{
|
{
|
||||||
glEnable (GL_LIGHTING);
|
glEnable (GL_LIGHTING);
|
||||||
|
//glEnable(GL_COLOR_MATERIAL); //NHerve added this in mod2 but it doesn't do any good unless it gets set up
|
||||||
(lightMask&0x01)?glEnable (GL_LIGHT0):glDisable(GL_LIGHT0);
|
(lightMask&0x01)?glEnable (GL_LIGHT0):glDisable(GL_LIGHT0);
|
||||||
(lightMask&0x02)?glEnable (GL_LIGHT1):glDisable(GL_LIGHT1);
|
(lightMask&0x02)?glEnable (GL_LIGHT1):glDisable(GL_LIGHT1);
|
||||||
(lightMask&0x04)?glEnable (GL_LIGHT2):glDisable(GL_LIGHT2);
|
(lightMask&0x04)?glEnable (GL_LIGHT2):glDisable(GL_LIGHT2);
|
||||||
(lightMask&0x08)?glEnable (GL_LIGHT3):glDisable(GL_LIGHT3);
|
(lightMask&0x08)?glEnable (GL_LIGHT3):glDisable(GL_LIGHT3);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
glDisable (GL_LIGHTING);
|
glDisable (GL_LIGHTING);
|
||||||
|
//glDisable(GL_COLOR_MATERIAL); //NHerve added this in mod2 but it doesn't do any good unless it gets set up
|
||||||
|
}
|
||||||
|
|
||||||
glDepthFunc (depthFuncMode);
|
glDepthFunc (depthFuncMode);
|
||||||
|
|
||||||
|
@ -1120,7 +1145,7 @@ __forceinline void NDS_glBegin(unsigned long v)
|
||||||
glMatrixMode(GL_MODELVIEW);
|
glMatrixMode(GL_MODELVIEW);
|
||||||
glLoadIdentity();
|
glLoadIdentity();
|
||||||
|
|
||||||
|
vertexCounter = 0;
|
||||||
beginCalled = 1;
|
beginCalled = 1;
|
||||||
vtxFormat = polyType[v&0x03];
|
vtxFormat = polyType[v&0x03];
|
||||||
glBegin(vtxFormat);
|
glBegin(vtxFormat);
|
||||||
|
@ -1145,7 +1170,7 @@ __forceinline void NDS_glColor3b(unsigned long v)
|
||||||
|
|
||||||
static __forceinline void SetVertex()
|
static __forceinline void SetVertex()
|
||||||
{
|
{
|
||||||
__declspec(align(16)) float coordTransformed[3] = { coord[0], coord[1], coord[2] };
|
__declspec(align(16)) float coordTransformed[4] = { coord[0], coord[1], coord[2], 1 };
|
||||||
|
|
||||||
if (texCoordinateTransform == 3)
|
if (texCoordinateTransform == 3)
|
||||||
{
|
{
|
||||||
|
@ -1164,10 +1189,31 @@ static __forceinline void SetVertex()
|
||||||
|
|
||||||
glVertex3fv (coordTransformed);
|
glVertex3fv (coordTransformed);
|
||||||
|
|
||||||
|
//count the polys and verts
|
||||||
|
vertexCounter++;
|
||||||
|
numVertex++;
|
||||||
|
switch(vtxFormat) {
|
||||||
|
case GL_TRIANGLES:
|
||||||
|
if(vertexCounter%3 == 0)
|
||||||
|
numPolys++;
|
||||||
|
break;
|
||||||
|
case GL_QUADS:
|
||||||
|
if((vertexCounter&3) == 0)
|
||||||
|
numPolys++;
|
||||||
|
break;
|
||||||
|
case GL_TRIANGLE_STRIP:
|
||||||
|
if(vertexCounter>=3)
|
||||||
|
numPolys++;
|
||||||
|
break;
|
||||||
|
case GL_QUAD_STRIP:
|
||||||
|
if(vertexCounter==4)
|
||||||
|
numPolys++;
|
||||||
|
else if((vertexCounter&1)==0)
|
||||||
|
numPolys++;
|
||||||
|
}
|
||||||
|
|
||||||
//zero - helpful in making sure vertex colors or lighting arent broken
|
//zero - helpful in making sure vertex colors or lighting arent broken
|
||||||
//glColor3ub(rand()&255,rand()&255,rand()&255);
|
//glColor3ub(rand()&255,rand()&255,rand()&255);
|
||||||
|
|
||||||
numVertex++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void NDS_glVertex16b(unsigned int v)
|
__forceinline void NDS_glVertex16b(unsigned int v)
|
||||||
|
@ -1219,10 +1265,9 @@ __forceinline void NDS_glSwapScreen(unsigned int screen)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// THIS IS A HACK :D
|
|
||||||
__forceinline int NDS_glGetNumPolys (void)
|
__forceinline int NDS_glGetNumPolys (void)
|
||||||
{
|
{
|
||||||
return numVertex/3;
|
return numPolys;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline int NDS_glGetNumVertex (void)
|
__forceinline int NDS_glGetNumVertex (void)
|
||||||
|
@ -1230,12 +1275,15 @@ __forceinline int NDS_glGetNumVertex (void)
|
||||||
return numVertex;
|
return numVertex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//NHerve mod3 - Fixed blending with 2D backgrounds (New Super Mario Bros looks better)
|
||||||
__forceinline void NDS_glGetLine (int line, unsigned short * dst)
|
__forceinline void NDS_glGetLine (int line, unsigned short * dst)
|
||||||
{
|
{
|
||||||
int i, t;
|
int i, t;
|
||||||
u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768];
|
u8 *screen3D = (u8 *)&GPU_screen3D [(192-(line%192))*768];
|
||||||
float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256];
|
float *screen3Ddepth = &GPU_screen3Ddepth [(192-(line%192))*256];
|
||||||
u32 r,g,b;
|
u8 *screenAlpha = (u8*)&GPU_screenAlpha[(191-(line%192))*256];
|
||||||
|
|
||||||
|
u32 r,g,b,a;
|
||||||
|
|
||||||
for(i = 0, t=0; i < 256; i++)
|
for(i = 0, t=0; i < 256; i++)
|
||||||
{
|
{
|
||||||
|
@ -1245,7 +1293,9 @@ __forceinline void NDS_glGetLine (int line, unsigned short * dst)
|
||||||
r = screen3D[t];
|
r = screen3D[t];
|
||||||
g = screen3D[t+1];
|
g = screen3D[t+1];
|
||||||
b = screen3D[t+2];
|
b = screen3D[t+2];
|
||||||
|
a = screenAlpha[i];
|
||||||
|
|
||||||
|
if(a)
|
||||||
dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3);
|
dst[i] = ((r>>3)<<10) | ((g>>3)<<5) | (b>>3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1265,8 +1315,11 @@ __forceinline void NDS_glFlush(unsigned long v)
|
||||||
glFlush();
|
glFlush();
|
||||||
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
|
glReadPixels(0,0,256,192,GL_DEPTH_COMPONENT, GL_FLOAT, GPU_screen3Ddepth);
|
||||||
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
|
glReadPixels(0,0,256,192,GL_BGR_EXT, GL_UNSIGNED_BYTE, GPU_screen3D);
|
||||||
|
glReadPixels(0,0,256,192,GL_ALPHA, GL_UNSIGNED_BYTE, GPU_screenAlpha);
|
||||||
|
|
||||||
numVertex = 0;
|
numVertex = 0;
|
||||||
|
numPolys = 0;
|
||||||
|
vertexCounter = 0;
|
||||||
|
|
||||||
// Set back some secure render states
|
// Set back some secure render states
|
||||||
glPolygonMode (GL_BACK, GL_FILL);
|
glPolygonMode (GL_BACK, GL_FILL);
|
||||||
|
@ -2079,3 +2132,4 @@ GPU3DInterface gpu3Dgl = { NDS_glInit,
|
||||||
NDS_glGetVecRes
|
NDS_glGetVecRes
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue