From 4dbec12893358415b7b5c5c5d8a805338b37774f Mon Sep 17 00:00:00 2001 From: zeromus Date: Sat, 3 Jul 2010 11:35:10 +0000 Subject: [PATCH] gfx3d: change lighting engine almost entirely to fixed point, and substantially improve accuracy of specular component --- desmume/src/gfx3d.cpp | 212 ++++++++++++++++++++++++----------------- desmume/src/gfx3d.h | 2 +- desmume/src/matrix.cpp | 10 ++ desmume/src/matrix.h | 2 +- 4 files changed, 139 insertions(+), 87 deletions(-) diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index 88a13042b..465fe4547 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -385,7 +385,7 @@ u32 control = 0; //light state: static u32 lightColor[4] = {0,0,0,0}; -static u32 lightDirection[4] = {0,0,0,0}; +static s32 lightDirection[4] = {0,0,0,0}; //material state: static u16 dsDiffuse, dsAmbient, dsSpecular, dsEmission; //used for indexing the shininess table during parameters to shininess command @@ -400,8 +400,8 @@ static u32 envMode=0; static u32 lightMask=0; //other things: static int texCoordinateTransform = 0; -static CACHE_ALIGN float cacheLightDirection[4][4]; -static CACHE_ALIGN float cacheHalfVector[4][4]; +static CACHE_ALIGN s32 cacheLightDirection[4][4]; +static CACHE_ALIGN s32 cacheHalfVector[4][4]; //------------------ #define RENDER_FRONT_SURFACE 0x80 @@ -587,7 +587,24 @@ void gfx3d_reset() //================================================================================= //================================================================================= -#define vec3dot(a, b) (((a[0]) * (b[0])) + ((a[1]) * (b[1])) + ((a[2]) * (b[2]))) +inline float vec3dot(float* a, float* b) { + return (((a[0]) * (b[0])) + ((a[1]) * (b[1])) + ((a[2]) * (b[2]))); +} + +inline s32 mul_fixed32(s32 a, s32 b) +{ + s64 temp = ((s64)a)*((s64)b); + return (s32)(temp>>12); +} + +inline s32 vec3dot_fixed32(s32* a, s32* b) { + const s64 va[] = {a[0],a[1],a[2]}; + const s64 vb[] = {b[0],b[1],b[2]}; + s64 dot = va[0]*vb[0]+va[1]*vb[1]+va[2]*vb[2]; + return (s32)(dot>>12); +} + + #define SUBMITVERTEX(ii, nn) polylist->list[polylist->count].vertIndexes[ii] = tempVertInfo.map[nn]; //Submit a vertex to the GE static void SetVertex() @@ -776,26 +793,46 @@ static void gfx3d_glTexImage_cache() static void gfx3d_glLightDirection_cache(int index) { - u32 v = lightDirection[index]; + s32 v = lightDirection[index]; - // Convert format into floating point value - cacheLightDirection[index][0] = normalTable[v&1023]; - cacheLightDirection[index][1] = normalTable[(v>>10)&1023]; - cacheLightDirection[index][2] = normalTable[(v>>20)&1023]; + s16 x = ((v<<22)>>22)<<3; + s16 y = ((v<<12)>>22)<<3; + s16 z = ((v<<2)>>22)<<3; + + cacheLightDirection[index][0] = x; + cacheLightDirection[index][1] = y; + cacheLightDirection[index][2] = z; cacheLightDirection[index][3] = 0; - /* Multiply the vector by the directional matrix */ - CACHE_ALIGN float temp[16] = {mtxCurrent[2][0]/4096.0f,mtxCurrent[2][1]/4096.0f,mtxCurrent[2][2]/4096.0f,mtxCurrent[2][3]/4096.0f,mtxCurrent[2][4]/4096.0f,mtxCurrent[2][5]/4096.0f,mtxCurrent[2][6]/4096.0f,mtxCurrent[2][7]/4096.0f,mtxCurrent[2][8]/4096.0f,mtxCurrent[2][9]/4096.0f,mtxCurrent[2][10]/4096.0f,mtxCurrent[2][11]/4096.0f,mtxCurrent[2][12]/4096.0f,mtxCurrent[2][13]/4096.0f,mtxCurrent[2][14]/4096.0f,mtxCurrent[2][15]/4096.0f}; - MatrixMultVec3x3(temp, cacheLightDirection[index]); + //Multiply the vector by the directional matrix + MatrixMultVec3x3_fixed(mtxCurrent[2], cacheLightDirection[index]); - /* Calculate the half vector */ - float lineOfSight[4] = {0.0f, 0.0f, -1.0f, 0.0f}; + //Calculate the half angle vector + s32 lineOfSight[4] = {0, 0, (-1)<<12, 0}; for(int i = 0; i < 4; i++) { - cacheHalfVector[index][i] = ((cacheLightDirection[index][i] + lineOfSight[i]) / 2.0f); + cacheHalfVector[index][i] = ((cacheLightDirection[index][i] + lineOfSight[i])); + } + + //normalize the half angle vector + //can't believe the hardware really does this... but yet it seems... + s32 halfLength = ((s32)(sqrt((double)vec3dot_fixed32(cacheHalfVector[index],cacheHalfVector[index]))))<<6; + + if(halfLength!=0) + { + halfLength = abs(halfLength); + halfLength >>= 6; + for(int i = 0; i < 4; i++) + { + s32 temp = cacheHalfVector[index][i]; + temp <<= 6; + temp /= halfLength; + cacheHalfVector[index][i] = temp; + } } } + //=============================================================================== static void gfx3d_glMatrixMode(u32 v) { @@ -1099,80 +1136,85 @@ static void gfx3d_glNormal(s32 v) last_t = (s32)(((s64)nx * mtxCurrent[3][1] + (s64)ny * mtxCurrent[3][5] + (s64)nz * mtxCurrent[3][9] + (_t<<24))>>24); } - CACHE_ALIGN float normal[4] = { nx/4096.0f, ny/4096.0f, nz/4096.0f, 1.0f }; + CACHE_ALIGN s32 normal[4] = { nx,ny,nz,(1<<12) }; - - //use the current normal transform matrix - CACHE_ALIGN float temp[16] = {mtxCurrent[2][0]/4096.0f,mtxCurrent[2][1]/4096.0f,mtxCurrent[2][2]/4096.0f,mtxCurrent[2][3]/4096.0f,mtxCurrent[2][4]/4096.0f,mtxCurrent[2][5]/4096.0f,mtxCurrent[2][6]/4096.0f,mtxCurrent[2][7]/4096.0f,mtxCurrent[2][8]/4096.0f,mtxCurrent[2][9]/4096.0f,mtxCurrent[2][10]/4096.0f,mtxCurrent[2][11]/4096.0f,mtxCurrent[2][12]/4096.0f,mtxCurrent[2][13]/4096.0f,mtxCurrent[2][14]/4096.0f,mtxCurrent[2][15]/4096.0f}; - MatrixMultVec3x3 (temp, normal); + MatrixMultVec3x3_fixed(mtxCurrent[2],normal); //apply lighting model + u8 diffuse[3] = { + (dsDiffuse)&0x1F, + (dsDiffuse>>5)&0x1F, + (dsDiffuse>>10)&0x1F }; + + u8 ambient[3] = { + (dsAmbient)&0x1F, + (dsAmbient>>5)&0x1F, + (dsAmbient>>10)&0x1F }; + + u8 emission[3] = { + (dsEmission)&0x1F, + (dsEmission>>5)&0x1F, + (dsEmission>>10)&0x1F }; + + u8 specular[3] = { + (dsSpecular)&0x1F, + (dsSpecular>>5)&0x1F, + (dsSpecular>>10)&0x1F }; + + int vertexColor[3] = { emission[0], emission[1], emission[2] }; + + for(int i=0; i<4; i++) { - u8 diffuse[3] = { - (dsDiffuse)&0x1F, - (dsDiffuse>>5)&0x1F, - (dsDiffuse>>10)&0x1F }; + if(!((lightMask>>i)&1)) continue; - u8 ambient[3] = { - (dsAmbient)&0x1F, - (dsAmbient>>5)&0x1F, - (dsAmbient>>10)&0x1F }; + u8 _lightColor[3] = { + (lightColor[i])&0x1F, + (lightColor[i]>>5)&0x1F, + (lightColor[i]>>10)&0x1F }; - u8 emission[3] = { - (dsEmission)&0x1F, - (dsEmission>>5)&0x1F, - (dsEmission>>10)&0x1F }; + //This formula is the one used by the DS + //Reference : http://nocash.emubase.de/gbatek.htm#ds3dpolygonlightparameters + s32 fixed_diffuse = std::max(0,-vec3dot_fixed32(cacheLightDirection[i],normal)); + + //todo - this could be cached in this form + s32 fixedTempNegativeHalf[] = {-cacheHalfVector[i][0],-cacheHalfVector[i][1],-cacheHalfVector[i][2],-cacheHalfVector[i][3]}; + s32 dot = vec3dot_fixed32(fixedTempNegativeHalf, normal); - u8 specular[3] = { - (dsSpecular)&0x1F, - (dsSpecular>>5)&0x1F, - (dsSpecular>>10)&0x1F }; - - int vertexColor[3] = { emission[0], emission[1], emission[2] }; - - for(int i=0; i<4; i++) + s32 fixedshininess = 0; + if(dot>0) //prevent shininess on opposite side { - if(!((lightMask>>i)&1)) continue; - - u8 _lightColor[3] = { - (lightColor[i])&0x1F, - (lightColor[i]>>5)&0x1F, - (lightColor[i]>>10)&0x1F }; - - /* This formula is the one used by the DS */ - /* Reference : http://nocash.emubase.de/gbatek.htm#ds3dpolygonlightparameters */ - - float diffuseLevel = std::max(0.0f, -vec3dot(cacheLightDirection[i], normal)); - float shininessLevel = pow(std::max(0.0f, vec3dot(-cacheHalfVector[i], normal)), 2); - - if(dsSpecular & 0x8000) - { - int shininessIndex = (int)(shininessLevel * 128); - if(shininessIndex >= (int)ARRAY_SIZE(gfx3d.state.shininessTable)) { - //we can't print this right now, because when a game triggers this it triggers it _A_LOT_ - //so wait until we have per-frame diagnostics. - //this was tested using Princess Debut (US) after proceeding through the intro and getting the tiara. - //After much research, I determined that this was caused by the game feeding in a totally jacked matrix - //to mult4x4 from 0x02129B80 (after feeding two other valid matrices) - //the game seems to internally index these as: ?, 0x37, 0x2B <-- error - //but, man... this is seriously messed up. there must be something going wrong. - //maybe it has something to do with what looks like a mirror room effect that is going on during this time? - //PROGINFO("ERROR: shininess table out of bounds.\n maybe an emulator error; maybe a non-unit normal; setting to 0\n"); - shininessIndex = 0; - } - shininessLevel = gfx3d.state.shininessTable[shininessIndex]; - } - - for(int c = 0; c < 3; c++) - { - vertexColor[c] += (int)(((specular[c] * _lightColor[c] * shininessLevel) - + (diffuse[c] * _lightColor[c] * diffuseLevel) - + (ambient[c] * _lightColor[c])) / 31.0f); - } + //we have cos(a). it seems that we need cos(2a). trig identity is a fast way to get it. + //cos^2(a)=(1/2)(1+cos(2a)) + //2*cos^2(a)-1=cos(2a) + fixedshininess = 2*mul_fixed32(dot,dot)-4096; + //gbatek is almost right but not quite! } - for(int c=0;c<3;c++) - colorRGB[c] = std::min(31,vertexColor[c]); + //this seems to need to be saturated, or else the table will overflow. + //even without a table, failure to saturate is bad news + fixedshininess = std::min(fixedshininess,4095); + fixedshininess = std::max(fixedshininess,0); + + if(dsSpecular & 0x8000) + { + //shininess is 20.12 fixed point, so >>5 gives us .7 which is 128 entries + //the entries are 8bits each so <<4 gives us .12 again, compatible with the lighting formulas below + //(according to other normal nds procedures, we might should fill the bottom bits with 1 or 0 according to rules...) + fixedshininess = gfx3d.state.shininessTable[fixedshininess>>5]<<4; + } + + for(int c = 0; c < 3; c++) + { + s32 specComp = ((specular[c] * _lightColor[c] * fixedshininess)>>17); //5 bits for color*color and 12 bits for the shininess + s32 diffComp = ((diffuse[c] * _lightColor[c] * fixed_diffuse)>>17); //5bits for the color*color and 12 its for the diffuse + s32 ambComp = ((ambient[c] * _lightColor[c])>>5); //5bits for color*color + vertexColor[c] += specComp + diffComp + ambComp; + } + } + + for(int c=0;c<3;c++) + { + colorRGB[c] = std::min(31,vertexColor[c]); } GFX_DELAY(9); @@ -1322,7 +1364,7 @@ static void gfx3d_glLightDirection (u32 v) { int index = v>>30; - lightDirection[index] = v; + lightDirection[index] = (s32)(v&0x3FFFFFFF); gfx3d_glLightDirection_cache(index); GFX_DELAY(6); } @@ -1336,10 +1378,10 @@ static void gfx3d_glLightColor (u32 v) static BOOL gfx3d_glShininess (u32 val) { - gfx3d.state.shininessTable[shininessInd++] = ((val & 0xFF) / 256.0f); - gfx3d.state.shininessTable[shininessInd++] = (((val >> 8) & 0xFF) / 256.0f); - gfx3d.state.shininessTable[shininessInd++] = (((val >> 16) & 0xFF) / 256.0f); - gfx3d.state.shininessTable[shininessInd++] = (((val >> 24) & 0xFF) / 256.0f); + gfx3d.state.shininessTable[shininessInd++] = ((val & 0xFF)); + gfx3d.state.shininessTable[shininessInd++] = (((val >> 8) & 0xFF)); + gfx3d.state.shininessTable[shininessInd++] = (((val >> 16) & 0xFF)); + gfx3d.state.shininessTable[shininessInd++] = (((val >> 24) & 0xFF)); if (shininessInd < 128) return FALSE; shininessInd = 0; @@ -2336,7 +2378,7 @@ SFORMAT SF_GFX3D[]={ { "GSFC", 4, 4, &gfx3d.state.fogColor}, { "GSFO", 4, 1, &gfx3d.state.fogOffset}, { "GST4", 2, 32, gfx3d.state.u16ToonTable}, - { "GSST", 4, 128, &gfx3d.state.shininessTable[0]}, + { "GSSU", 1, 128, &gfx3d.state.shininessTable[0]}, { "GSSI", 4, 1, &shininessInd}, { "GSAF", 4, 1, &gfx3d.state.activeFlushCommand}, { "GSPF", 4, 1, &gfx3d.state.pendingFlushCommand}, diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 52768d038..9cf07dcf7 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -350,7 +350,7 @@ struct GFX3D_State bool invalidateToon; u16 u16ToonTable[32]; - float shininessTable[128]; + u8 shininessTable[128]; }; struct Viewer3d_State diff --git a/desmume/src/matrix.cpp b/desmume/src/matrix.cpp index 224e9b0e8..e84b06e31 100644 --- a/desmume/src/matrix.cpp +++ b/desmume/src/matrix.cpp @@ -52,6 +52,16 @@ void MatrixMultVec4x4 (const s32 *matrix, s32 *vecPtr) vecPtr[3] = (s32)((x * matrix[3] + y * matrix[7] + z * matrix[11] + w * matrix[15])>>12); } +void MatrixMultVec3x3_fixed(const s32 *matrix, s32 *vecPtr) +{ + s64 x = vecPtr[0]; + s64 y = vecPtr[1]; + s64 z = vecPtr[2]; + + vecPtr[0] = (s32)((x * matrix[0] + y * matrix[4] + z * matrix[ 8])>>12); + vecPtr[1] = (s32)((x * matrix[1] + y * matrix[5] + z * matrix[ 9])>>12); + vecPtr[2] = (s32)((x * matrix[2] + y * matrix[6] + z * matrix[10])>>12); +} //------------------------- //switched SSE functions: implementations for no SSE diff --git a/desmume/src/matrix.h b/desmume/src/matrix.h index bed198a6a..3e39f9a77 100644 --- a/desmume/src/matrix.h +++ b/desmume/src/matrix.h @@ -223,7 +223,7 @@ FORCEINLINE void MatrixMultVec4x4_M2(const float *matrix, float *vecPtr) MatrixMultVec4x4(matrix,vecPtr); } - +void MatrixMultVec3x3_fixed(const s32 *matrix, s32 *vecPtr); FORCEINLINE void MatrixMultVec3x3(const float * matrix, float * vecPtr) { const __m128 vec = _mm_load_ps(vecPtr);