diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 4d1ee83c7..4c4757966 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -45,6 +45,9 @@ #include "texcache.h" #include "NDSSystem.h" +//#undef FORCEINLINE +//#define FORCEINLINE + using std::min; using std::max; using std::swap; @@ -58,6 +61,40 @@ static int polynum; static u8 modulate_table[32][32]; static u8 decal_table[32][32][32]; +static u8 index_lookup_table[65]; +static u8 index_start_table[8]; + +////optimized float floor useful in limited cases +////from http://www.stereopsis.com/FPU.html#convert +//int Real2Int(double val) +//{ +// const double _double2fixmagic = 68719476736.0*1.5; //2^36 * 1.5, (52-_shiftamt=36) uses limited precisicion to floor +// const int _shiftamt = 16; //16.16 fixed point representation, +// +// #ifdef WORDS_BIGENDIAN +// #define iman_ 1 +// #else +// #define iman_ 0 +// #endif +// +// val = val + _double2fixmagic; +// return ((int*)&val)[iman_] >> _shiftamt; +//} + + +// +//int Real2Int(float val) +//{ +// //val -= 0.5f; +// //int temp; +// //__asm { +// // fld val; +// // fistp temp; +// //} +// //return temp; +// return 0; +//} + //----texture cache--- @@ -248,7 +285,7 @@ static struct Sampler } } - Fragment::Color sample(float u, float v) + FORCEINLINE Fragment::Color sample(float u, float v) { //should we use floor here? I still think so, but now I am not so sure. //note that I changed away from floor for accuracy reasons, not performance, even though it would be faster without the floor. @@ -285,7 +322,7 @@ struct Shader float invu, invv, w; Fragment::Color materialColor; - void shade(Fragment& dst) + FORCEINLINE void shade(Fragment& dst) { Fragment::Color texColor; float u,v; @@ -300,17 +337,17 @@ struct Shader dst.color.components.g = modulate_table[texColor.components.g][materialColor.components.g]; dst.color.components.b = modulate_table[texColor.components.b][materialColor.components.b]; dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a]; - #ifdef _MSC_VER - if(GetAsyncKeyState(VK_SHIFT)) { - //debugging tricks - dst.color = materialColor; - if(GetAsyncKeyState(VK_TAB)) { - u8 alpha = dst.color.components.a; - dst.color.color = polynum*8+8; - dst.color.components.a = alpha; - } - } - #endif + //#ifdef _MSC_VER + //if(GetAsyncKeyState(VK_SHIFT)) { + // //debugging tricks + // dst.color = materialColor; + // if(GetAsyncKeyState(VK_TAB)) { + // u8 alpha = dst.color.components.a; + // dst.color.color = polynum*8+8; + // dst.color.components.a = alpha; + // } + //} + //#endif break; case 1: //decal u = invu*w; @@ -354,7 +391,7 @@ struct Shader } shader; -static void alphaBlend(Fragment::Color & dst, const Fragment::Color & src) +static FORCEINLINE void alphaBlend(Fragment::Color & dst, const Fragment::Color & src) { if(gfx3d.enableAlphaBlending) { @@ -390,18 +427,28 @@ static void alphaBlend(Fragment::Color & dst, const Fragment::Color & src) } } -static void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) { +//doesnt work yet +//static FORCEINLINE int fastFloor(float f) +//{ +// float temp = f + 1.f; +// int ret = (*((u32*)&temp))&0x7FFFFF; +// return ret; +//} + +static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) { Fragment &destFragment = screen[adr]; //depth test u32 depth; - if(gfx3d.wbuffer) + if(gfx3d.wbuffer) { //not sure about this //this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64 - depth = 4096*w; + depth = (4096*w); + } else { - depth = z*0x7FFF; //not sure about this + //depth = fastFloor(z*0x7FFF)>>8; + depth = z*0x7FFF; } if(polyAttr.decalMode) { @@ -573,7 +620,7 @@ inline long Ceil28_4( fixed28_4 Value ) { struct edge_fx_fl { edge_fx_fl() {} edge_fx_fl(int Top, int Bottom); - inline int Step(); + FORCEINLINE int Step(); long X, XStep, Numerator, Denominator; // DDA info for x long ErrorTerm; @@ -581,8 +628,8 @@ struct edge_fx_fl { struct Interpolant { float curr, step, stepExtra; - void doStep() { curr += step; } - void doStepExtra() { curr += stepExtra; } + FORCEINLINE void doStep() { curr += step; } + FORCEINLINE void doStepExtra() { curr += stepExtra; } void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) { dx = 0; dy *= (bottom-top); @@ -599,11 +646,11 @@ struct edge_fx_fl { }; Interpolant interpolants[NUM_INTERPOLANTS]; }; - void doStepInterpolants() { for(int i=0;iy); int YEnd = Ceil28_4(verts[Bottom]->y); Height = YEnd - Y; @@ -633,7 +680,7 @@ edge_fx_fl::edge_fx_fl(int Top, int Bottom) { } } -inline int edge_fx_fl::Step() { +FORCEINLINE int edge_fx_fl::Step() { X += XStep; Y++; Height--; doStepInterpolants(); @@ -685,8 +732,9 @@ static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight) u += du_dx; v += dv_dx; z += dz_dx; - for(int i=0;i<3;i++) - color[i] += dc_dx[i]; + color[0] += dc_dx[0]; + color[1] += dc_dx[1]; + color[2] += dc_dx[2]; } } @@ -807,6 +855,20 @@ static char SoftRastInit(void) decal_table[a][i][j] = ((i*a) + (j*(31-a))) >> 5; } } + + //these tables are used to increment through vert lists without having to do wrapping logic/math + int idx=0; + for(int i=3;i<=8;i++) + { + index_start_table[i-3] = idx; + for(int j=0;j