rasterize: some small speedups and experimental optimization code

This commit is contained in:
zeromus 2009-03-09 16:40:30 +00:00
parent e51385c0bf
commit 14efca22bd
1 changed files with 101 additions and 30 deletions

View File

@ -45,6 +45,9 @@
#include "texcache.h" #include "texcache.h"
#include "NDSSystem.h" #include "NDSSystem.h"
//#undef FORCEINLINE
//#define FORCEINLINE
using std::min; using std::min;
using std::max; using std::max;
using std::swap; using std::swap;
@ -58,6 +61,40 @@ static int polynum;
static u8 modulate_table[32][32]; static u8 modulate_table[32][32];
static u8 decal_table[32][32][32]; static u8 decal_table[32][32][32];
static u8 index_lookup_table[65];
static u8 index_start_table[8];
////optimized float floor useful in limited cases
////from http://www.stereopsis.com/FPU.html#convert
//int Real2Int(double val)
//{
// const double _double2fixmagic = 68719476736.0*1.5; //2^36 * 1.5, (52-_shiftamt=36) uses limited precisicion to floor
// const int _shiftamt = 16; //16.16 fixed point representation,
//
// #ifdef WORDS_BIGENDIAN
// #define iman_ 1
// #else
// #define iman_ 0
// #endif
//
// val = val + _double2fixmagic;
// return ((int*)&val)[iman_] >> _shiftamt;
//}
//
//int Real2Int(float val)
//{
// //val -= 0.5f;
// //int temp;
// //__asm {
// // fld val;
// // fistp temp;
// //}
// //return temp;
// return 0;
//}
//----texture cache--- //----texture cache---
@ -248,7 +285,7 @@ static struct Sampler
} }
} }
Fragment::Color sample(float u, float v) FORCEINLINE Fragment::Color sample(float u, float v)
{ {
//should we use floor here? I still think so, but now I am not so sure. //should we use floor here? I still think so, but now I am not so sure.
//note that I changed away from floor for accuracy reasons, not performance, even though it would be faster without the floor. //note that I changed away from floor for accuracy reasons, not performance, even though it would be faster without the floor.
@ -285,7 +322,7 @@ struct Shader
float invu, invv, w; float invu, invv, w;
Fragment::Color materialColor; Fragment::Color materialColor;
void shade(Fragment& dst) FORCEINLINE void shade(Fragment& dst)
{ {
Fragment::Color texColor; Fragment::Color texColor;
float u,v; float u,v;
@ -300,17 +337,17 @@ struct Shader
dst.color.components.g = modulate_table[texColor.components.g][materialColor.components.g]; dst.color.components.g = modulate_table[texColor.components.g][materialColor.components.g];
dst.color.components.b = modulate_table[texColor.components.b][materialColor.components.b]; dst.color.components.b = modulate_table[texColor.components.b][materialColor.components.b];
dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a]; dst.color.components.a = modulate_table[texColor.components.a][materialColor.components.a];
#ifdef _MSC_VER //#ifdef _MSC_VER
if(GetAsyncKeyState(VK_SHIFT)) { //if(GetAsyncKeyState(VK_SHIFT)) {
//debugging tricks // //debugging tricks
dst.color = materialColor; // dst.color = materialColor;
if(GetAsyncKeyState(VK_TAB)) { // if(GetAsyncKeyState(VK_TAB)) {
u8 alpha = dst.color.components.a; // u8 alpha = dst.color.components.a;
dst.color.color = polynum*8+8; // dst.color.color = polynum*8+8;
dst.color.components.a = alpha; // dst.color.components.a = alpha;
} // }
} //}
#endif //#endif
break; break;
case 1: //decal case 1: //decal
u = invu*w; u = invu*w;
@ -354,7 +391,7 @@ struct Shader
} shader; } shader;
static void alphaBlend(Fragment::Color & dst, const Fragment::Color & src) static FORCEINLINE void alphaBlend(Fragment::Color & dst, const Fragment::Color & src)
{ {
if(gfx3d.enableAlphaBlending) if(gfx3d.enableAlphaBlending)
{ {
@ -390,18 +427,28 @@ static void alphaBlend(Fragment::Color & dst, const Fragment::Color & src)
} }
} }
static void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) { //doesnt work yet
//static FORCEINLINE int fastFloor(float f)
//{
// float temp = f + 1.f;
// int ret = (*((u32*)&temp))&0x7FFFFF;
// return ret;
//}
static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) {
Fragment &destFragment = screen[adr]; Fragment &destFragment = screen[adr];
//depth test //depth test
u32 depth; u32 depth;
if(gfx3d.wbuffer) if(gfx3d.wbuffer) {
//not sure about this //not sure about this
//this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64 //this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64
depth = 4096*w; depth = (4096*w);
}
else else
{ {
depth = z*0x7FFF; //not sure about this //depth = fastFloor(z*0x7FFF)>>8;
depth = z*0x7FFF;
} }
if(polyAttr.decalMode) if(polyAttr.decalMode)
{ {
@ -573,7 +620,7 @@ inline long Ceil28_4( fixed28_4 Value ) {
struct edge_fx_fl { struct edge_fx_fl {
edge_fx_fl() {} edge_fx_fl() {}
edge_fx_fl(int Top, int Bottom); edge_fx_fl(int Top, int Bottom);
inline int Step(); FORCEINLINE int Step();
long X, XStep, Numerator, Denominator; // DDA info for x long X, XStep, Numerator, Denominator; // DDA info for x
long ErrorTerm; long ErrorTerm;
@ -581,8 +628,8 @@ struct edge_fx_fl {
struct Interpolant { struct Interpolant {
float curr, step, stepExtra; float curr, step, stepExtra;
void doStep() { curr += step; } FORCEINLINE void doStep() { curr += step; }
void doStepExtra() { curr += stepExtra; } FORCEINLINE void doStepExtra() { curr += stepExtra; }
void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) { void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) {
dx = 0; dx = 0;
dy *= (bottom-top); dy *= (bottom-top);
@ -599,11 +646,11 @@ struct edge_fx_fl {
}; };
Interpolant interpolants[NUM_INTERPOLANTS]; Interpolant interpolants[NUM_INTERPOLANTS];
}; };
void doStepInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStep(); } void FORCEINLINE doStepInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStep(); }
void doStepExtraInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStepExtra(); } void FORCEINLINE doStepExtraInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStepExtra(); }
}; };
edge_fx_fl::edge_fx_fl(int Top, int Bottom) { FORCEINLINE edge_fx_fl::edge_fx_fl(int Top, int Bottom) {
Y = Ceil28_4(verts[Top]->y); Y = Ceil28_4(verts[Top]->y);
int YEnd = Ceil28_4(verts[Bottom]->y); int YEnd = Ceil28_4(verts[Bottom]->y);
Height = YEnd - Y; Height = YEnd - Y;
@ -633,7 +680,7 @@ edge_fx_fl::edge_fx_fl(int Top, int Bottom) {
} }
} }
inline int edge_fx_fl::Step() { FORCEINLINE int edge_fx_fl::Step() {
X += XStep; Y++; Height--; X += XStep; Y++; Height--;
doStepInterpolants(); doStepInterpolants();
@ -685,8 +732,9 @@ static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
u += du_dx; u += du_dx;
v += dv_dx; v += dv_dx;
z += dz_dx; z += dz_dx;
for(int i=0;i<3;i++) color[0] += dc_dx[0];
color[i] += dc_dx[i]; color[1] += dc_dx[1];
color[2] += dc_dx[2];
} }
} }
@ -807,6 +855,20 @@ static char SoftRastInit(void)
decal_table[a][i][j] = ((i*a) + (j*(31-a))) >> 5; decal_table[a][i][j] = ((i*a) + (j*(31-a))) >> 5;
} }
} }
//these tables are used to increment through vert lists without having to do wrapping logic/math
int idx=0;
for(int i=3;i<=8;i++)
{
index_start_table[i-3] = idx;
for(int j=0;j<i;j++) {
int a = j;
int b = j+1;
if(b==i) b = 0;
index_lookup_table[idx++] = a;
index_lookup_table[idx++] = b;
}
}
} }
TexCache_Reset(); TexCache_Reset();
@ -887,7 +949,7 @@ static T interpolate(const float ratio, const T& x0, const T& x1) {
//http://www.cs.berkeley.edu/~ug/slide/pipeline/assignments/as6/discussion.shtml //http://www.cs.berkeley.edu/~ug/slide/pipeline/assignments/as6/discussion.shtml
static VERT clipPoint(VERT* inside, VERT* outside, int coord, int which) static FORCEINLINE VERT clipPoint(VERT* inside, VERT* outside, int coord, int which)
{ {
VERT ret; VERT ret;
@ -936,7 +998,7 @@ static VERT clipPoint(VERT* inside, VERT* outside, int coord, int which)
#define CLIPLOG(X) #define CLIPLOG(X)
#define CLIPLOG2(X,Y,Z) #define CLIPLOG2(X,Y,Z)
static void clipSegmentVsPlane(VERT** verts, const int coord, int which) static FORCEINLINE void clipSegmentVsPlane(VERT** verts, const int coord, int which)
{ {
bool out0, out1; bool out0, out1;
if(which==-1) if(which==-1)
@ -982,7 +1044,7 @@ static void clipSegmentVsPlane(VERT** verts, const int coord, int which)
} }
} }
static void clipPolyVsPlane(const int coord, int which) static FORCEINLINE void clipPolyVsPlane(const int coord, int which)
{ {
outClippedPoly.type = 0; outClippedPoly.type = 0;
CLIPLOG2("Clipping coord %d against %f\n",coord,x); CLIPLOG2("Clipping coord %d against %f\n",coord,x);
@ -991,6 +1053,15 @@ static void clipPolyVsPlane(const int coord, int which)
VERT* testverts[2] = {&tempClippedPoly.clipVerts[i],&tempClippedPoly.clipVerts[(i+1)%tempClippedPoly.type]}; VERT* testverts[2] = {&tempClippedPoly.clipVerts[i],&tempClippedPoly.clipVerts[(i+1)%tempClippedPoly.type]};
clipSegmentVsPlane(testverts, coord, which); clipSegmentVsPlane(testverts, coord, which);
} }
//this doesnt seem to help any. leaving it until i decide to get rid of it
//int j = index_start_table[tempClippedPoly.type-3];
//for(int i=0;i<tempClippedPoly.type;i++,j+=2)
//{
// VERT* testverts[2] = {&tempClippedPoly.clipVerts[index_lookup_table[j]],&tempClippedPoly.clipVerts[index_lookup_table[j+1]]};
// clipSegmentVsPlane(testverts, coord, which);
//}
tempClippedPoly = outClippedPoly; tempClippedPoly = outClippedPoly;
} }