From 517a27294acf06c86e0abd159fd384254463b2ea Mon Sep 17 00:00:00 2001 From: zeromus Date: Tue, 3 Feb 2009 06:36:36 +0000 Subject: [PATCH] rasterize: textures, shading, blending --- desmume/src/OGLRender.cpp | 18 +- desmume/src/gfx3d.cpp | 30 +--- desmume/src/gfx3d.h | 1 - desmume/src/rasterize.cpp | 364 +++++++++++++++++++++++++++++--------- desmume/src/texcache.cpp | 18 ++ desmume/src/texcache.h | 1 + 6 files changed, 297 insertions(+), 135 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index c7fc8a7c0..8b8033f9e 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -683,9 +683,6 @@ static void OGLRender() glViewport(gfx3d.viewport.x,gfx3d.viewport.y,gfx3d.viewport.width,gfx3d.viewport.height); - //it might be handy to print the size of the projection list, in case a game is doing something weird with it - //printf("%d\n",gfx3d.projlist->count); - //we're not using the alpha clear color right now glClearColor(gfx3d.clearColor[0],gfx3d.clearColor[1],gfx3d.clearColor[2], gfx3d.clearColor[3]); glClearDepth(gfx3d.clearDepth); @@ -761,20 +758,7 @@ static void OGLRender() static void OGLVramReconfigureSignal() { - //well, this is a very blunt instrument. - //lets just flag all the textures as invalid. - for(int i=0;icount + tempVertInfo.count - continuation; tempVertInfo.count++; @@ -709,27 +702,6 @@ static void SetVertex() if(completed) { POLY &poly = polylist->list[polylist->count]; - //todo - dont overrun proj list - - //see if the last entry in the proj list matches the current matrix, if there is one. - /* if(projlist->count != 0 && - //here is an example of something that does not work. - //(for a speed hack, we consider the matrices different if the first element differs) - //mtxCurrent[0][0] == projlist->projMatrix[projlist->count-1][0] - - //here is what we must do: make sure the matrices are identical - !MatrixCompare(mtxCurrent[0],projlist->projMatrix[projlist->count-1]) - ) - { - //it matches. use it - poly.projIndex = projlist->count-1; - } - else - { - MatrixCopy(projlist->projMatrix[projlist->count],mtxCurrent[0]); - poly.projIndex = projlist->count; - projlist->count++; - }*/ poly.polyAttr = polyAttr; poly.texParam = textureFormat; diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 726714abd..8a7258f88 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -93,7 +93,6 @@ struct PROJLIST { struct VERT { float coord[4]; float texcoord[2]; - u32 depth; u8 color[4]; }; diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 31d570051..34eab438c 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -21,10 +21,16 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +//nothing in this file should be assumed to be accurate +//please check everything carefully, and sign off on it when you think it is accurate +//if you change it, erase other signatures. +//if you optimize it and think it is risky, erase other signatures + #include "Rasterize.h" #include +#include "bits.h" #include "common.h" #include "render3D.h" #include "gfx3d.h" @@ -36,27 +42,39 @@ using std::max; template T min(T a, T b, T c) { return min(min(a,b),c); } template T max(T a, T b, T c) { return max(max(a,b),c); } -static u16 screen[256*192]; - -static struct +struct Fragment { - int width, height; -} Texture; + union Color { + u32 color; + struct { + //#ifdef WORDS_BIGENDIAN ? + u8 r,g,b,a; + } components; + } color; -void set_pixel(int x, int y, u16 color) + u8 polyid; + u32 depth; +}; + +struct Vertex { - if(x<0 || y<0 || x>=256 || y>=192) return; - screen[y*256+x] = color | 0x8000; + VERT* vert; + int w; +} verts[3]; + +void SubmitVertex(VERT* rawvert) +{ + static int vert_index = 0; + Vertex &vert = verts[vert_index++]; + if(vert_index==3) vert_index = 0; + + vert.vert = rawvert; + vert.w = rawvert->coord[3] * 4096; //not sure about this } -void hline(int x, int y, int xe, u16 color) -{ - for(int i=x;i<=xe;i++) - set_pixel(x,y,color); -} +static Fragment screen[256*192]; //http://www.devmaster.net/forums/showthread.php?t=1884 - #if defined(_MSC_VER) inline int iround(float x) { @@ -76,6 +94,122 @@ int iround(float f) { } #endif + +static struct +{ + int width, height; + int wmask, hmask; + int wrap; + void setup(u32 format) + { + width=(8 << ((format>>20)&0x07)); + height=(8 << ((format>>23)&0x07)); + wmask = width-1; + hmask = height-1; + wrap = (format>>16)&0xF; + } + + void clamp(int &val, int size, int sizemask){ + if(val<0) val = 0; + if(val>sizemask) val = sizemask; + } + void hclamp(int &val) { clamp(val,width,wmask); } + void vclamp(int &val) { clamp(val,height,hmask); } + + void repeat(int &val, int size, int sizemask) { + val &= sizemask; + } + void hrepeat(int &val) { repeat(val,width,wmask); } + void vrepeat(int &val) { repeat(val,height,hmask); } + + void flip(int &val, int size, int sizemask) { + val &= ((size<<1)-1); + if(val>=size) val = (size<<1)-val-1; + } + void hflip(int &val) { flip(val,width,wmask); } + void vflip(int &val) { flip(val,height,hmask); } + + void dowrap(int& iu, int& iv) + { + switch(wrap) { + //flip none + case 0x0: hclamp(iu); vclamp(iv); break; + case 0x1: hrepeat(iu); vclamp(iv); break; + case 0x2: hclamp(iu); vrepeat(iv); break; + case 0x3: hrepeat(iu); vrepeat(iv); break; + //flip S + case 0x4: hclamp(iu); vclamp(iv); break; + case 0x5: hflip(iu); vclamp(iv); break; + case 0x6: hclamp(iu); vrepeat(iv); break; + case 0x7: hflip(iu); vrepeat(iv); break; + //flip T + case 0x8: hclamp(iu); vclamp(iv); break; + case 0x9: hrepeat(iu); vclamp(iv); break; + case 0xA: hclamp(iu); vflip(iv); break; + case 0xB: hrepeat(iu); vflip(iv); break; + //flip both + case 0xC: hclamp(iu); vclamp(iv); break; + case 0xD: hflip(iu); vclamp(iv); break; + case 0xE: hclamp(iu); vflip(iv); break; + case 0xF: hflip(iu); vflip(iv); break; + } + } + + Fragment::Color sample(float u, float v) + { + int iu = iround(u); + int iv = iround(v); + dowrap(iu,iv); + + Fragment::Color color; + u32 col32 = ((u32*)TexCache_texMAP)[iv*width+iu]; + //todo - teach texcache how to provide these already in 5555 + col32 >>= 3; + col32 &= 0x1F1F1F1F; + color.color = col32; + return color; + } + +} sampler; + +struct Shader +{ + u8 mode; + void setup(u32 polyattr) + { + mode = (polyattr>>4)&0x3; + } + + float u, v; + Fragment::Color materialColor; + + void shade(Fragment& dst) + { + Fragment::Color texColor; + //if(mode==0||mode==1) + // texColor = sampler.sample(i_tex_u.Z,i_tex_v.Z); + + switch(mode) + { + case 0: //modulate + texColor = sampler.sample(u,v); + dst.color.components.r = ((texColor.components.r+1) * (materialColor.components.r+1)-1)>>5; + dst.color.components.g = ((texColor.components.g+1) * (materialColor.components.g+1)-1)>>5; + dst.color.components.b = ((texColor.components.b+1) * (materialColor.components.b+1)-1)>>5; + dst.color.components.a = ((texColor.components.a+1) * (materialColor.components.a+1)-1)>>5; + break; + case 1: //decal + case 2: + case 3: //..and everything else, for now + texColor = sampler.sample(u,v); + dst.color = texColor; + break; + } + } + +} shader; + + struct Interpolator { int A,B,C; @@ -109,22 +243,21 @@ struct Interpolator FORCEINLINE void pop() { Z = pZ; } FORCEINLINE void incy() { Z += dy; } FORCEINLINE void incx() { Z += dx; } + FORCEINLINE void incx(int count) { Z += dx*count; } }; //http://www.devmaster.net/forums/showthread.php?t=1884&page=1 //todo - change to the tile-based renderer and try to apply some optimizations from that thread -void triangle_from_devmaster(VERT** verts) +void triangle_from_devmaster() { - u16 color =0x7FFF; - // 28.4 fixed-point coordinates - const int Y1 = iround(16.0f * verts[0]->coord[1]); - const int Y2 = iround(16.0f * verts[1]->coord[1]); - const int Y3 = iround(16.0f * verts[2]->coord[1]); + const int Y1 = iround(16.0f * verts[0].vert->coord[1]); + const int Y2 = iround(16.0f * verts[1].vert->coord[1]); + const int Y3 = iround(16.0f * verts[2].vert->coord[1]); - const int X1 = iround(16.0f * verts[0]->coord[0]); - const int X2 = iround(16.0f * verts[1]->coord[0]); - const int X3 = iround(16.0f * verts[2]->coord[0]); + const int X1 = iround(16.0f * verts[0].vert->coord[0]); + const int X2 = iround(16.0f * verts[1].vert->coord[0]); + const int X3 = iround(16.0f * verts[2].vert->coord[0]); // Deltas const int DX12 = X1 - X2; @@ -166,76 +299,122 @@ void triangle_from_devmaster(VERT** verts) int CY2 = C2 + DX23 * (miny << 4) - DY23 * (minx << 4); int CY3 = C3 + DX31 * (miny << 4) - DY31 * (minx << 4); - float fx1 = verts[0]->coord[0], fy1 = verts[0]->coord[1]; - float fx2 = verts[1]->coord[0], fy2 = verts[1]->coord[1]; - float fx3 = verts[2]->coord[0], fy3 = verts[2]->coord[1]; - u8 r1 = verts[0]->color[0], g1 = verts[0]->color[1], b1 = verts[0]->color[2]; - u8 r2 = verts[1]->color[0], g2 = verts[1]->color[1], b2 = verts[1]->color[2]; - u8 r3 = verts[2]->color[0], g3 = verts[2]->color[1], b3 = verts[2]->color[2]; - int u1 = verts[0]->texcoord[0], v1 = verts[0]->texcoord[1]; - int u2 = verts[1]->texcoord[0], v2 = verts[1]->texcoord[1]; - int u3 = verts[2]->texcoord[0], v3 = verts[2]->texcoord[1]; + float fx1 = verts[0].vert->coord[0], fy1 = verts[0].vert->coord[1]; + float fx2 = verts[1].vert->coord[0], fy2 = verts[1].vert->coord[1]; + float fx3 = verts[2].vert->coord[0], fy3 = verts[2].vert->coord[1]; + u8 r1 = verts[0].vert->color[0], g1 = verts[0].vert->color[1], b1 = verts[0].vert->color[2], a1 = verts[0].vert->color[3]; + u8 r2 = verts[1].vert->color[0], g2 = verts[1].vert->color[1], b2 = verts[1].vert->color[2], a2 = verts[1].vert->color[3]; + u8 r3 = verts[2].vert->color[0], g3 = verts[2].vert->color[1], b3 = verts[2].vert->color[2], a3 = verts[2].vert->color[3]; + int u1 = verts[0].vert->texcoord[0], v1 = verts[0].vert->texcoord[1]; + int u2 = verts[1].vert->texcoord[0], v2 = verts[1].vert->texcoord[1]; + int u3 = verts[2].vert->texcoord[0], v3 = verts[2].vert->texcoord[1]; + int w1 = verts[0].w, w2 = verts[1].w, w3 = verts[2].w; Interpolator i_color_r(fx1,fx2,fx3,fy1,fy2,fy3,r1,r2,r3); Interpolator i_color_g(fx1,fx2,fx3,fy1,fy2,fy3,g1,g2,g3); Interpolator i_color_b(fx1,fx2,fx3,fy1,fy2,fy3,b1,b2,b3); + Interpolator i_color_a(fx1,fx2,fx3,fy1,fy2,fy3,a1,a2,a3); Interpolator i_tex_u(fx1,fx2,fx3,fy1,fy2,fy3,u1,u2,u3); Interpolator i_tex_v(fx1,fx2,fx3,fy1,fy2,fy3,v1,v2,v3); + Interpolator i_w(fx1,fx2,fx3,fy1,fy2,fy3,w1,w2,w3); i_color_r.init(minx,miny); i_color_g.init(minx,miny); i_color_b.init(minx,miny); + i_color_a.init(minx,miny); i_tex_u.init(minx,miny); i_tex_v.init(minx,miny); + i_w.init(minx,miny); for(int y = miny; y < maxy; y++) { - int CX1 = CY1; + //HACK - bad screen clipping + + int CX1 = CY1; int CX2 = CY2; int CX3 = CY3; bool done = false; - i_color_r.push(); i_color_g.push(); i_color_b.push(); + i_color_r.push(); i_color_g.push(); i_color_b.push(); ; i_color_a.push(); i_tex_u.push(); i_tex_v.push(); - for(int x = minx; x < maxx; x++) - { - if(CX1 > 0 && CX2 > 0 && CX3 > 0) - { - //material color - //color = R5G5B5TORGB15(i_color_r.cur(),i_color_g.cur(),i_color_b.cur()); - - //texture - int u = i_tex_u.cur(); - int v = i_tex_v.cur(); - if(u<0) u = 0; - if(v<0) v = 0; - u32 color32 = ((u32*)TexCache_texMAP)[v*Texture.width+u]; - color32>>=3; - color32 &= 0x1F1F1F1F; - u8* color8 = (u8*)&color32; - color = (color8[0] | (color8[1] << 5) | (color8[2] << 10)); - - //hack: for testing, dont render non-opaque textures - if(color8[3] < 0x1F) return; - - set_pixel(x,desty,color); + i_w.push(); - done = true; - } else if(done) break; + if(y>=0 && y<192) + { + int xaccum = 1; + for(int x = minx; x < maxx; x++) + { + if(CX1 > 0 && CX2 > 0 && CX3 > 0) + { + done = true; - i_color_r.incx(); i_color_g.incx(); i_color_b.incx(); - i_tex_u.incx(); i_tex_v.incx(); + //reject out of bounds pixels + if(x<0 || x>=256) goto rejected_fragment; - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } + //execute interpolators. + //HACK: we defer this until we know we need it, and accumulate the number of deltas which are necessary. + //this is just a temporary measure until we do proper clipping against the clip frustum. + //since we dont, we are receiving waaay out of bounds polys and so unless we do this we spend a lot of time calculating + //out of bounds pixels + i_color_r.incx(xaccum); i_color_g.incx(xaccum); i_color_b.incx(xaccum); i_color_a.incx(xaccum); + i_tex_u.incx(xaccum); i_tex_v.incx(xaccum); + i_w.incx(xaccum); + xaccum = 0; + + int adr = (y<<8)+x; + Fragment &destFragment = screen[adr]; + + //w-buffer depth test + int w = i_w.cur(); + if(w>destFragment.depth) + goto rejected_fragment; + + //material color + //color = R5G5B5TORGB15(i_color_r.cur(),i_color_g.cur(),i_color_b.cur()); + + shader.u = i_tex_u.Z; + shader.v = i_tex_v.Z; + shader.materialColor.components.a = i_color_a.cur(); + shader.materialColor.components.r = i_color_r.cur(); + shader.materialColor.components.g = i_color_g.cur(); + shader.materialColor.components.b = i_color_b.cur(); + Fragment shaderOutput; + shader.shade(shaderOutput); + + //alpha blend + if(shaderOutput.color.components.a == 31) + { + destFragment.color = shaderOutput.color; + } + else + { + u8 alpha = shaderOutput.color.components.a+1; + u8 invAlpha = 32 - alpha; + destFragment.color.components.r = (alpha*shaderOutput.color.components.r + invAlpha*destFragment.color.components.r)>>5; + destFragment.color.components.g = (alpha*shaderOutput.color.components.g + invAlpha*destFragment.color.components.g)>>5; + destFragment.color.components.b = (alpha*shaderOutput.color.components.b + invAlpha*destFragment.color.components.b)>>5; + destFragment.color.components.a = max(shaderOutput.color.components.b,destFragment.color.components.a); + } + + destFragment.depth = w; + + } else if(done) break; + rejected_fragment: + xaccum++; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + } //end of y inbounds check + i_color_a.pop(); i_color_a.incy(); i_color_r.pop(); i_color_r.incy(); i_color_g.pop(); i_color_g.incy(); i_color_b.pop(); i_color_b.incy(); i_tex_u.pop(); i_tex_u.incy(); i_tex_v.pop(); i_tex_v.incy(); + i_w.pop(); i_w.incy(); CY1 += FDX12; @@ -255,12 +434,26 @@ static void Reset() {} static void Close() {} -static void VramReconfigureSignal() {} +static void VramReconfigureSignal() { + TexCache_Invalidate(); +} static void GetLine(int line, u16* dst, u8* dstAlpha) { - memcpy(dst,screen+(191-line)*256,512); - memset(dstAlpha,16,256); + Fragment* src = screen+((191-line)<<8); + for(int i=0;i<256;i++) + { + u8 r = src->color.components.r; + u8 g = src->color.components.g; + u8 b = src->color.components.b; + *dst = R5G5B5TORGB15(r,g,b); + if(src->color.components.a > 0) *dst |= 0x8000; + *dstAlpha = alpha_5bit_to_4bit[src->color.components.a]; + src++; + dst++; + dstAlpha++; + } + } static void GetLineCaptured(int line, u16* dst) {} @@ -273,7 +466,9 @@ static void Render() //B. backface cull //C. transforms - memset(screen,0,256*192*2); + memset(screen,0,sizeof(screen)); + for(int i=0;i<256*192;i++) + screen[i].depth = 0x007FFFFF; for(int i=0;icount;i++) { @@ -292,39 +487,32 @@ static void Render() int type = poly->type; TexCache_SetTexture(poly->texParam,poly->texPalette); - if(TexCache_Curr()) - Texture.width = TexCache_Curr()->sizeX; + sampler.setup(poly->texParam); //note that when we build our triangle vert lists, we reorder them for our renderer. //we should probably fix the renderer so we dont have to do this; //but then again, what does it matter? if(type == 4) { - VERT* vertA[3] = { - &gfx3d.vertlist->list[poly->vertIndexes[0]], - &gfx3d.vertlist->list[poly->vertIndexes[2]], - &gfx3d.vertlist->list[poly->vertIndexes[1]], - }; + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[2]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[1]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[0]]); - triangle_from_devmaster(vertA); + triangle_from_devmaster(); - VERT* vertB[3] = { - &gfx3d.vertlist->list[poly->vertIndexes[0]], - &gfx3d.vertlist->list[poly->vertIndexes[3]], - &gfx3d.vertlist->list[poly->vertIndexes[2]], - }; + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[0]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[3]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[2]]); - triangle_from_devmaster(vertB); + triangle_from_devmaster(); } if(type == 3) { - VERT* vert[3] = { - &gfx3d.vertlist->list[poly->vertIndexes[2]], - &gfx3d.vertlist->list[poly->vertIndexes[1]], - &gfx3d.vertlist->list[poly->vertIndexes[0]], - }; + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[2]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[1]]); + SubmitVertex(&gfx3d.vertlist->list[poly->vertIndexes[0]]); - triangle_from_devmaster(vert); + triangle_from_devmaster(); } } diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index a9c8fdfe8..9927078d8 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -590,5 +590,23 @@ TextureCache* TexCache_Curr() else return &texcache[lastTexture]; } +void TexCache_Invalidate() +{ + //well, this is a very blunt instrument. + //lets just flag all the textures as invalid. + for(int i=0;i