diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index afc46ad52..c7fc8a7c0 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -69,123 +69,8 @@ static void ENDGL() { #include "gfx3d.h" #include "shaders.h" +#include "texcache.h" -//This class represents a number of regions of memory which should be viewed as contiguous -class MemSpan -{ -public: - static const int MAXSIZE = 8; - - MemSpan() - : numItems(0) - {} - - int numItems; - - struct Item { - u32 start; - u32 len; - u8* ptr; - u32 ofs; //offset within the memspan - } items[MAXSIZE]; - - int size; - - //this MemSpan shall be considered the first argument to a standard memcmp - //the length shall be as specified in this MemSpan, unless you specify otherwise - int memcmp(void* buf2, int size=-1) - { - if(size==-1) size = this->size; - size = std::min(this->size,size); - for(int i=0;isize; - size = std::min(this->size,size); - u8* bufptr = (u8*)buf; - int done = 0; - for(int i=0;i>17)&3; //slots will wrap around - curr.len = std::min(len,0x20000-curr.start); - curr.ofs = currofs; - len -= curr.len; - ofs += curr.len; - currofs += curr.len; - u8* ptr = ARM9Mem.textureSlotAddr[slot]; - - if(ptr == ARM9Mem.blank_memory) { - PROGINFO("Tried to reference unmapped texture memory: slot %d\n",slot); - } - curr.ptr = ptr + curr.start; - } - return ret; -} - -//creates a MemSpan in texture palette memory -static MemSpan MemSpan_TexPalette(u32 ofs, u32 len) -{ - MemSpan ret; - ret.size = len; - u32 currofs = 0; - while(len) { - MemSpan::Item &curr = ret.items[ret.numItems++]; - curr.start = ofs&0x3FFF; - u32 slot = (ofs>>14)&7; //this masks to 8 slots, but there are really only 6 - if(slot>5) { - PROGINFO("Texture palette overruns texture memory. Wrapping at palette slot 0.\n"); - slot -= 5; - } - curr.len = std::min(len,0x4000-curr.start); - curr.ofs = currofs; - len -= curr.len; - ofs += curr.len; - //if(len != 0) - //here is an actual test case of bank spanning - currofs += curr.len; - u8* ptr = ARM9Mem.texPalSlot[slot]; - - if(ptr == ARM9Mem.blank_memory) { - PROGINFO("Tried to reference unmapped texture palette memory: 16k slot #%d\n",slot); - } - curr.ptr = ptr + curr.start; - } - return ret; -} #ifndef CTASSERT @@ -199,14 +84,12 @@ static const unsigned short map3d_cull[4] = {GL_FRONT_AND_BACK, GL_FRONT, GL_BAC static const int texEnv[4] = { GL_MODULATE, GL_DECAL, GL_MODULATE, GL_MODULATE }; static const int depthFunc[2] = { GL_LESS, GL_EQUAL }; static bool needRefreshFramebuffer = false; -static unsigned char texMAP[1024*2048*4]; -static unsigned int textureMode=TEXMODE_NONE; + float clearAlpha; -//raw ds format poly attributes, installed from the display list -static u32 textureFormat=0, texturePalette=0; + //derived values extracted from polyattr etc static bool wireframe=false, alpha31=false; @@ -219,6 +102,8 @@ static bool alphaDepthWrite; static unsigned int lightMask=0; static bool isTranslucent; +static u32 textureFormat=0, texturePalette=0; + //------------------------------------------------------------ #define OGLEXT(x,y) x y = 0; @@ -335,37 +220,6 @@ static void _xglDisable(GLenum cap) { _xglDisable(cap); } -//================================================= Textures -#define MAX_TEXTURE 500 -#ifdef SSE2 -struct ALIGN(16) TextureCache -#else -struct ALIGN(8) TextureCache -#endif -{ - GLenum id; - u32 frm; - u32 mode; - u32 pal; - u32 sizeX; - u32 sizeY; - float invSizeX; - float invSizeY; - int textureSize, indexSize; - u8 texture[128*1024]; // 128Kb texture slot - u8 palette[256*2]; - - //set if this texture is suspected be invalid due to a vram reconfigure - bool suspectedInvalid; - -}; - -TextureCache texcache[MAX_TEXTURE+1]; -u32 texcache_count; - -u32 texcache_start; -u32 texcache_stop; -//u32 texcache_last; GLenum oglTempTextureID[MAX_TEXTURE]; GLenum oglToonTableTextureID; @@ -418,6 +272,7 @@ GLuint shaderProgram; static GLuint hasTexLoc; static GLuint texBlendLoc; +static bool hasTexture = false; /* Shaders init */ @@ -479,24 +334,38 @@ static void OGLReset() int i; //reset the texture cache - memset(&texcache,0,sizeof(texcache)); - texcache_count=0; + TexCache_Reset(); for (i = 0; i < MAX_TEXTURE; i++) texcache[i].id=oglTempTextureID[i]; - texcache_start=0; - texcache_stop=MAX_TEXTURE<<1; - for(i=0;i>26)&0x07); - unsigned int sizeX=(8 << ((format>>20)&0x07)); - unsigned int sizeY=(8 << ((format>>23)&0x07)); - unsigned int imageSize = sizeX*sizeY; - - u8 *adr; + textureFormat = format; + texturePalette = texpal; + u32 textureMode = (unsigned short)((format>>26)&0x07); if (format==0) { - texcache_count=-1; +// texcache_count=-1; if(hasShaders && hasTexture) { glUniform1i(hasTexLoc, 0); hasTexture = false; } return; } if (textureMode==0) { - texcache_count=-1; +// texcache_count=-1; if(hasShaders && hasTexture) { glUniform1i(hasTexLoc, 0); hasTexture = false; } return; } @@ -692,400 +526,8 @@ static void setTexture(unsigned int format, unsigned int texpal) glActiveTexture(GL_TEXTURE0); } - u32 paletteAddress; - switch (textureMode) - { - case TEXMODE_I2: - paletteAddress = texturePalette<<3; - break; - case TEXMODE_A3I5: //a3i5 - case TEXMODE_I4: //i4 - case TEXMODE_I8: //i8 - case TEXMODE_A5I3: //a5i3 - case TEXMODE_16BPP: //16bpp - case TEXMODE_4X4: //4x4 - default: - paletteAddress = texturePalette<<4; - break; - } - - //analyze the texture memory mapping and the specifications of this texture - int palSize = palSizes[textureMode]; - int texSize = (imageSize*texSizes[textureMode])>>2; //shifted because the texSizes multiplier is fixed point - MemSpan ms = MemSpan_TexMem((format&0xFFFF)<<3,texSize); - MemSpan mspal = MemSpan_TexPalette(paletteAddress,palSize*2); - - //determine the location for 4x4 index data - u32 indexBase; - if((format & 0xc000) == 0x8000) indexBase = 0x30000; - else indexBase = 0x20000; - - u32 indexOffset = (format&0x3FFF)<<2; - - int indexSize = 0; - MemSpan msIndex; - if(textureMode == TEXMODE_4X4) - { - indexSize = imageSize>>3; - msIndex = MemSpan_TexMem(indexOffset+indexBase,indexSize); - } - - - //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. - //this isnt such a problem with texture memory, because we read sequentially from it. - //however, we read randomly from palette memory, so the mapping is more costly. - mspal.dump(pal); - - - u32 tx=texcache_start; - - //if(false) - while (TRUE) - { - //conditions where we give up and regenerate the texture: - if (texcache_stop == tx) break; - if (texcache[tx].frm == 0) break; - - //conditions where we reject matches: - //when the teximage or texpal params dont match - //(this is our key for identifying palettes in the cache) - if (texcache[tx].frm != format) goto REJECT; - if (texcache[tx].pal != texpal) goto REJECT; - - //the texture matches params, but isnt suspected invalid. accept it. - if (!texcache[tx].suspectedInvalid) goto ACCEPT; - - //if we couldnt cache this entire texture due to it being too large, then reject it - if (texSize+indexSize > (int)sizeof(texcache[tx].texture)) goto REJECT; - - //when the palettes dont match: - //note that we are considering 4x4 textures to have a palette size of 0. - //they really have a potentially HUGE palette, too big for us to handle like a normal palette, - //so they go through a different system - if (mspal.size != 0 && memcmp(texcache[tx].palette,pal,mspal.size)) goto REJECT; - - //when the texture data doesn't match - if(ms.memcmp(texcache[tx].texture,sizeof(texcache[tx].texture))) goto REJECT; - - //if the texture is 4x4 then the index data must match - if(textureMode == TEXMODE_4X4) - { - if(msIndex.memcmp(texcache[tx].texture + texcache[tx].textureSize,texcache[tx].indexSize)) goto REJECT; - } - - -ACCEPT: - texcache[tx].suspectedInvalid = false; - texcache_count = tx; - if(lastTexture == -1 || (int)tx != lastTexture) - { - lastTexture = tx; - glBindTexture(GL_TEXTURE_2D,texcache[tx].id); - glMatrixMode (GL_TEXTURE); - glLoadIdentity (); - glScaled (texcache[tx].invSizeX, texcache[tx].invSizeY, 1.0f); - } - return; - -REJECT: - tx++; - if ( tx > MAX_TEXTURE ) - { - texcache_stop=texcache_start; - texcache[texcache_stop].frm=0; - texcache_start++; - if (texcache_start>MAX_TEXTURE) - { - texcache_start=0; - texcache_stop=MAX_TEXTURE<<1; - } - tx=0; - } - } - - lastTexture = tx; - glBindTexture(GL_TEXTURE_2D, texcache[tx].id); - - texcache[tx].suspectedInvalid = false; - texcache[tx].frm=format; - texcache[tx].mode=textureMode; - texcache[tx].pal=texpal; - texcache[tx].sizeX=sizeX; - texcache[tx].sizeY=sizeY; - texcache[tx].invSizeX=1.0f/((float)(sizeX)); - texcache[tx].invSizeY=1.0f/((float)(sizeY)); - texcache[tx].textureSize = ms.dump(texcache[tx].texture,sizeof(texcache[tx].texture)); - - //dump palette data for cache keying - if ( palSize ) - { - memcpy(texcache[tx].palette, pal, palSize*2); - } - //dump 4x4 index data for cache keying - texcache[tx].indexSize = 0; - if(textureMode == TEXMODE_4X4) - { - texcache[tx].indexSize = std::min(msIndex.size,(int)sizeof(texcache[tx].texture) - texcache[tx].textureSize); - msIndex.dump(texcache[tx].texture+texcache[tx].textureSize,texcache[tx].indexSize); - } - - - glMatrixMode (GL_TEXTURE); - glLoadIdentity (); - glScaled (texcache[tx].invSizeX, texcache[tx].invSizeY, 1.0f); - - - //INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY); - - //============================================================================ Texture conversion - u32 palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) - - switch (texcache[tx].mode) - { - case TEXMODE_A3I5: - { - for(int j=0;j>5; - *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); - adr++; - } - } - - break; - } - case TEXMODE_I2: - { - for(int j=0;j>2)&0x3; - c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); - - bits = ((*adr)>>4)&0x3; - c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); - - bits = ((*adr)>>6)&0x3; - c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); - - adr++; - } - } - break; - } - case TEXMODE_I4: - { - for(int j=0;j>4); - c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); - - adr++; - } - } - break; - } - case TEXMODE_I8: - { - for(int j=0;j>14)] + ((paletteAddress + (offset)*2)&0x3FFF) ) ) - - u16* slot1; - u32* map = (u32*)ms.items[0].ptr; - u32 limit = ms.items[0].len<<2; - u32 d = 0; - if ( (texcache[tx].frm & 0xc000) == 0x8000) - // texel are in slot 2 - slot1=(u16*)&ARM9Mem.textureSlotAddr[1][((texcache[tx].frm & 0x3FFF)<<2)+0x010000]; - else - slot1=(u16*)&ARM9Mem.textureSlotAddr[1][(texcache[tx].frm & 0x3FFF)<<2]; - - u16 yTmpSize = (texcache[tx].sizeY>>2); - u16 xTmpSize = (texcache[tx].sizeX>>2); - - //this is flagged whenever a 4x4 overruns its slot. - //i am guessing we just generate black in that case - bool dead = false; - - for (int y = 0; y < yTmpSize; y ++) - { - u32 tmpPos[4]={(y<<2)*texcache[tx].sizeX,((y<<2)+1)*texcache[tx].sizeX, - ((y<<2)+2)*texcache[tx].sizeX,((y<<2)+3)*texcache[tx].sizeX}; - for (int x = 0; x < xTmpSize; x ++, d++) - { - if(d >= limit) - dead = true; - - if(dead) { - for (int sy = 0; sy < 4; sy++) - { - u32 currentPos = (x<<2) + tmpPos[sy]; - dwdst[currentPos] = dwdst[currentPos+1] = dwdst[currentPos+2] = dwdst[currentPos+3] = 0; - } - continue; - } - - u32 currBlock = map[d]; - u16 pal1 = slot1[d]; - u16 pal1offset = (pal1 & 0x3FFF)<<1; - u8 mode = pal1>>14; - u32 tmp_col[4]; - - tmp_col[0]=RGB16TO32(PAL4X4(pal1offset),255); - tmp_col[1]=RGB16TO32(PAL4X4(pal1offset+1),255); - - switch (mode) - { - case 0: - tmp_col[2]=RGB16TO32(PAL4X4(pal1offset+2),255); - tmp_col[3]=RGB16TO32(0x7FFF,0); - break; - case 1: - tmp_col[2]=(((tmp_col[0]&0xFF)+(tmp_col[1]&0xff))>>1)| - (((tmp_col[0]&(0xFF<<8))+(tmp_col[1]&(0xFF<<8)))>>1)| - (((tmp_col[0]&(0xFF<<16))+(tmp_col[1]&(0xFF<<16)))>>1)| - (0xff<<24); - tmp_col[3]=RGB16TO32(0x7FFF,0); - break; - case 2: - tmp_col[2]=RGB16TO32(PAL4X4(pal1offset+2),255); - tmp_col[3]=RGB16TO32(PAL4X4(pal1offset+3),255); - break; - case 3: - { - u32 red1, red2; - u32 green1, green2; - u32 blue1, blue2; - u16 tmp1, tmp2; - - red1=tmp_col[0]&0xff; - green1=(tmp_col[0]>>8)&0xff; - blue1=(tmp_col[0]>>16)&0xff; - red2=tmp_col[1]&0xff; - green2=(tmp_col[1]>>8)&0xff; - blue2=(tmp_col[1]>>16)&0xff; - - tmp1=((red1*5+red2*3)>>6)| - (((green1*5+green2*3)>>6)<<5)| - (((blue1*5+blue2*3)>>6)<<10); - tmp2=((red2*5+red1*3)>>6)| - (((green2*5+green1*3)>>6)<<5)| - (((blue2*5+blue1*3)>>6)<<10); - - tmp_col[2]=RGB16TO32(tmp1,255); - tmp_col[3]=RGB16TO32(tmp2,255); - break; - } - } - - //set all 16 texels - for (int sy = 0; sy < 4; sy++) - { - // Texture offset - u32 currentPos = (x<<2) + tmpPos[sy]; - u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF); - - dwdst[currentPos] = tmp_col[currRow&3]; - dwdst[currentPos+1] = tmp_col[(currRow>>2)&3]; - dwdst[currentPos+2] = tmp_col[(currRow>>4)&3]; - dwdst[currentPos+3] = tmp_col[(currRow>>6)&3]; - } - - - } - } - - - break; - } - case TEXMODE_A5I3: - { - for(int j=0;j>3); - *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); - adr++; - } - } - break; - } - case TEXMODE_16BPP: - { - for(int j=0;j +//produce a 32bpp color from a DS RGB16 +#define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3)) + +//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table +#define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[col&0x7FFF] ) + +//produce a 24bpp color from a ds RGB15, using a table +#define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] ) + +//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, not using a table (but using other tables) +#define RGB15TO32_DIRECT(col,alpha8) ( ((alpha8)<<24) | (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] ) + +//produce a 15bpp color from individual 5bit components +#define R5G5B5TORGB15(r,g,b) ((r)|((g)<<5)|((b)<<10)) + + #define TEXMODE_NONE 0 #define TEXMODE_A3I5 1 #define TEXMODE_I2 2 @@ -141,18 +157,6 @@ extern GFX3D gfx3d; //--------------------- -//produce a 32bpp color from a DS RGB16 -#define RGB16TO32(col,alpha) (((alpha)<<24) | ((((col) & 0x7C00)>>7)<<16) | ((((col) & 0x3E0)>>2)<<8) | (((col) & 0x1F)<<3)) - -//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table -#define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[col&0x7FFF] ) - -//produce a 24bpp color from a ds RGB15, using a table -#define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] ) - -//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, not using a table (but using other tables) -#define RGB15TO32_DIRECT(col,alpha8) ( ((alpha8)<<24) | (material_5bit_to_8bit[((col)>>10)&0x1F]<<16) | (material_5bit_to_8bit[((col)>>5)&0x1F]<<8) | material_5bit_to_8bit[(col)&0x1F] ) - extern CACHE_ALIGN u32 color_15bit_to_24bit[32768]; extern CACHE_ALIGN u32 color_15bit_to_24bit_reverse[32768]; extern CACHE_ALIGN u8 mixTable555[32][32][32]; diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 63758ce24..31d570051 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -22,10 +22,13 @@ */ #include "Rasterize.h" + +#include + #include "common.h" #include "render3D.h" #include "gfx3d.h" -#include +#include "texcache.h" using std::min; using std::max; @@ -33,9 +36,13 @@ using std::max; template T min(T a, T b, T c) { return min(min(a,b),c); } template T max(T a, T b, T c) { return max(max(a,b),c); } - static u16 screen[256*192]; +static struct +{ + int width, height; +} Texture; + void set_pixel(int x, int y, u16 color) { if(x<0 || y<0 || x>=256 || y>=192) return; @@ -50,31 +57,74 @@ void hline(int x, int y, int xe, u16 color) //http://www.devmaster.net/forums/showthread.php?t=1884 +#if defined(_MSC_VER) +inline int iround(float x) +{ + int t; + + __asm + { + fld x + fistp t + } + + return t; +} +#else int iround(float f) { return (int)f; //lol } +#endif -void triangle_from_devmaster(int x1, int y1, int x2, int y2, int x3, int y3, u16 color) +struct Interpolator { - int desty = 0; - struct Vertex { - int x,y; - } v1, v2, v3; - v1.x = x1; - v1.y = y1; - v2.x = x2; - v2.y = y2; - v3.x = x3; - v3.y = y3; + int A,B,C; + float dx, dy; + float Z, pZ; - // 28.4 fixed-point coordinates - const int Y1 = iround(16.0f * v1.y); - const int Y2 = iround(16.0f * v2.y); - const int Y3 = iround(16.0f * v3.y); + struct { + int x,y,z; + } point0; + + Interpolator(int x1, int x2, int x3, int y1, int y2, int y3, int z1, int z2, int z3) + { + A = (z3 - z1) * (y2 - y1) - (z2 - z1) * (y3 - y1); + B = (x3 - x1) * (z2 - z1) - (x2 - x1) * (z3 - z1); + C = (x2 - x1) * (y3 - y1) - (x3 - x1) * (y2 - y1); + dx = -(float)A / C; + dy = -(float)B / C; + point0.x = x1; + point0.y = y1; + point0.z = z1; + } - const int X1 = iround(16.0f * v1.x); - const int X2 = iround(16.0f * v2.x); - const int X3 = iround(16.0f * v3.x); + void init(int x, int y) + { + Z = point0.z + dx * (x-point0.x) + dy * (y-point0.y); + } + + FORCEINLINE int cur() { return iround(Z); } + + FORCEINLINE void push() { pZ = Z; } + FORCEINLINE void pop() { Z = pZ; } + FORCEINLINE void incy() { Z += dy; } + FORCEINLINE void incx() { Z += dx; } +}; + +//http://www.devmaster.net/forums/showthread.php?t=1884&page=1 +//todo - change to the tile-based renderer and try to apply some optimizations from that thread +void triangle_from_devmaster(VERT** verts) +{ + u16 color =0x7FFF; + + // 28.4 fixed-point coordinates + const int Y1 = iround(16.0f * verts[0]->coord[1]); + const int Y2 = iround(16.0f * verts[1]->coord[1]); + const int Y3 = iround(16.0f * verts[2]->coord[1]); + + const int X1 = iround(16.0f * verts[0]->coord[0]); + const int X2 = iround(16.0f * verts[1]->coord[0]); + const int X3 = iround(16.0f * verts[2]->coord[0]); // Deltas const int DX12 = X1 - X2; @@ -100,15 +150,7 @@ void triangle_from_devmaster(int x1, int y1, int x2, int y2, int x3, int y3, u16 int miny = (min(Y1, Y2, Y3) + 0xF) >> 4; int maxy = (max(Y1, Y2, Y3) + 0xF) >> 4; - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - //(char*&)colorBuffer += miny * stride; - desty = miny; + int desty = miny; // Half-edge constants int C1 = DY12 * X1 - DX12 * Y1; @@ -120,95 +162,87 @@ void triangle_from_devmaster(int x1, int y1, int x2, int y2, int x3, int y3, u16 if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - // Loop through blocks - for(int y = miny; y < maxy; y += q) + int CY1 = C1 + DX12 * (miny << 4) - DY12 * (minx << 4); + int CY2 = C2 + DX23 * (miny << 4) - DY23 * (minx << 4); + int CY3 = C3 + DX31 * (miny << 4) - DY31 * (minx << 4); + + float fx1 = verts[0]->coord[0], fy1 = verts[0]->coord[1]; + float fx2 = verts[1]->coord[0], fy2 = verts[1]->coord[1]; + float fx3 = verts[2]->coord[0], fy3 = verts[2]->coord[1]; + u8 r1 = verts[0]->color[0], g1 = verts[0]->color[1], b1 = verts[0]->color[2]; + u8 r2 = verts[1]->color[0], g2 = verts[1]->color[1], b2 = verts[1]->color[2]; + u8 r3 = verts[2]->color[0], g3 = verts[2]->color[1], b3 = verts[2]->color[2]; + int u1 = verts[0]->texcoord[0], v1 = verts[0]->texcoord[1]; + int u2 = verts[1]->texcoord[0], v2 = verts[1]->texcoord[1]; + int u3 = verts[2]->texcoord[0], v3 = verts[2]->texcoord[1]; + + Interpolator i_color_r(fx1,fx2,fx3,fy1,fy2,fy3,r1,r2,r3); + Interpolator i_color_g(fx1,fx2,fx3,fy1,fy2,fy3,g1,g2,g3); + Interpolator i_color_b(fx1,fx2,fx3,fy1,fy2,fy3,b1,b2,b3); + Interpolator i_tex_u(fx1,fx2,fx3,fy1,fy2,fy3,u1,u2,u3); + Interpolator i_tex_v(fx1,fx2,fx3,fy1,fy2,fy3,v1,v2,v3); + + + i_color_r.init(minx,miny); + i_color_g.init(minx,miny); + i_color_b.init(minx,miny); + i_tex_u.init(minx,miny); + i_tex_v.init(minx,miny); + + for(int y = miny; y < maxy; y++) { - for(int x = minx; x < maxx; x += q) + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + bool done = false; + i_color_r.push(); i_color_g.push(); i_color_b.push(); + i_tex_u.push(); i_tex_v.push(); + for(int x = minx; x < maxx; x++) { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if(a == 0x0 || b == 0x0 || c == 0x0) continue; - - //unsigned int *buffer = colorBuffer; - int _desty = desty; - - // Accept whole block when totally covered - if(a == 0xF && b == 0xF && c == 0xF) + if(CX1 > 0 && CX2 > 0 && CX3 > 0) { - for(int iy = 0; iy < q; iy++) - { - for(int ix = x; ix < x + q; ix++) - { - //buffer[ix] = 0x00007F00;<< // Green - set_pixel(ix,_desty,color); - } + //material color + //color = R5G5B5TORGB15(i_color_r.cur(),i_color_g.cur(),i_color_b.cur()); + + //texture + int u = i_tex_u.cur(); + int v = i_tex_v.cur(); + if(u<0) u = 0; + if(v<0) v = 0; + u32 color32 = ((u32*)TexCache_texMAP)[v*Texture.width+u]; + color32>>=3; + color32 &= 0x1F1F1F1F; + u8* color8 = (u8*)&color32; + color = (color8[0] | (color8[1] << 5) | (color8[2] << 10)); + + //hack: for testing, dont render non-opaque textures + if(color8[3] < 0x1F) return; + + set_pixel(x,desty,color); - //(char*&)buffer += stride; - _desty++; + done = true; + } else if(done) break; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; + i_color_r.incx(); i_color_g.incx(); i_color_b.incx(); + i_tex_u.incx(); i_tex_v.incx(); - for(int iy = y; iy < y + q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for(int ix = x; ix < x + q; ix++) - { - if(CX1 > 0 && CX2 > 0 && CX3 > 0) - { - //buffer[ix] = 0x0000007F;<< // Blue - set_pixel(ix,_desty,color); - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - //(char*&)buffer += stride; - _desty ++; - } - } + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; } + i_color_r.pop(); i_color_r.incy(); + i_color_g.pop(); i_color_g.incy(); + i_color_b.pop(); i_color_b.incy(); + i_tex_u.pop(); i_tex_u.incy(); + i_tex_v.pop(); i_tex_v.incy(); - //(char*&)colorBuffer += q * stride; - desty += q; + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + desty++; } } @@ -245,20 +279,9 @@ static void Render() { VERT &vert = gfx3d.vertlist->list[i]; - ////perspective division - //vert.coord[0] = (vert.coord[0] + vert.coord[3]) / 2 / vert.coord[3]; - //vert.coord[1] = (vert.coord[1] + vert.coord[3]) / 2 / vert.coord[3]; - //vert.coord[2] = (vert.coord[2] + vert.coord[3]) / 2 / vert.coord[3]; - //vert.coord[3] = 1; - - ////transform to viewport. this is badly broken - //vert.coord[0] = (vert.coord[0])*128; - //vert.coord[1] = (vert.coord[1])*96; - + //perspective division and viewport transform vert.coord[0] = (vert.coord[0]+vert.coord[3])*256 / (2*vert.coord[3]) + 0; vert.coord[1] = (vert.coord[1]+vert.coord[3])*192 / (2*vert.coord[3]) + 0; - - int zzz=9; } @@ -268,21 +291,40 @@ static void Render() POLY *poly = &gfx3d.polylist->list[gfx3d.indexlist[i]]; int type = poly->type; + TexCache_SetTexture(poly->texParam,poly->texPalette); + if(TexCache_Curr()) + Texture.width = TexCache_Curr()->sizeX; - if(type == 3) { - VERT* vert[3] = { + //note that when we build our triangle vert lists, we reorder them for our renderer. + //we should probably fix the renderer so we dont have to do this; + //but then again, what does it matter? + if(type == 4) { + + VERT* vertA[3] = { &gfx3d.vertlist->list[poly->vertIndexes[0]], + &gfx3d.vertlist->list[poly->vertIndexes[2]], &gfx3d.vertlist->list[poly->vertIndexes[1]], + }; + + triangle_from_devmaster(vertA); + + VERT* vertB[3] = { + &gfx3d.vertlist->list[poly->vertIndexes[0]], + &gfx3d.vertlist->list[poly->vertIndexes[3]], &gfx3d.vertlist->list[poly->vertIndexes[2]], }; - u16 color = vert[0]->color[0] | (vert[0]->color[1]<<5) | (vert[0]->color[2]<<10); + triangle_from_devmaster(vertB); - triangle_from_devmaster( - vert[0]->coord[0],vert[0]->coord[1], - vert[1]->coord[0],vert[1]->coord[1], - vert[2]->coord[0],vert[2]->coord[1], - color); + } + if(type == 3) { + VERT* vert[3] = { + &gfx3d.vertlist->list[poly->vertIndexes[2]], + &gfx3d.vertlist->list[poly->vertIndexes[1]], + &gfx3d.vertlist->list[poly->vertIndexes[0]], + }; + + triangle_from_devmaster(vert); } } diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp new file mode 100644 index 000000000..a9c8fdfe8 --- /dev/null +++ b/desmume/src/texcache.cpp @@ -0,0 +1,594 @@ +#include "texcache.h" + +#include + +#include "bits.h" +#include "common.h" +#include "debug.h" +#include "gfx3d.h" +#include "NDSSystem.h" + +using std::min; +using std::max; + +//This class represents a number of regions of memory which should be viewed as contiguous +class MemSpan +{ +public: + static const int MAXSIZE = 8; + + MemSpan() + : numItems(0) + {} + + int numItems; + + struct Item { + u32 start; + u32 len; + u8* ptr; + u32 ofs; //offset within the memspan + } items[MAXSIZE]; + + int size; + + //this MemSpan shall be considered the first argument to a standard memcmp + //the length shall be as specified in this MemSpan, unless you specify otherwise + int memcmp(void* buf2, int size=-1) + { + if(size==-1) size = this->size; + size = min(this->size,size); + for(int i=0;isize; + size = min(this->size,size); + u8* bufptr = (u8*)buf; + int done = 0; + for(int i=0;i>17)&3; //slots will wrap around + curr.len = min(len,0x20000-curr.start); + curr.ofs = currofs; + len -= curr.len; + ofs += curr.len; + currofs += curr.len; + u8* ptr = ARM9Mem.textureSlotAddr[slot]; + + if(ptr == ARM9Mem.blank_memory) { + PROGINFO("Tried to reference unmapped texture memory: slot %d\n",slot); + } + curr.ptr = ptr + curr.start; + } + return ret; +} + +//creates a MemSpan in texture palette memory +static MemSpan MemSpan_TexPalette(u32 ofs, u32 len) +{ + MemSpan ret; + ret.size = len; + u32 currofs = 0; + while(len) { + MemSpan::Item &curr = ret.items[ret.numItems++]; + curr.start = ofs&0x3FFF; + u32 slot = (ofs>>14)&7; //this masks to 8 slots, but there are really only 6 + if(slot>5) { + PROGINFO("Texture palette overruns texture memory. Wrapping at palette slot 0.\n"); + slot -= 5; + } + curr.len = min(len,0x4000-curr.start); + curr.ofs = currofs; + len -= curr.len; + ofs += curr.len; + //if(len != 0) + //here is an actual test case of bank spanning + currofs += curr.len; + u8* ptr = ARM9Mem.texPalSlot[slot]; + + if(ptr == ARM9Mem.blank_memory) { + PROGINFO("Tried to reference unmapped texture palette memory: 16k slot #%d\n",slot); + } + curr.ptr = ptr + curr.start; + } + return ret; +} + +//================================================= Textures +TextureCache texcache[MAX_TEXTURE+1]; +//u32 texcache_count; +u32 texcache_start; +u32 texcache_stop; + + +u8 TexCache_texMAP[1024*2048*4]; +//raw ds format poly attributes + +//todo - make all color conversions go through a properly spread table!! + +#if defined (DEBUG_DUMP_TEXTURE) && defined (WIN32) +static void DebugDumpTexture(int which) +{ + char fname[100]; + sprintf(fname,"c:\\dump\\%d.bmp", which); + + glBindTexture(GL_TEXTURE_2D,texcache[which].id); + glGetTexImage( GL_TEXTURE_2D , + 0, + GL_BGRA_EXT, + GL_UNSIGNED_BYTE, + TexCache_texMAP); + + NDS_WriteBMP_32bppBuffer(texcache[which].sizeX,texcache[which].sizeY,TexCache_texMAP,fname); +} +#else +#define DebugDumpTexture(which) do { (void)which; } while (0) +#endif + + +static int lastTexture = -1; +void TexCache_SetTexture(unsigned int format, unsigned int texpal) +{ + //for each texformat, number of palette entries + const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0}; + + //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) + const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; + + //used to hold a copy of the palette specified for this texture + u16 pal[256]; + + u32 *dwdst = (u32*)TexCache_texMAP; + + u32 textureMode = (unsigned short)((format>>26)&0x07); + unsigned int sizeX=(8 << ((format>>20)&0x07)); + unsigned int sizeY=(8 << ((format>>23)&0x07)); + unsigned int imageSize = sizeX*sizeY; + + u8 *adr; + + u32 paletteAddress; + + switch (textureMode) + { + case TEXMODE_I2: + paletteAddress = texpal<<3; + break; + case TEXMODE_A3I5: //a3i5 + case TEXMODE_I4: //i4 + case TEXMODE_I8: //i8 + case TEXMODE_A5I3: //a5i3 + case TEXMODE_16BPP: //16bpp + case TEXMODE_4X4: //4x4 + default: + paletteAddress = texpal<<4; + break; + } + + //analyze the texture memory mapping and the specifications of this texture + int palSize = palSizes[textureMode]; + int texSize = (imageSize*texSizes[textureMode])>>2; //shifted because the texSizes multiplier is fixed point + MemSpan ms = MemSpan_TexMem((format&0xFFFF)<<3,texSize); + MemSpan mspal = MemSpan_TexPalette(paletteAddress,palSize*2); + + //determine the location for 4x4 index data + u32 indexBase; + if((format & 0xc000) == 0x8000) indexBase = 0x30000; + else indexBase = 0x20000; + + u32 indexOffset = (format&0x3FFF)<<2; + + int indexSize = 0; + MemSpan msIndex; + if(textureMode == TEXMODE_4X4) + { + indexSize = imageSize>>3; + msIndex = MemSpan_TexMem(indexOffset+indexBase,indexSize); + } + + + //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. + //this isnt such a problem with texture memory, because we read sequentially from it. + //however, we read randomly from palette memory, so the mapping is more costly. + mspal.dump(pal); + + + u32 tx=texcache_start; + + //if(false) + while (TRUE) + { + //conditions where we give up and regenerate the texture: + if (texcache_stop == tx) break; + if (texcache[tx].frm == 0) break; + + //conditions where we reject matches: + //when the teximage or texpal params dont match + //(this is our key for identifying palettes in the cache) + if (texcache[tx].frm != format) goto REJECT; + if (texcache[tx].pal != texpal) goto REJECT; + + //the texture matches params, but isnt suspected invalid. accept it. + if (!texcache[tx].suspectedInvalid) goto ACCEPT; + + //if we couldnt cache this entire texture due to it being too large, then reject it + if (texSize+indexSize > (int)sizeof(texcache[tx].texture)) goto REJECT; + + //when the palettes dont match: + //note that we are considering 4x4 textures to have a palette size of 0. + //they really have a potentially HUGE palette, too big for us to handle like a normal palette, + //so they go through a different system + if (mspal.size != 0 && memcmp(texcache[tx].palette,pal,mspal.size)) goto REJECT; + + //when the texture data doesn't match + if(ms.memcmp(texcache[tx].texture,sizeof(texcache[tx].texture))) goto REJECT; + + //if the texture is 4x4 then the index data must match + if(textureMode == TEXMODE_4X4) + { + if(msIndex.memcmp(texcache[tx].texture + texcache[tx].textureSize,texcache[tx].indexSize)) goto REJECT; + } + + +ACCEPT: + texcache[tx].suspectedInvalid = false; +// texcache_count = tx; + if(lastTexture == -1 || (int)tx != lastTexture) + { + lastTexture = tx; + if(TexCache_BindTexture) + TexCache_BindTexture(tx); + } + return; + +REJECT: + tx++; + if ( tx > MAX_TEXTURE ) + { + texcache_stop=texcache_start; + texcache[texcache_stop].frm=0; + texcache_start++; + if (texcache_start>MAX_TEXTURE) + { + texcache_start=0; + texcache_stop=MAX_TEXTURE<<1; + } + tx=0; + } + } + + lastTexture = tx; + //glBindTexture(GL_TEXTURE_2D, texcache[tx].id); + + texcache[tx].suspectedInvalid = false; + texcache[tx].frm=format; + texcache[tx].mode=textureMode; + texcache[tx].pal=texpal; + texcache[tx].sizeX=sizeX; + texcache[tx].sizeY=sizeY; + texcache[tx].invSizeX=1.0f/((float)(sizeX)); + texcache[tx].invSizeY=1.0f/((float)(sizeY)); + texcache[tx].textureSize = ms.dump(texcache[tx].texture,sizeof(texcache[tx].texture)); + + //dump palette data for cache keying + if ( palSize ) + { + memcpy(texcache[tx].palette, pal, palSize*2); + } + //dump 4x4 index data for cache keying + texcache[tx].indexSize = 0; + if(textureMode == TEXMODE_4X4) + { + texcache[tx].indexSize = min(msIndex.size,(int)sizeof(texcache[tx].texture) - texcache[tx].textureSize); + msIndex.dump(texcache[tx].texture+texcache[tx].textureSize,texcache[tx].indexSize); + } + + + //glMatrixMode (GL_TEXTURE); + //glLoadIdentity (); + //glScaled (texcache[tx].invSizeX, texcache[tx].invSizeY, 1.0f); + + + //INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY); + + //============================================================================ Texture conversion + u32 palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) + + switch (texcache[tx].mode) + { + case TEXMODE_A3I5: + { + for(int j=0;j>5; + *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); + adr++; + } + } + + break; + } + case TEXMODE_I2: + { + for(int j=0;j>2)&0x3; + c = pal[bits]; + *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + + bits = ((*adr)>>4)&0x3; + c = pal[bits]; + *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + + bits = ((*adr)>>6)&0x3; + c = pal[bits]; + *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + + adr++; + } + } + break; + } + case TEXMODE_I4: + { + for(int j=0;j>4); + c = pal[bits]; + *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + + adr++; + } + } + break; + } + case TEXMODE_I8: + { + for(int j=0;j>14)] + ((paletteAddress + (offset)*2)&0x3FFF) ) ) + + u16* slot1; + u32* map = (u32*)ms.items[0].ptr; + u32 limit = ms.items[0].len<<2; + u32 d = 0; + if ( (texcache[tx].frm & 0xc000) == 0x8000) + // texel are in slot 2 + slot1=(u16*)&ARM9Mem.textureSlotAddr[1][((texcache[tx].frm & 0x3FFF)<<2)+0x010000]; + else + slot1=(u16*)&ARM9Mem.textureSlotAddr[1][(texcache[tx].frm & 0x3FFF)<<2]; + + u16 yTmpSize = (texcache[tx].sizeY>>2); + u16 xTmpSize = (texcache[tx].sizeX>>2); + + //this is flagged whenever a 4x4 overruns its slot. + //i am guessing we just generate black in that case + bool dead = false; + + for (int y = 0; y < yTmpSize; y ++) + { + u32 tmpPos[4]={(y<<2)*texcache[tx].sizeX,((y<<2)+1)*texcache[tx].sizeX, + ((y<<2)+2)*texcache[tx].sizeX,((y<<2)+3)*texcache[tx].sizeX}; + for (int x = 0; x < xTmpSize; x ++, d++) + { + if(d >= limit) + dead = true; + + if(dead) { + for (int sy = 0; sy < 4; sy++) + { + u32 currentPos = (x<<2) + tmpPos[sy]; + dwdst[currentPos] = dwdst[currentPos+1] = dwdst[currentPos+2] = dwdst[currentPos+3] = 0; + } + continue; + } + + u32 currBlock = map[d]; + u16 pal1 = slot1[d]; + u16 pal1offset = (pal1 & 0x3FFF)<<1; + u8 mode = pal1>>14; + u32 tmp_col[4]; + + tmp_col[0]=RGB16TO32(PAL4X4(pal1offset),255); + tmp_col[1]=RGB16TO32(PAL4X4(pal1offset+1),255); + + switch (mode) + { + case 0: + tmp_col[2]=RGB16TO32(PAL4X4(pal1offset+2),255); + tmp_col[3]=RGB16TO32(0x7FFF,0); + break; + case 1: + tmp_col[2]=(((tmp_col[0]&0xFF)+(tmp_col[1]&0xff))>>1)| + (((tmp_col[0]&(0xFF<<8))+(tmp_col[1]&(0xFF<<8)))>>1)| + (((tmp_col[0]&(0xFF<<16))+(tmp_col[1]&(0xFF<<16)))>>1)| + (0xff<<24); + tmp_col[3]=RGB16TO32(0x7FFF,0); + break; + case 2: + tmp_col[2]=RGB16TO32(PAL4X4(pal1offset+2),255); + tmp_col[3]=RGB16TO32(PAL4X4(pal1offset+3),255); + break; + case 3: + { + u32 red1, red2; + u32 green1, green2; + u32 blue1, blue2; + u16 tmp1, tmp2; + + red1=tmp_col[0]&0xff; + green1=(tmp_col[0]>>8)&0xff; + blue1=(tmp_col[0]>>16)&0xff; + red2=tmp_col[1]&0xff; + green2=(tmp_col[1]>>8)&0xff; + blue2=(tmp_col[1]>>16)&0xff; + + tmp1=((red1*5+red2*3)>>6)| + (((green1*5+green2*3)>>6)<<5)| + (((blue1*5+blue2*3)>>6)<<10); + tmp2=((red2*5+red1*3)>>6)| + (((green2*5+green1*3)>>6)<<5)| + (((blue2*5+blue1*3)>>6)<<10); + + tmp_col[2]=RGB16TO32(tmp1,255); + tmp_col[3]=RGB16TO32(tmp2,255); + break; + } + } + + //set all 16 texels + for (int sy = 0; sy < 4; sy++) + { + // Texture offset + u32 currentPos = (x<<2) + tmpPos[sy]; + u8 currRow = (u8)((currBlock>>(sy<<3))&0xFF); + + dwdst[currentPos] = tmp_col[currRow&3]; + dwdst[currentPos+1] = tmp_col[(currRow>>2)&3]; + dwdst[currentPos+2] = tmp_col[(currRow>>4)&3]; + dwdst[currentPos+3] = tmp_col[(currRow>>6)&3]; + } + + + } + } + + + break; + } + case TEXMODE_A5I3: + { + for(int j=0;j>3); + *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); + adr++; + } + } + break; + } + case TEXMODE_16BPP: + { + for(int j=0;j + + + +