From 5f22118a6f7fb11b42553e0fd7c7e20e986cb33d Mon Sep 17 00:00:00 2001 From: zeromus Date: Mon, 20 Apr 2009 05:31:04 +0000 Subject: [PATCH] tiny softrasterizer speedups: change texcache to produce different pixel formats depending on 3d renderer, and optimize getline --- desmume/src/OGLRender.cpp | 2 +- desmume/src/gfx3d.cpp | 5 ++++ desmume/src/gfx3d.h | 4 +++ desmume/src/rasterize.cpp | 34 ++++++++++++------------- desmume/src/texcache.cpp | 53 +++++++++++++++++++++++++++++---------- desmume/src/texcache.h | 11 +++++++- 6 files changed, 77 insertions(+), 32 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index da136c3b0..dbf0c972c 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -536,7 +536,7 @@ static void setTexture(unsigned int format, unsigned int texpal) } - TexCache_SetTexture(format, texpal); + TexCache_SetTexture<TexFormat_32bpp>(format, texpal); } diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index d14d74ef0..ca16ffb32 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -79,6 +79,11 @@ CACHE_ALIGN const u8 material_3bit_to_8bit[] = { 0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF }; +//maybe not very precise +CACHE_ALIGN const u8 material_3bit_to_5bit[] = { + 0, 4, 8, 13, 17, 22, 26, 31 +}; + CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = { 0x00, 0x00, 0x01, 0x01, diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index a83b42560..050e36a4e 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -33,6 +33,9 @@ //produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table #define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[col&0x7FFF] ) +//produce a 5555 32bit color from a ds RGB15 plus an 5bit alpha +#define RGB15TO5555(col,alpha5) (((alpha5)<<24) | ((((col) & 0x7C00)>>10)<<16) | ((((col) & 0x3E0)>>5)<<8) | (((col) & 0x1F))) + //produce a 24bpp color from a ds RGB15, using a table #define RGB15TO24_REVERSE(col) ( 
color_15bit_to_24bit_reverse[col&0x7FFF] ) @@ -183,6 +186,7 @@ extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768]; extern CACHE_ALIGN u8 mixTable555[32][32][32]; extern CACHE_ALIGN const int material_5bit_to_31bit[32]; extern CACHE_ALIGN const u8 material_5bit_to_8bit[32]; +extern CACHE_ALIGN const u8 material_3bit_to_5bit[8]; extern CACHE_ALIGN const u8 material_3bit_to_8bit[8]; extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32]; diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 87971143e..f5d9b36ed 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -309,12 +309,7 @@ static struct Sampler dowrap(iu,iv); Fragment::Color color; - u32 col32 = ((u32*)textures.currentData)[(iv<>= 3; - col32 &= 0x1F1F1F1F; - color.color = col32; - + color.color = ((u32*)textures.currentData)[(iv<color.components.r; - u8 g = src->color.components.g; - u8 b = src->color.components.b; + const u8 r = src->color.components.r; + const u8 g = src->color.components.g; + const u8 b = src->color.components.b; *dst = R5G5B5TORGB15(r,g,b); - if(src->color.components.a > 0) - *dst |= 0x8000; + + *dst |= alpha_lookup[src->color.components.a]; *dstAlpha = alpha_5bit_to_4bit[src->color.components.a]; if(testRenderAlpha) @@ -971,12 +972,11 @@ static void SoftRastGetLineCaptured(int line, u16* dst) { Fragment* src = screen+((line)<<8); for(int i=0;i<256;i++) { - u8 r = src->color.components.r; - u8 g = src->color.components.g; - u8 b = src->color.components.b; + const u8 r = src->color.components.r; + const u8 g = src->color.components.g; + const u8 b = src->color.components.b; *dst = R5G5B5TORGB15(r,g,b); - if(src->color.components.a > 0) - *dst |= 0x8000; + *dst |= alpha_lookup[src->color.components.a]; src++; dst++; } @@ -1269,7 +1269,7 @@ static void SoftRastRender() if(needInitTexture || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette) { - TexCache_SetTexture(poly->texParam,poly->texPalette); + 
TexCache_SetTexture<TexFormat_15bpp>(poly->texParam,poly->texPalette); sampler.setup(poly->texParam); lastTextureFormat = poly->texParam; lastTexturePalette = poly->texPalette; diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index 1f14115ed..58e8dfa9e 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -12,6 +12,7 @@ using std::min; using std::max; +//only dump this from ogl renderer. for now, softrasterizer creates things in an incompatible pixel format //#define DEBUG_DUMP_TEXTURE //This class represents a number of regions of memory which should be viewed as contiguous @@ -150,7 +151,11 @@ static void DebugDumpTexture(int which) static int lastTexture = -1; -void TexCache_SetTexture(unsigned int format, unsigned int texpal) + +#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO5555(color,alpha)) + +template<TexCache_TexFormat TEXFORMAT> +void TexCache_SetTexture(u32 format, u32 texpal) { //for each texformat, number of palette entries const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0}; @@ -309,7 +314,8 @@ REJECT: //INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY); //============================================================================ Texture conversion - u32 palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) + const u32 opaqueColor = TEXFORMAT==TexFormat_32bpp?255:31; + u32 palZeroTransparent = (1-((format>>29)&1))*opaqueColor; switch (texcache[tx].mode) { @@ -321,7 +327,10 @@ REJECT: { u16 c = pal[*adr&31]; u8 alpha = *adr>>5; - *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); + if(TEXFORMAT == TexFormat_15bpp) + *dwdst++ = RGB15TO5555(c,material_3bit_to_5bit[alpha]); + else + *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); adr++; } } @@ -339,19 +348,19 @@ REJECT: bits = (*adr)&0x3; c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? 
palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); bits = ((*adr)>>2)&0x3; c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); bits = ((*adr)>>4)&0x3; c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); bits = ((*adr)>>6)&0x3; c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); adr++; } @@ -369,11 +378,11 @@ REJECT: bits = (*adr)&0xF; c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); bits = ((*adr)>>4); c = pal[bits]; - *dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor); adr++; } } @@ -386,7 +395,7 @@ REJECT: for(u32 x = 0; x < ms.items[j].len; ++x) { u16 c = pal[*adr]; - *dwdst++ = RGB15TO32(c,(*adr == 0) ? palZeroTransparent : 255); + *dwdst++ = CONVERT(c,(*adr == 0) ? 
palZeroTransparent : opaqueColor); adr++; } } @@ -444,7 +453,7 @@ REJECT: u16 pal1offset = (pal1 & 0x3FFF)<<1; u8 mode = pal1>>14; u32 tmp_col[4]; - + tmp_col[0]=RGB16TO32(PAL4X4(pal1offset),255); tmp_col[1]=RGB16TO32(PAL4X4(pal1offset+1),255); @@ -492,6 +501,17 @@ REJECT: } } + if(TEXFORMAT==TexFormat_15bpp) + { + for(int i=0;i<4;i++) + { + tmp_col[i] >>= 3; + tmp_col[i] &= 0x1F1F1F1F; + } + } + + //TODO - this could be more precise for 32bpp mode (run it through the color separation table) + //set all 16 texels for (int sy = 0; sy < 4; sy++) { @@ -520,7 +540,10 @@ REJECT: { u16 c = pal[*adr&0x07]; u8 alpha = (*adr>>3); - *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); + if(TEXFORMAT == TexFormat_15bpp) + *dwdst++ = RGB15TO5555(c,alpha); + else + *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); adr++; } } @@ -534,7 +557,7 @@ REJECT: { u16 c = map[x]; int alpha = ((c&0x8000)?255:0); - *dwdst++ = RGB15TO32(c&0x7FFF,alpha); + *dwdst++ = CONVERT(c&0x7FFF,alpha); } } break; @@ -585,3 +608,7 @@ void TexCache_Invalidate() void (*TexCache_BindTexture)(u32 texnum) = NULL; void (*TexCache_BindTextureData)(u32 texnum, u8* data); + +//these templates needed to be instantiated manually +template void TexCache_SetTexture<TexFormat_32bpp>(u32 format, u32 texpal); +template void TexCache_SetTexture<TexFormat_15bpp>(u32 format, u32 texpal); \ No newline at end of file diff --git a/desmume/src/texcache.h b/desmume/src/texcache.h index bc2c329d0..c350a2fe4 100644 --- a/desmume/src/texcache.h +++ b/desmume/src/texcache.h @@ -3,6 +3,12 @@ #include "common.h" +enum TexCache_TexFormat +{ + TexFormat_32bpp, + TexFormat_15bpp +}; + #define MAX_TEXTURE 500 #ifndef NOSSE2 struct ALIGN(16) TextureCache @@ -36,7 +42,10 @@ extern void (*TexCache_BindTexture)(u32 texnum); extern void (*TexCache_BindTextureData)(u32 texnum, u8* data); void TexCache_Reset(); -void TexCache_SetTexture(unsigned int format, unsigned int texpal); + +template<TexCache_TexFormat TEXFORMAT> +void TexCache_SetTexture(u32 format, u32 texpal); + void TexCache_Invalidate(); 
extern u8 TexCache_texMAP[1024*2048*4];