tiny softrasterizer speedups: change texcache to produce different pixel formats depending on 3d renderer, and optimize getline

This commit is contained in:
zeromus 2009-04-20 05:31:04 +00:00
parent 5abdb2faed
commit 5f22118a6f
6 changed files with 77 additions and 32 deletions

View File

@ -536,7 +536,7 @@ static void setTexture(unsigned int format, unsigned int texpal)
} }
TexCache_SetTexture(format, texpal); TexCache_SetTexture<TexFormat_32bpp>(format, texpal);
} }

View File

@ -79,6 +79,11 @@ CACHE_ALIGN const u8 material_3bit_to_8bit[] = {
0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF 0x00, 0x24, 0x49, 0x6D, 0x92, 0xB6, 0xDB, 0xFF
}; };
//expands a 3bit alpha value (the table index, 0..7) to a 5bit alpha value (0..31).
//uses the expansion documented for the DS hardware: alpha5 = alpha3*4 + alpha3/2,
//which equals round(alpha3*31/7). the previous table truncated instead of rounding
//and was one step too low at indices 2, 4 and 6.
CACHE_ALIGN const u8 material_3bit_to_5bit[] = {
0, 4, 9, 13, 18, 22, 27, 31
};
CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = { CACHE_ALIGN const u8 alpha_5bit_to_4bit[] = {
0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,

View File

@ -33,6 +33,9 @@
//produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table //produce a 32bpp color from a ds RGB15 plus an 8bit alpha, using a table
#define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[col&0x7FFF] ) #define RGB15TO32(col,alpha8) ( ((alpha8)<<24) | color_15bit_to_24bit[col&0x7FFF] )
//pack a ds RGB15 color plus a 5bit alpha into a 32bit word holding one 5bit channel per byte:
//bits 0-4 of col stay in byte 0, bits 5-9 move to byte 1, bits 10-14 move to byte 2, alpha5 goes to byte 3
#define RGB15TO5555(col,alpha5) ( ((alpha5)<<24) | (((col) & 0x7C00)<<6) | (((col) & 0x03E0)<<3) | ((col) & 0x001F) )
//produce a 24bpp color from a ds RGB15, using a table //produce a 24bpp color from a ds RGB15, using a table
#define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] ) #define RGB15TO24_REVERSE(col) ( color_15bit_to_24bit_reverse[col&0x7FFF] )
@ -183,6 +186,7 @@ extern CACHE_ALIGN u16 color_15bit_to_16bit_reverse[32768];
extern CACHE_ALIGN u8 mixTable555[32][32][32]; extern CACHE_ALIGN u8 mixTable555[32][32][32];
extern CACHE_ALIGN const int material_5bit_to_31bit[32]; extern CACHE_ALIGN const int material_5bit_to_31bit[32];
extern CACHE_ALIGN const u8 material_5bit_to_8bit[32]; extern CACHE_ALIGN const u8 material_5bit_to_8bit[32];
extern CACHE_ALIGN const u8 material_3bit_to_5bit[8];
extern CACHE_ALIGN const u8 material_3bit_to_8bit[8]; extern CACHE_ALIGN const u8 material_3bit_to_8bit[8];
extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32]; extern CACHE_ALIGN const u8 alpha_5bit_to_4bit[32];

View File

@ -309,12 +309,7 @@ static struct Sampler
dowrap(iu,iv); dowrap(iu,iv);
Fragment::Color color; Fragment::Color color;
u32 col32 = ((u32*)textures.currentData)[(iv<<wshift)+iu]; color.color = ((u32*)textures.currentData)[(iv<<wshift)+iu];
//todo - teach texcache how to provide these already in 5555
col32 >>= 3;
col32 &= 0x1F1F1F1F;
color.color = col32;
return color; return color;
} }
@ -940,18 +935,24 @@ static void SoftRastVramReconfigureSignal() {
TexCache_Invalidate(); TexCache_Invalidate();
} }
//indexed by a fragment's alpha component; yields the bit that gets OR'd into the RGB15 output.
//entry 0 contributes nothing, entries 1..31 contribute 0x8000.
//replaces an "if(a > 0) *dst |= 0x8000" test with a table read
CACHE_ALIGN static const u16 alpha_lookup[32] = {
	0x0000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000,
	0x8000, 0x8000, 0x8000, 0x8000
};
static void SoftRastGetLine(int line, u16* dst, u8* dstAlpha) static void SoftRastGetLine(int line, u16* dst, u8* dstAlpha)
{ {
Fragment* src = screen+((line)<<8); Fragment* src = screen+((line)<<8);
for(int i=0;i<256;i++) for(int i=0;i<256;i++)
{ {
const bool testRenderAlpha = false; const bool testRenderAlpha = false;
u8 r = src->color.components.r; const u8 r = src->color.components.r;
u8 g = src->color.components.g; const u8 g = src->color.components.g;
u8 b = src->color.components.b; const u8 b = src->color.components.b;
*dst = R5G5B5TORGB15(r,g,b); *dst = R5G5B5TORGB15(r,g,b);
if(src->color.components.a > 0)
*dst |= 0x8000; *dst |= alpha_lookup[src->color.components.a];
*dstAlpha = alpha_5bit_to_4bit[src->color.components.a]; *dstAlpha = alpha_5bit_to_4bit[src->color.components.a];
if(testRenderAlpha) if(testRenderAlpha)
@ -971,12 +972,11 @@ static void SoftRastGetLineCaptured(int line, u16* dst) {
Fragment* src = screen+((line)<<8); Fragment* src = screen+((line)<<8);
for(int i=0;i<256;i++) for(int i=0;i<256;i++)
{ {
u8 r = src->color.components.r; const u8 r = src->color.components.r;
u8 g = src->color.components.g; const u8 g = src->color.components.g;
u8 b = src->color.components.b; const u8 b = src->color.components.b;
*dst = R5G5B5TORGB15(r,g,b); *dst = R5G5B5TORGB15(r,g,b);
if(src->color.components.a > 0) *dst |= alpha_lookup[src->color.components.a];
*dst |= 0x8000;
src++; src++;
dst++; dst++;
} }
@ -1269,7 +1269,7 @@ static void SoftRastRender()
if(needInitTexture || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette) if(needInitTexture || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette)
{ {
TexCache_SetTexture(poly->texParam,poly->texPalette); TexCache_SetTexture<TexFormat_15bpp>(poly->texParam,poly->texPalette);
sampler.setup(poly->texParam); sampler.setup(poly->texParam);
lastTextureFormat = poly->texParam; lastTextureFormat = poly->texParam;
lastTexturePalette = poly->texPalette; lastTexturePalette = poly->texPalette;

View File

@ -12,6 +12,7 @@
using std::min; using std::min;
using std::max; using std::max;
//only dump this from ogl renderer. for now, softrasterizer creates things in an incompatible pixel format
//#define DEBUG_DUMP_TEXTURE //#define DEBUG_DUMP_TEXTURE
//This class represents a number of regions of memory which should be viewed as contiguous //This class represents a number of regions of memory which should be viewed as contiguous
@ -150,7 +151,11 @@ static void DebugDumpTexture(int which)
static int lastTexture = -1; static int lastTexture = -1;
void TexCache_SetTexture(unsigned int format, unsigned int texpal)
//convert a ds RGB15 color plus alpha into the pixel format selected by TEXFORMAT.
//only usable where a TEXFORMAT template parameter is in scope.
//alpha is handed to the chosen converter unscaled (RGB15TO32 for 32bpp, RGB15TO5555 otherwise)
#define CONVERT(color,alpha) ((TEXFORMAT != TexFormat_32bpp)?RGB15TO5555(color,alpha):(RGB15TO32(color,alpha)))
template<TexCache_TexFormat TEXFORMAT>
void TexCache_SetTexture(u32 format, u32 texpal)
{ {
//for each texformat, number of palette entries //for each texformat, number of palette entries
const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0}; const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0};
@ -309,7 +314,8 @@ REJECT:
//INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY); //INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY);
//============================================================================ Texture conversion //============================================================================ Texture conversion
u32 palZeroTransparent = (1-((format>>29)&1))*255; // shash: CONVERT THIS TO A TABLE :) const u32 opaqueColor = TEXFORMAT==TexFormat_32bpp?255:31;
u32 palZeroTransparent = (1-((format>>29)&1))*opaqueColor;
switch (texcache[tx].mode) switch (texcache[tx].mode)
{ {
@ -321,6 +327,9 @@ REJECT:
{ {
u16 c = pal[*adr&31]; u16 c = pal[*adr&31];
u8 alpha = *adr>>5; u8 alpha = *adr>>5;
if(TEXFORMAT == TexFormat_15bpp)
*dwdst++ = RGB15TO5555(c,material_3bit_to_5bit[alpha]);
else
*dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]); *dwdst++ = RGB15TO32(c,material_3bit_to_8bit[alpha]);
adr++; adr++;
} }
@ -339,19 +348,19 @@ REJECT:
bits = (*adr)&0x3; bits = (*adr)&0x3;
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
bits = ((*adr)>>2)&0x3; bits = ((*adr)>>2)&0x3;
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
bits = ((*adr)>>4)&0x3; bits = ((*adr)>>4)&0x3;
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
bits = ((*adr)>>6)&0x3; bits = ((*adr)>>6)&0x3;
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
adr++; adr++;
} }
@ -369,11 +378,11 @@ REJECT:
bits = (*adr)&0xF; bits = (*adr)&0xF;
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
bits = ((*adr)>>4); bits = ((*adr)>>4);
c = pal[bits]; c = pal[bits];
*dwdst++ = RGB15TO32(c,(bits == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(bits == 0) ? palZeroTransparent : opaqueColor);
adr++; adr++;
} }
} }
@ -386,7 +395,7 @@ REJECT:
for(u32 x = 0; x < ms.items[j].len; ++x) for(u32 x = 0; x < ms.items[j].len; ++x)
{ {
u16 c = pal[*adr]; u16 c = pal[*adr];
*dwdst++ = RGB15TO32(c,(*adr == 0) ? palZeroTransparent : 255); *dwdst++ = CONVERT(c,(*adr == 0) ? palZeroTransparent : opaqueColor);
adr++; adr++;
} }
} }
@ -492,6 +501,17 @@ REJECT:
} }
} }
if(TEXFORMAT==TexFormat_15bpp)
{
for(int i=0;i<4;i++)
{
tmp_col[i] >>= 3;
tmp_col[i] &= 0x1F1F1F1F;
}
}
//TODO - this could be more precise for 32bpp mode (run it through the color separation table)
//set all 16 texels //set all 16 texels
for (int sy = 0; sy < 4; sy++) for (int sy = 0; sy < 4; sy++)
{ {
@ -520,6 +540,9 @@ REJECT:
{ {
u16 c = pal[*adr&0x07]; u16 c = pal[*adr&0x07];
u8 alpha = (*adr>>3); u8 alpha = (*adr>>3);
if(TEXFORMAT == TexFormat_15bpp)
*dwdst++ = RGB15TO5555(c,alpha);
else
*dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]); *dwdst++ = RGB15TO32(c,material_5bit_to_8bit[alpha]);
adr++; adr++;
} }
@ -534,7 +557,7 @@ REJECT:
{ {
u16 c = map[x]; u16 c = map[x];
int alpha = ((c&0x8000)?255:0); int alpha = ((c&0x8000)?255:0);
*dwdst++ = RGB15TO32(c&0x7FFF,alpha); *dwdst++ = CONVERT(c&0x7FFF,alpha);
} }
} }
break; break;
@ -585,3 +608,7 @@ void TexCache_Invalidate()
void (*TexCache_BindTexture)(u32 texnum) = NULL; void (*TexCache_BindTexture)(u32 texnum) = NULL;
void (*TexCache_BindTextureData)(u32 texnum, u8* data); void (*TexCache_BindTextureData)(u32 texnum, u8* data);
//TexCache_SetTexture is a template whose body is defined in this file, so every
//TEXFORMAT value requested by a caller has to be explicitly instantiated here:
//one instantiation per pixel format
template void TexCache_SetTexture<TexFormat_32bpp>(u32 format, u32 texpal);
template void TexCache_SetTexture<TexFormat_15bpp>(u32 format, u32 texpal);

View File

@ -3,6 +3,12 @@
#include "common.h" #include "common.h"
//pixel formats the texture cache can produce; passed as the template argument of TexCache_SetTexture
enum TexCache_TexFormat
{
	//texels built with RGB15TO32: 8bit alpha, fully opaque is 255
	TexFormat_32bpp = 0,
	//texels built with RGB15TO5555: one 5bit channel per byte, fully opaque is 31
	TexFormat_15bpp = 1
};
#define MAX_TEXTURE 500 #define MAX_TEXTURE 500
#ifndef NOSSE2 #ifndef NOSSE2
struct ALIGN(16) TextureCache struct ALIGN(16) TextureCache
@ -36,7 +42,10 @@ extern void (*TexCache_BindTexture)(u32 texnum);
extern void (*TexCache_BindTextureData)(u32 texnum, u8* data); extern void (*TexCache_BindTextureData)(u32 texnum, u8* data);
void TexCache_Reset(); void TexCache_Reset();
void TexCache_SetTexture(unsigned int format, unsigned int texpal);
//looks up or converts the texture described by format/texpal, producing texels in the
//pixel format chosen by the TEXFORMAT template argument.
//  format - the polygon's texture parameter word (callers pass poly->texParam)
//  texpal - the polygon's texture palette word (callers pass poly->texPalette)
//the template parameter is deliberately not called "format": a template parameter may not
//share its name with a function parameter, and conforming compilers reject that.
template<TexCache_TexFormat TEXFORMAT>
void TexCache_SetTexture(u32 format, u32 texpal);
void TexCache_Invalidate(); void TexCache_Invalidate();
extern u8 TexCache_texMAP[1024*2048*4]; extern u8 TexCache_texMAP[1024*2048*4];