Texture cache optimization for frequent palette updates

Some old 2D games change palettes very frequently, which causes the
texture to be updated for each render.
This change uses a hash of the palette to detect changes, and includes
the palette type in the cache key to distinguish textures of different
depth.
This commit is contained in:
Flyinghead 2018-09-06 12:11:55 +02:00
parent a167b43361
commit e54ac36544
4 changed files with 54 additions and 37 deletions

View File

@ -6,10 +6,6 @@
bool pal_needs_update=true;
bool fog_needs_update=true;
u32 _pal_rev_256[4]={0};
u32 _pal_rev_16[64]={0};
u32 pal_rev_256[4]={0};
u32 pal_rev_16[64]={0};
u8 pvr_regs[pvr_RegSize];
@ -95,16 +91,9 @@ void pvr_WriteReg(u32 paddr,u32 data)
return;
}
if (addr>=PALETTE_RAM_START_addr)
if (addr>=PALETTE_RAM_START_addr && PvrReg(addr,u32)!=data)
{
if (PvrReg(addr,u32)!=data)
{
u32 pal=(addr/4)&1023;
pal_needs_update=true;
_pal_rev_256[pal>>8]++;
_pal_rev_16[pal>>4]++;
}
pal_needs_update=true;
}
if (addr>=FOG_TABLE_START_addr && addr<=FOG_TABLE_END_addr && PvrReg(addr,u32)!=data)

View File

@ -14,6 +14,8 @@ u32 palette_index;
bool KillTex=false;
u32 palette16_ram[1024];
u32 palette32_ram[1024];
u32 pal_hash_256[4];
u32 pal_hash_16[64];
u32 detwiddle[2][8][1024];
//input : address in the yyyyyxxxxx format
@ -70,12 +72,25 @@ void BuildTwiddleTables()
static OnLoad btt(&BuildTwiddleTables);
// FNV-1a hashing algorithm (32-bit variant).
//
// HASH_PALETTE folds the four bytes of palette32_ram[i], in memory order,
// into the running hash stored at palette_hash[i >> bpp]. The running hash
// is re-seeded with HASH_OFFSET whenever i is the first entry of a group of
// (1 << bpp) entries, so each slot ends up holding the hash of one
// complete group once the caller's loop has walked over it.
//
// The macros deliberately read the loop index `i` from the enclosing scope:
// they are meant to be expanded inside a loop over the palette entries,
// after palette32_ram[i] has been written for the current iteration.
//
// Macro arguments are parenthesized in the expansion and the constants carry
// an unsigned suffix (the offset basis does not fit in a signed 32-bit int).
#define HASH_OFFSET 2166136261u
#define HASH_PRIME 16777619u
#define HASH_PALETTE(palette_hash, bpp) do { \
		u32 *hash_slot = &(palette_hash)[i >> (bpp)]; \
		if ((i & ((1 << (bpp)) - 1)) == 0) \
			*hash_slot = HASH_OFFSET; \
		const u8 *entry_bytes = (const u8 *)&palette32_ram[i]; \
		*hash_slot = (*hash_slot ^ entry_bytes[0]) * HASH_PRIME; \
		*hash_slot = (*hash_slot ^ entry_bytes[1]) * HASH_PRIME; \
		*hash_slot = (*hash_slot ^ entry_bytes[2]) * HASH_PRIME; \
		*hash_slot = (*hash_slot ^ entry_bytes[3]) * HASH_PRIME; \
	} while (0)
// One hash per group of 16 entries.
#define HASH_PALETTE_16() HASH_PALETTE(pal_hash_16, 4)
// One hash per group of 256 entries.
#define HASH_PALETTE_256() HASH_PALETTE(pal_hash_256, 8)
void palette_update()
{
if (pal_needs_update==false)
return;
memcpy(pal_rev_256,_pal_rev_256,sizeof(pal_rev_256));
memcpy(pal_rev_16,_pal_rev_16,sizeof(pal_rev_16));
pal_needs_update=false;
switch(PAL_RAM_CTRL&3)
@ -85,6 +100,8 @@ void palette_update()
{
palette16_ram[i] = ARGB1555(PALETTE_RAM[i]);
palette32_ram[i] = ARGB1555_32(PALETTE_RAM[i]);
HASH_PALETTE_16();
HASH_PALETTE_256();
}
break;
@ -93,6 +110,8 @@ void palette_update()
{
palette16_ram[i] = ARGB565(PALETTE_RAM[i]);
palette32_ram[i] = ARGB565_32(PALETTE_RAM[i]);
HASH_PALETTE_16();
HASH_PALETTE_256();
}
break;
@ -101,6 +120,8 @@ void palette_update()
{
palette16_ram[i] = ARGB4444(PALETTE_RAM[i]);
palette32_ram[i] = ARGB4444_32(PALETTE_RAM[i]);
HASH_PALETTE_16();
HASH_PALETTE_256();
}
break;
@ -109,6 +130,8 @@ void palette_update()
{
palette16_ram[i] = ARGB8888(PALETTE_RAM[i]);
palette32_ram[i] = ARGB8888_32(PALETTE_RAM[i]);
HASH_PALETTE_16();
HASH_PALETTE_256();
}
break;
}

View File

@ -6,10 +6,8 @@ extern u32 palette_index;
extern u32 palette16_ram[1024];
extern u32 palette32_ram[1024];
extern bool pal_needs_update,fog_needs_update,KillTex;
extern u32 pal_rev_256[4];
extern u32 pal_rev_16[64];
extern u32 _pal_rev_256[4];
extern u32 _pal_rev_16[64];
extern u32 pal_hash_256[4];
extern u32 pal_hash_16[64];
extern u32 detwiddle[2][8][1024];

View File

@ -238,8 +238,7 @@ struct TextureCacheData
u32 Updates;
//used for palette updates
u32 pal_local_rev; //local palette rev
u32* pal_table_rev; //table palette rev pointer
u32 palette_hash; // Palette hash at time of last update
u32 indirect_color_ptr; //palette color table index for pal. tex
//VQ quantizers table for VQ tex
//a texture can't be both VQ and PAL at the same time
@ -264,6 +263,11 @@ struct TextureCacheData
printf(" id=%d\n", texID);
}
// True when this texture's pixel format (tcw.PixelFmt) is one of the two
// paletted formats, PixelPal4 or PixelPal8.
bool IsPaletted()
{
	if (tcw.PixelFmt == PixelPal4)
		return true;
	return tcw.PixelFmt == PixelPal8;
}
//Create GL texture from tsp/tcw
void Create(bool isGL)
{
@ -294,19 +298,9 @@ struct TextureCacheData
//PAL texture
if (tex->bpp==4)
{
pal_table_rev=&pal_rev_16[tcw.PalSelect];
indirect_color_ptr=tcw.PalSelect<<4;
}
else if (tex->bpp==8)
{
pal_table_rev=&pal_rev_256[tcw.PalSelect>>4];
indirect_color_ptr=(tcw.PalSelect>>4)<<8;
}
else
{
pal_table_rev=0;
}
//VQ table (if VQ tex)
if (tcw.VQ_Comp)
@ -383,12 +377,17 @@ struct TextureCacheData
GLuint textype=tex->type;
bool has_alpha = false;
if (pal_table_rev)
if (IsPaletted())
{
textype=PAL_TYPE[PAL_RAM_CTRL&3];
pal_local_rev=*pal_table_rev; //make sure to update the local rev, so it won't have to redo the tex
if (textype == GL_UNSIGNED_INT_8_8_8_8)
has_alpha = true;
// Get the palette hash to check for future updates
if (tcw.PixelFmt == PixelPal4)
palette_hash = pal_hash_16[tcw.PalSelect];
else
palette_hash = pal_hash_256[tcw.PalSelect >> 4];
}
palette_index=indirect_color_ptr; //might be used if pal. tex
@ -420,7 +419,7 @@ struct TextureCacheData
|| w * h > settings.rend.MaxFilteredTextureSize
* settings.rend.MaxFilteredTextureSize // Don't process textures that are too big
|| tcw.PixelFmt == PixelYUV) // Don't process YUV textures
&& (pal_table_rev == NULL || textype != GL_UNSIGNED_INT_8_8_8_8)
&& (!IsPaletted() || textype != GL_UNSIGNED_INT_8_8_8_8)
&& texconv != NULL)
need_32bit_buffer = false;
// TODO avoid upscaling/depost. textures that change too often
@ -526,8 +525,13 @@ struct TextureCacheData
}
}
//true if : dirty or paletted texture and revs don't match
bool NeedsUpdate() { return (dirty) || (pal_table_rev!=0 && *pal_table_rev!=pal_local_rev); }
// Returns true when the texture must be rebuilt: either it is flagged dirty,
// or it is a paletted texture whose stored palette_hash no longer matches
// the current hash of the palette group it selects (pal_hash_16 for
// PixelPal4, pal_hash_256 for PixelPal8).
bool NeedsUpdate() {
	if (dirty)
		return true;
	if (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect])
		return true;
	return tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4];
}
void Delete()
{
@ -800,7 +804,10 @@ TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw) {
u64 key = tsp.full & TSPTextureCacheMask.full;
if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8)
// Paletted textures have a palette selection that must be part of the key
key |= (u64)tcw.full << 32;
// We also add the palette type to the key to avoid thrashing the cache
// when the palette type is changed. If the palette type is changed back in the future,
// this texture will still be available.
key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6);
else
key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32;