More work on the texture cache. Most of the time the texture address is used as the cacheEntry index (as it was before r1871) to help CopyRenderTargetToTexture (fixing that is still a TODO), but not when the texture format is TLUT-dependent, in order to fix the MP1 perf issue.

Fix TLUT overrun.
Change TexDecoder_GetSafeTextureHash to fix Pokemon Colosseum and improve speed (we may need to tweak the hardcoded values there -> TOTEST).
Added a few notes too, and I am leaving my debug stuff in for a while.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1880 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
memberTwo.mb2 2009-01-16 16:28:33 +00:00
parent 7afc53c3a1
commit acc062a2c1
2 changed files with 90 additions and 35 deletions

View File

@ -16,6 +16,7 @@
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
//#include "VideoCommon.h" // to get debug logs
#include "CPUDetect.h"
#include "TextureDecoder.h"
@ -67,29 +68,42 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format)
u32 TexDecoder_GetTlutHash(const u8* src, int len)
{
//char str[40000], st[20]; str[0]='\0';for (int i=0;i<len;i++){sprintf(st,"%02x ",src[i]);strcat(str,st);}
//DebugLog("tlut: %s", str);
u32 hash = 0xbeefbabe;
for (int i = 0; i < len ; i++) {
hash = _rotl(hash, 7) ^ src[i];
hash += 7;
for (int i = 0; i < len / 4; i ++) {
hash = _rotl(hash, 7) ^ ((u32 *)src)[i];
hash += 7; // to add a bit more entropy/mess in here
//DebugLog("%02i | hash: %08x | src: %08x", i, hash, ((u32 *)src)[i]);
}
return hash;
}
u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed)
{
int sz = TexDecoder_GetTextureSizeInBytes(width, height, texformat);
// Notes (mb2): A relatively large amount of variation ("mess") in the data is needed for a good hash. The safest way
// to satisfy this would be to perform the hash on the whole texture. But since that kills perf we use some assumptions for speed:
// -First assumption: texture borders don't carry more varied data than the rest of the texture. We skip a few
// texels on the edges.
// -Second assumption: consecutive lines may not differ that much. We skip some lines regularly.
// -Third assumption: User info (messy data), in textures, should be either always centered or at the beginning.
// So we can stop hashing near the center.
// Very tweakable (Pokemon Colosseum texts are pretty good test cases, especially short ones).
const int edgeSkip = 3;
const int colSkip = 3;
const int rowSkip = 5;
const int rowEnd = (width - edgeSkip)/4;
const int byteWidth = TexDecoder_GetTextureSizeInBytes(width, 1, texformat);
const int colEnd = height / 2 - edgeSkip;
u32 hash = seed ? seed : 0x1337c0de;
if (sz < 2048) {
for (int i = 0; i < sz / 4; i += 13) {
hash = _rotl(hash, 17) ^ ((u32 *)src)[i];
}
return hash;
} else {
int step = sz / 13 / 4;
for (int i = 0; i < sz / 4; i += step) {
hash = _rotl(hash, 17) ^ ((u32 *)src)[i];
for (int y = edgeSkip; y < colEnd; y += colSkip)
{
for (int x = edgeSkip; x < rowEnd; x += rowSkip)
{
hash = _rotl(hash, 17) ^ ((u32 *)src)[x+byteWidth*y];
}
}
return hash;

View File

@ -217,36 +217,63 @@ void TextureMngr::Cleanup()
}
}
//int dbgTexIdx=0;
TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width, int height, int format, int tlutaddr, int tlutfmt)
{
/* notes (about "UNsafe texture cache"):
* Has to be removed soon.
* But we keep it until the "safe" way becomes rock solid.
* pros: it has a unique ID held by the texture data itself (@address) once cached.
* cons: it writes this unique ID in the gc RAM <- very dangerous (breaks MP1) and ugly
*/
/* notes (about "safe texture cache"):
* Metroid's text issue (character table):
* Same addr, same GX_TF_C4 texture data but different TLUT (hence different outputs).
* That's why we have to hash the TLUT too for TLUT format dependent textures (ie. GX_TF_C4, GX_TF_C8, GX_TF_C14X2).
* And since the address and tex data don't change, the key index in the cacheEntry map can't be the address and
* has to be a hash value (or address + a few bits if the address is really always aligned). This hash value takes into
* account the address, the texture data @ address and, if the fmt is TLUT dependent, the tlut @ tlutaddr.
* TODO: for small TLUTs (ie. GX_TF_C4 => 16B) we have to hash the whole TLUT because the diff can be tiny.
*
* Pokemon Colosseum text issue (plain text):
* Uses a GX_TF_I4 512x512 text-flush-texture at a const address.
* The problem here was just the sparse hash on the texture. This texture is only partly overwritten (just what is needed)
* so lots of old text remains. Thin white chars on a black bg too.
*/
// TODO: - clean this up when ready to kill old "unsafe texture cache"
// - fix Pokemon Colosseum font for bSafeTextureCache (works with !bSafeTextureCache)
// TODO (mb2): find out why other fmts need a tlut hash too (Pokemon Colosseum font -> fmt 1 or 8 or 14 iirc)
// - fix the key index situation with CopyRenderTargetToTexture.
// Could happen only for GX_TF_C4, GX_TF_C8 and GX_TF_C14X2 fmt.
// Wonder if we can't use tex width&height to know if EFB might be copied to it...
// raw idea: TOCHECK if addresses are aligned we have a few bits left...
if (address == 0)
return NULL;
TexMode0 &tm0 = bpmem.tex[texstage > 3].texMode0[texstage & 3];
u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address);
u32 hash_value;
u32 hashseed = address;
if ( (format == GX_TF_C4) || (format == GX_TF_C8) || (format == GX_TF_C14X2) )
// tlut size mask can be up to 0x3FFF (GX_TF_C14X2) but Safer == Slower.
//hashseed = TexDecoder_GetTlutHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(format)&0x7FFF);
hashseed += TexDecoder_GetTlutHash(texMem + tlutaddr, 32);
int bs = TexDecoder_GetBlockWidthInTexels(format) - 1;
int expandedWidth = (width + bs) & (~bs);
u32 hash_value;
u32 texID = address;
if (g_Config.bSafeTextureCache)
hash_value = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, height, format, hashseed);
{
hash_value = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, height, format, 0); // remove last arg
if ( (format == GX_TF_C4) || (format == GX_TF_C8) || (format == GX_TF_C14X2) )
{
// WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up)
// tlut size (in bytes) mask can be up to 0x7FFF (GX_TF_C14X2) but Safer == Slower.
texID ^= TexDecoder_GetTlutHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(format)&0x7F);
//DebugLog("addr: %08x | texID: %08x | texHash: %08x", address, texID, hash_value);
}
}
bool skip_texture_create = false;
TexCache::iterator iter = textures.find(g_Config.bSafeTextureCache ? hash_value : address);
if (g_Config.bSafeTextureCache && iter == textures.end())
iter = textures.find(address);
TexCache::iterator iter = textures.find(texID);
if (iter != textures.end()) {
TCacheEntry &entry = iter->second;
@ -287,10 +314,10 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
PC_TexFormat dfmt = TexDecoder_Decode(temp, ptr, expandedWidth, height, format, tlutaddr, tlutfmt);
//Make an entry in the table
TCacheEntry& entry = textures[ g_Config.bSafeTextureCache ? hash_value : address ];
TCacheEntry& entry = textures[texID];
entry.hashoffset = 0;
entry.paletteHash = hashseed;
//entry.paletteHash = hashseed;
entry.oldpixel = ((u32 *)ptr)[entry.hashoffset];
if (g_Config.bSafeTextureCache) {
entry.hash = hash_value;
@ -353,7 +380,21 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
SETSTAT(stats.numTexturesAlive, textures.size());
//glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_ARB:GL_TEXTURE_2D);
/*
if ( 1
//&& (entry.w != 640 && entry.h != 480)
//&& (entry.w != 320 && entry.h != 240)
&& (entry.w ==512 && entry.h == 512)
//&& (entry.w > 200 && entry.h > 200)
//&& (format!=1)
)
{
char fn[256];
sprintf(fn, "z_%i_%i_%ix%i_%08x.tga", dbgTexIdx, format, entry.w, entry.h, entry.addr);
SaveTexture(fn, target, entry.texture, entry.w, entry.h);
dbgTexIdx++;
}
*/
//SaveTexture("tex.tga", target, entry.texture, entry.w, entry.h);
return &entry;
}