Changes to hi-res textures. Textures now load correctly when loading/saving a savestate, and can be toggled on and off in game.

Changed non-hi-res textures to use MurmurHash3, which has better performance than the previous hash.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7080 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
baby.lueshi 2011-02-05 10:08:06 +00:00
parent 20e2046fe1
commit 888cd78724
8 changed files with 376 additions and 131 deletions

View File

@ -21,6 +21,8 @@
#include <nmmintrin.h> #include <nmmintrin.h>
#endif #endif
// Hash routine that GetHash64() forwards to. Starts out as GetMurmurHash3 and
// is reassigned by SetHash64Function().
static u64 (*ptrHashFunction)(const u8 *src, int len, u32 samples) = &GetMurmurHash3;
// uint32_t // uint32_t
// WARNING - may read one more byte! // WARNING - may read one more byte!
// Implementation from Wikipedia. // Implementation from Wikipedia.
@ -108,7 +110,128 @@ u32 HashEctor(const u8* ptr, int length)
return(crc); return(crc);
} }
#ifdef _M_X64 #ifdef _M_X64
//-----------------------------------------------------------------------------
// Block read - fetches the i-th 64-bit word counted from p.
// This is the one place to add endian-swapping or an alignment-safe load for
// platforms that need it; as written it is a plain indexed read.
inline u64 getblock(const u64 * p, int i)
{
	return *(p + i);
}
//----------
// Block mix - combine the key bits with the hash bits and scramble everything
//
// Every argument is a non-const reference and every one of them is written:
//   h1, h2 - running hash state
//   k1, k2 - the two 64-bit key words of the current block (overwritten here)
//   c1, c2 - multipliers, advanced at the end so the next call sees new values
// The statements depend on each other in sequence; do not reorder them.
inline void bmix64(u64 & h1, u64 & h2, u64 & k1, u64 & k2, u64 & c1, u64 & c2)
{
// scramble the first key word and fold it into h1
k1 *= c1;
k1 = _rotl64(k1,23);
k1 *= c2;
h1 ^= k1;
h1 += h2;
h2 = _rotl64(h2,41);
// scramble the second key word (multipliers used in the opposite order) and fold it into h2
k2 *= c2;
k2 = _rotl64(k2,23);
k2 *= c1;
h2 ^= k2;
h2 += h1;
// extra multiply-add pass over both hash words
h1 = h1*3+0x52dce729;
h2 = h2*3+0x38495ab5;
// step the multipliers for the next block
c1 = c1*5+0x7b7d159c;
c2 = c2*5+0x6bce6396;
}
//----------
// Finalization mix - avalanches all bits to within 0.05% bias
// Three xor-with-(value >> 33) steps, with a 64-bit multiply after each of the
// first two.
inline u64 fmix64(u64 k)
{
	k = (k ^ (k >> 33)) * 0xff51afd7ed558ccd;
	k = (k ^ (k >> 33)) * 0xc4ceb9fe1a85ec53;
	return k ^ (k >> 33);
}
// MurmurHash3-style 64-bit hash of the len bytes starting at src.
//
// src     - data to hash; read 16 bytes at a time through getblock(), then
//           byte-by-byte for the remaining (len & 15) bytes
// len     - number of bytes to hash
// samples - not used by this routine; every byte is hashed
//
// Returns the sum of the two finalized 64-bit hash words.
u64 GetMurmurHash3(const u8 *src, const int len, u32 samples)
{
	const int fullBlocks = len / 16;
	const int tailLength = len & 15;

	// hash state and block multipliers
	u64 h1 = 0x9368e53c2f6af274;
	u64 h2 = 0x586dcd208f7cd3fd;
	u64 c1 = 0x87c37b91114253d5;
	u64 c2 = 0x4cf5ad432745937f;

	// body: two 64-bit words per 16-byte block
	const u64 * words = (const u64 *)src;
	for (int blk = 0; blk < fullBlocks; ++blk)
	{
		u64 lo = getblock(words, 2 * blk);
		u64 hi = getblock(words, 2 * blk + 1);
		bmix64(h1, h2, lo, hi, c1, c2);
	}

	// tail: pack the leftover bytes little-end first, bytes 0-7 into the
	// first key word and bytes 8-14 into the second, then mix once.
	// Nothing is mixed when there are no leftover bytes.
	if (tailLength != 0)
	{
		const u8 * tail = src + fullBlocks * 16;
		u64 lo = 0;
		u64 hi = 0;
		for (int idx = 0; idx < tailLength; ++idx)
		{
			const u64 byteValue = u64(tail[idx]);
			if (idx < 8)
				lo ^= byteValue << (8 * idx);
			else
				hi ^= byteValue << (8 * (idx - 8));
		}
		bmix64(h1, h2, lo, hi, c1, c2);
	}

	// finalization: fold in the length, cross-add, avalanche both words
	h2 ^= len;
	h1 += h2;
	h2 += h1;

	h1 = fmix64(h1);
	h2 = fmix64(h2);

	return h1 + h2;
}
// CRC32 hash using the SSE4.2 instruction // CRC32 hash using the SSE4.2 instruction
u64 GetCRC32(const u8 *src, int len, u32 samples) u64 GetCRC32(const u8 *src, int len, u32 samples)
{ {
@ -133,24 +256,16 @@ u64 GetCRC32(const u8 *src, int len, u32 samples)
#endif #endif
} }
u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy)
{
const u64 m = 0xc6a4a7935bd1e995;
u64 h = len * m;
#if _M_SSE >= 0x402
if (cpu_info.bSSE4_2 && !legacy)
{
h = GetCRC32(src, len, samples);
}
else
#endif
/* NOTE: This hash function is used for custom texture loading/dumping, so /* NOTE: This hash function is used for custom texture loading/dumping, so
it should not be changed, which would require all custom textures to be it should not be changed, which would require all custom textures to be
recalculated for their new hash values. If the hashing function is recalculated for their new hash values. If the hashing function is
changed, make sure this one is still used when the legacy parameter is changed, make sure this one is still used when the legacy parameter is
true. */ true. */
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{ {
const u64 m = 0xc6a4a7935bd1e995;
u64 h = len * m;
const int r = 47; const int r = 47;
u32 Step = (len / 8); u32 Step = (len / 8);
const u64 *data = (const u64 *)src; const u64 *data = (const u64 *)src;
@ -186,7 +301,6 @@ u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy)
h ^= h >> r; h ^= h >> r;
h *= m; h *= m;
h ^= h >> r; h ^= h >> r;
}
return h; return h;
} }
@ -215,79 +329,191 @@ u64 GetCRC32(const u8 *src, int len, u32 samples)
#endif #endif
} }
u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy) //-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
inline u32 getblock(const u32 * p, int i)
{ {
const u32 m = 0x5bd1e995; return p[i];
u64 h = 0;
#if _M_SSE >= 0x402
if (cpu_info.bSSE4_2 && !legacy)
{
h = GetCRC32(src, len, samples);
} }
else
#endif //----------
// Finalization mix - force all bits of a hash block to avalanche
// avalanches all bits to within 0.25% bias
inline u32 fmix32(u32 h)
{ {
const int r = 24; h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
u32 h1 = len; return h;
u32 h2 = 0; }
u32 Step = (len / 4); inline void bmix32(u32 & h1, u32 & h2, u32 & k1, u32 & k2, u32 & c1, u32 & c2)
const u32 * data = (const u32 *)src;
const u32 * end = data + Step;
const u8 * uEnd = (const u8 *)end;
if(samples == 0) samples = Step;
Step = Step / samples;
if(Step < 2) Step = 2;
while(data < end)
{ {
u32 k1 = data[0]; k1 *= c1;
k1 *= m; k1 = _rotl(k1,11);
k1 ^= k1 >> r; k1 *= c2;
k1 *= m;
h1 *= m;
h1 ^= k1; h1 ^= k1;
h1 += h2;
h2 = _rotl(h2,17);
u32 k2 = data[1]; k2 *= c2;
k2 *= m; k2 = _rotl(k2,11);
k2 ^= k2 >> r; k2 *= c1;
k2 *= m;
h2 *= m;
h2 ^= k2; h2 ^= k2;
data+=Step; h2 += h1;
h1 = h1*3+0x52dce729;
h2 = h2*3+0x38495ab5;
c1 = c1*5+0x7b7d159c;
c2 = c2*5+0x6bce6396;
} }
if((len & 7) > 3) //----------
u64 GetMurmurHash3(const u8* src, int len, u32 samples)
{ {
u32 k1 = *(end - 1); const u8 * data = (const u8*)src;
k1 *= m; const int nblocks = len / 8;
k1 ^= k1 >> r; u32 out[2];
k1 *= m;
h1 *= m; u32 h1 = 0x8de1c3ac;
h1 ^= k1; u32 h2 = 0xbab98226;
len -= 4;
u32 c1 = 0x95543787;
u32 c2 = 0x2ad7eb25;
//----------
// body
const u32 * blocks = (const u32 *)(data + nblocks*8);
for(int i = -nblocks; i; i++)
{
u32 k1 = getblock(blocks,i*2+0);
u32 k2 = getblock(blocks,i*2+1);
bmix32(h1,h2,k1,k2,c1,c2);
} }
switch(len & 3) //----------
// tail
const u8 * tail = (const u8*)(data + nblocks*8);
u32 k1 = 0;
u32 k2 = 0;
switch(len & 7)
{ {
case 3: h2 ^= uEnd[2] << 16; case 7: k2 ^= tail[6] << 16;
case 2: h2 ^= uEnd[1] << 8; case 6: k2 ^= tail[5] << 8;
case 1: h2 ^= uEnd[0]; case 5: k2 ^= tail[4] << 0;
h2 *= m; case 4: k1 ^= tail[3] << 24;
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0] << 0;
bmix32(h1,h2,k1,k2,c1,c2);
}; };
h1 ^= h2 >> 18; h1 *= m; //----------
h2 ^= h1 >> 22; h2 *= m; // finalization
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
h = h1; h2 ^= len;
h = (h << 32) | h2; h1 += h2;
h2 += h1;
h1 = fmix32(h1);
h2 = fmix32(h2);
h1 += h2;
h2 += h1;
out[0] = h1;
out[1] = h2;
return *((u64 *)&out);
} }
/* FIXME: The old 32-bit version of this hash made different hashes than the
64-bit version. Until someone can make a new version of the 32-bit one that
makes identical hashes, this is just a c/p of the 64-bit one. */
// Hashes src by sampling 8-byte words and then folding in the trailing
// (len & 7) bytes.
//   src     - data to hash, read as 64-bit words
//   len     - length of the data in bytes
//   samples - divisor for the word stride; 0 means "use the word count",
//             which yields a stride of 1 (every word)
// Returns the 64-bit hash value.
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{
const u64 m = 0xc6a4a7935bd1e995;
u64 h = len * m;
const int r = 47;
// Step starts as the number of whole 8-byte words and is turned into the stride below
u32 Step = (len / 8);
const u64 *data = (const u64 *)src;
const u64 *end = data + Step;
// NOTE(review): when len < 8 and samples == 0, samples is set to 0 here and
// the division on the next line divides by zero.
if(samples == 0) samples = Step;
Step = Step / samples;
if(Step < 1) Step = 1;
// mix one word per iteration, advancing by the stride
while(data < end)
{
u64 k = data[0];
data+=Step;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
// trailing bytes after the last whole word; the cases fall through on purpose
const u8 * data2 = (const u8*)end;
switch(len & 7)
{
case 7: h ^= u64(data2[6]) << 48;
case 6: h ^= u64(data2[5]) << 40;
case 5: h ^= u64(data2[4]) << 32;
case 4: h ^= u64(data2[3]) << 24;
case 3: h ^= u64(data2[2]) << 16;
case 2: h ^= u64(data2[1]) << 8;
case 1: h ^= u64(data2[0]);
h *= m;
};
// final xor-shift / multiply / xor-shift
h ^= h >> r;
h *= m;
h ^= h >> r;
return h; return h;
} }
#endif #endif
// Hashes len bytes at src with whichever routine ptrHashFunction currently
// points to; samples is passed through unchanged.
u64 GetHash64(const u8 *src, int len, u32 samples)
{
	return (*ptrHashFunction)(src, len, samples);
}
// Selects the routine GetHash64() dispatches to for the texture cache.
//   useHiresTextures == true  -> GetHashHiresTexture
//   otherwise                 -> GetCRC32 when built with _M_SSE >= 0x402 and
//                                cpu_info.bSSE4_2 is set, else GetMurmurHash3
void SetHash64Function(bool useHiresTextures)
{
	if (useHiresTextures)
	{
		ptrHashFunction = &GetHashHiresTexture;
		return;
	}

#if _M_SSE >= 0x402
	// sse crc32 version
	if (cpu_info.bSSE4_2)
	{
		ptrHashFunction = &GetCRC32;
		return;
	}
#endif

	ptrHashFunction = &GetMurmurHash3;
}

View File

@ -25,5 +25,8 @@ u32 HashAdler32(const u8* data, size_t len); // Fairly accurate, slightl
u32 HashFNV(const u8* ptr, int length); // Another fast and decent hash u32 HashFNV(const u8* ptr, int length); // Another fast and decent hash
u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS
u64 GetCRC32(const u8 *src, int len, u32 samples); // SSE4.2 version of CRC32 u64 GetCRC32(const u8 *src, int len, u32 samples); // SSE4.2 version of CRC32
u64 GetHash64(const u8 *src, int len, u32 samples, bool legacy = false); u64 GetHashHiresTexture(const u8 *src, int len, u32 samples);
u64 GetMurmurHash3(const u8 *src, int len, u32 samples);
u64 GetHash64(const u8 *src, int len, u32 samples);
void SetHash64Function(bool useHiresTextures);
#endif // _HASH_H_ #endif // _HASH_H_

View File

@ -33,6 +33,8 @@ std::map<std::string, std::string> textureMap;
void Init(const char *gameCode) void Init(const char *gameCode)
{ {
textureMap.clear();
CFileSearch::XStringVector Directories; CFileSearch::XStringVector Directories;
//Directories.push_back(std::string(File::GetUserPath(D_HIRESTEXTURES_IDX))); //Directories.push_back(std::string(File::GetUserPath(D_HIRESTEXTURES_IDX)));
char szDir[MAX_PATH]; char szDir[MAX_PATH];
@ -88,11 +90,6 @@ void Init(const char *gameCode)
} }
} }
void Shutdown()
{
textureMap.clear();
}
PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data) PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data)
{ {
std::string key(fileName); std::string key(fileName);

View File

@ -25,7 +25,6 @@
namespace HiresTextures namespace HiresTextures
{ {
void Init(const char *gameCode); void Init(const char *gameCode);
void Shutdown();
PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data); PC_TexFormat GetHiresTex(const char *fileName, unsigned int *pWidth, unsigned int *pHeight, int texformat, u8 *data);
}; };

View File

@ -23,8 +23,10 @@ enum
TextureCache *g_texture_cache; TextureCache *g_texture_cache;
u8 *TextureCache::temp; u8 *TextureCache::temp = NULL;
TextureCache::TexCache TextureCache::textures; TextureCache::TexCache TextureCache::textures;
Common::CriticalSection TextureCache::texMutex;
TextureCache::TCacheEntryBase::~TCacheEntryBase() TextureCache::TCacheEntryBase::~TCacheEntryBase()
{ {
@ -41,13 +43,17 @@ TextureCache::TCacheEntryBase::~TCacheEntryBase()
TextureCache::TextureCache() TextureCache::TextureCache()
{ {
if (!temp)
temp = (u8*)AllocateMemoryPages(TEMP_SIZE); temp = (u8*)AllocateMemoryPages(TEMP_SIZE);
TexDecoder_SetTexFmtOverlayOptions(g_ActiveConfig.bTexFmtOverlayEnable, g_ActiveConfig.bTexFmtOverlayCenter); TexDecoder_SetTexFmtOverlayOptions(g_ActiveConfig.bTexFmtOverlayEnable, g_ActiveConfig.bTexFmtOverlayCenter);
if(g_ActiveConfig.bHiresTextures && !g_ActiveConfig.bDumpTextures)
HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
SetHash64Function(g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures);
} }
void TextureCache::Invalidate(bool shutdown) void TextureCache::Invalidate(bool shutdown)
{ {
texMutex.Enter();
TexCache::iterator TexCache::iterator
iter = textures.begin(), iter = textures.begin(),
tcend = textures.end(); tcend = textures.end();
@ -59,15 +65,21 @@ void TextureCache::Invalidate(bool shutdown)
} }
textures.clear(); textures.clear();
HiresTextures::Shutdown(); if(g_ActiveConfig.bHiresTextures && !g_ActiveConfig.bDumpTextures)
HiresTextures::Init(SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
SetHash64Function(g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures);
texMutex.Leave();
} }
TextureCache::~TextureCache() TextureCache::~TextureCache()
{ {
Invalidate(true); Invalidate(true);
if (temp)
{
FreeMemoryPages(temp, TEMP_SIZE); FreeMemoryPages(temp, TEMP_SIZE);
temp = NULL; temp = NULL;
} }
}
void TextureCache::Cleanup() void TextureCache::Cleanup()
{ {
@ -180,22 +192,20 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
const u32 palette_size = TexDecoder_GetPaletteSize(texformat); const u32 palette_size = TexDecoder_GetPaletteSize(texformat);
bool texture_is_dynamic = false; bool texture_is_dynamic = false;
bool forceLegacyHash = (g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures);
unsigned int texLevels; unsigned int texLevels;
PC_TexFormat pcfmt = PC_TEX_FMT_NONE; PC_TexFormat pcfmt = PC_TEX_FMT_NONE;
// someone who understands this var could rename it :p const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
const bool isC4_C8_C14X2 = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
if (isC4_C8_C14X2) if (isPaletteTexture)
full_format = texformat | (tlutfmt << 16); full_format = texformat | (tlutfmt << 16);
// hires texture loading and texture dumping require accurate hashes // hires texture loading and texture dumping require accurate hashes
if (g_ActiveConfig.bSafeTextureCache || forceLegacyHash) if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures)
{ {
texHash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples, forceLegacyHash); texHash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isC4_C8_C14X2) if (isPaletteTexture)
{ {
// WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up)
// tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower. // tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower.
@ -206,7 +216,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
// we must make sure that texture with different tluts get different IDs. // we must make sure that texture with different tluts get different IDs.
const u64 tlutHash = GetHash64(texMem + tlutaddr, palette_size, const u64 tlutHash = GetHash64(texMem + tlutaddr, palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples, forceLegacyHash); g_ActiveConfig.iSafeTextureCache_ColorSamples);
texHash ^= tlutHash; texHash ^= tlutHash;
@ -229,7 +239,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
{ {
hash_value = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); hash_value = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isC4_C8_C14X2) if (isPaletteTexture)
{ {
hash_value ^= GetHash64(&texMem[tlutaddr], palette_size, hash_value ^= GetHash64(&texMem[tlutaddr], palette_size,
g_ActiveConfig.iSafeTextureCache_ColorSamples); g_ActiveConfig.iSafeTextureCache_ColorSamples);

View File

@ -7,6 +7,7 @@
#include "VideoCommon.h" #include "VideoCommon.h"
#include "TextureDecoder.h" #include "TextureDecoder.h"
#include "BPMemory.h" #include "BPMemory.h"
#include "Thread.h"
#include "CommonTypes.h" #include "CommonTypes.h"
@ -71,6 +72,8 @@ public:
virtual ~TextureCache(); // needs virtual for DX11 dtor virtual ~TextureCache(); // needs virtual for DX11 dtor
static void Init();
static void Shutdown();
static void Cleanup(); static void Cleanup();
static void Invalidate(bool shutdown); static void Invalidate(bool shutdown);
@ -88,6 +91,8 @@ public:
static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt,
u32 copyfmt, bool bScaleByHalf, const EFBRectangle &source_rect); u32 copyfmt, bool bScaleByHalf, const EFBRectangle &source_rect);
static Common::CriticalSection texMutex;
protected: protected:
TextureCache(); TextureCache();

View File

@ -158,7 +158,9 @@ void VertexManager::AddVertices(int primitive, int numVertices)
void VertexManager::Flush() void VertexManager::Flush()
{ {
TextureCache::texMutex.Enter();
g_vertex_manager->vFlush(); g_vertex_manager->vFlush();
TextureCache::texMutex.Leave();
} }
// TODO: need to merge more stuff into VideoCommon to use this // TODO: need to merge more stuff into VideoCommon to use this

View File

@ -2,6 +2,7 @@
#include "VideoConfigDiag.h" #include "VideoConfigDiag.h"
#include "FileUtil.h" #include "FileUtil.h"
#include "TextureCacheBase.h"
#include <wx/intl.h> #include <wx/intl.h>
@ -58,6 +59,8 @@ void VideoConfigDiag::Event_Close(wxCloseEvent& ev)
g_Config.Save((File::GetUserPath(D_CONFIG_IDX) + ininame + ".ini").c_str()); g_Config.Save((File::GetUserPath(D_CONFIG_IDX) + ininame + ".ini").c_str());
ev.Skip(); ev.Skip();
TextureCache::Invalidate(false); // For settings like hi-res textures/texture format/etc.
} }