Changed the hash algorithm to CRC32 utilising the SSE4.2 instruction. The algorithm will automatically be used for the Accurate Texture Cache, EFB to RAM and texture IDs when an SSE4.2-capable CPU is detected. It will fall back to the old algorithm if SSE4.2 is not detected. Using CRC32 speeds up the hash algorithm by around 2X.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7060 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2011-02-04 08:37:58 +00:00
parent d5f6d2bbae
commit d698e022f5
3 changed files with 161 additions and 93 deletions

View File

@ -155,7 +155,7 @@ private:
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
#define _M_SSE 0x301
#elif _MSC_VER >= 1500 // Visual Studio 2008
#define _M_SSE 0x401
#define _M_SSE 0x402
#endif
// Host communication.

View File

@ -16,6 +16,10 @@
// http://code.google.com/p/dolphin-emu/
#include "Hash.h"
#if _M_SSE >= 0x402
#include "CPUDetect.h"
#include <nmmintrin.h>
#endif
// uint32_t
// WARNING - may read one more byte!
@ -105,12 +109,44 @@ u32 HashEctor(const u8* ptr, int length)
}
#ifdef _M_X64
// CRC32 hash using the SSE4.2 instruction (variant built when _M_X64 is defined).
//
// Folds the buffer at `src` into a CRC32 value seeded with `len`. The buffer
// is walked in 8-byte words: with samples == 0 every word is hashed, otherwise
// the walk strides by (words / samples) so roughly `samples` words are hashed.
// The first byte following the last whole word is folded in last, if there is
// one. Returns 0 when compiled without SSE4.2 support (_M_SSE < 0x402).
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u64 h = len;
	const u32 words = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + words;

	// samples == 0 means "hash every word". Clamp to 1 so a buffer shorter
	// than one word (words == 0) cannot cause a division by zero below.
	if (samples == 0)
		samples = (words > 0) ? words : 1;

	u32 Step = words / samples;
	if (Step < 1)
		Step = 1;

	while (data < end)
	{
		h = _mm_crc32_u64(h, data[0]);
		data += Step;
	}

	// Only read the trailing byte when it lies inside the buffer. When len is
	// a multiple of 8 that byte would be one past the end, so 0 is folded in
	// instead and the result depends on the input data alone.
	const u8 *data2 = (const u8 *)end;
	const u64 tail = (len % 8 != 0) ? u64(data2[0]) : 0;
	return _mm_crc32_u64(h, tail);
#else
	return 0;
#endif
}
u64 GetHash64(const u8 *src, int len, u32 samples)
{
const u64 m = 0xc6a4a7935bd1e995;
const int r = 47;
u64 h = len * m;
#if _M_SSE >= 0x402
if (cpu_info.bSSE4_2)
{
h = GetCRC32(src, len, samples);
}
else
#endif
{
const int r = 47;
u32 Step = (len / 8);
const u64 *data = (const u64 *)src;
const u64 *end = data + Step;
@ -145,14 +181,47 @@ u64 GetHash64(const u8 *src, int len, u32 samples)
h ^= h >> r;
h *= m;
h ^= h >> r;
}
return h;
}
#else
// CRC32 hash using the SSE4.2 instruction (variant built when _M_X64 is not defined).
//
// Folds the buffer at `src` into a CRC32 value seeded with `len`. The buffer
// is walked in 4-byte words: with samples == 0 every word is hashed, otherwise
// the walk strides by (words / samples) so roughly `samples` words are hashed.
// The first byte following the last whole word is folded in last, if there is
// one. Returns 0 when compiled without SSE4.2 support (_M_SSE < 0x402).
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u32 h = len;
	const u32 words = (len / 4);
	const u32 *data = (const u32 *)src;
	const u32 *end = data + words;

	// samples == 0 means "hash every word". Clamp to 1 so a buffer shorter
	// than one word (words == 0) cannot cause a division by zero below.
	if (samples == 0)
		samples = (words > 0) ? words : 1;

	u32 Step = words / samples;
	if (Step < 1)
		Step = 1;

	while (data < end)
	{
		h = _mm_crc32_u32(h, data[0]);
		data += Step;
	}

	// Only read the trailing byte when it lies inside the buffer. When len is
	// a multiple of 4 that byte would be one past the end, so 0 is folded in
	// instead and the result depends on the input data alone.
	const u8 *data2 = (const u8 *)end;
	const u32 tail = (len % 4 != 0) ? u32(data2[0]) : 0;
	return (u64)_mm_crc32_u32(h, tail);
#else
	return 0;
#endif
}
u64 GetHash64(const u8 *src, int len, u32 samples)
{
const u32 m = 0x5bd1e995;
u64 h = 0;
#if _M_SSE >= 0x402
if (cpu_info.bSSE4_2)
{
h = GetCRC32(src, len, samples);
}
else
#endif
{
const int r = 24;
u32 h1 = len;
@ -210,12 +279,10 @@ u64 GetHash64(const u8 *src, int len, u32 samples)
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
u64 h = h1;
h = h1;
h = (h << 32) | h2;
}
return h;
}
#endif

View File

@ -24,5 +24,6 @@ u32 HashFletcher(const u8* data_u8, size_t length); // FAST. Length & 1 == 0.
u32 HashAdler32(const u8* data, size_t len); // Fairly accurate, slightly slower
u32 HashFNV(const u8* ptr, int length); // Another fast and decent hash
u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS
u64 GetCRC32(const u8 *src, int len, u32 samples); // SSE4.2 version of CRC32; returns 0 when built without SSE4.2 support (_M_SSE < 0x402)
u64 GetHash64(const u8 *src, int len, u32 samples); // Uses GetCRC32 when cpu_info.bSSE4_2 is set, otherwise the older hash
#endif // _HASH_H_