First a bugfix:

Fixed a misbehavior in the clear code that caused depth-clear problems on reference hardware (Intel, for example).
Added 6 parameters to optimize the Safe Texture Cache:
SafeTextureCacheColorSamples, SafeTextureCacheIndexedSamples, SafeTextureCacheTlutSamples:

These 3 parameters give the number of samples taken to calculate the final hash value: fewer samples = more speed, more samples = more accuracy.
If 0 is specified, the hash is calculated using all the data in the texture.
SafeTextureCacheColorMaxSize, SafeTextureCacheIndexedMaxSize, SafeTextureCacheTlutMaxSize:
These parameters limit the amount of data used for the hash calculation. This may appear redundant, but in some games it is better to make a full hash of the first bytes instead of taking some samples from the whole texture.

Color, Indexed, Tlut: these define the kind of data each parameter applies to: full-color texture data, indexed texture data, and the TLUT memory.

The parameters are available in the config; there is no GUI for them at this time. If the tests are OK, I will add them to the GUI.
If someone needs it, I will give more examples of how to configure the values for specific games.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5116 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2010-02-23 21:52:12 +00:00
parent ba25f08d62
commit 3cc5d8ce6f
9 changed files with 150 additions and 85 deletions

View File

@ -88,36 +88,37 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format)
return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2;
}
u32 TexDecoder_GetFullHash32(const u8 *src, int len, u32 seed)
u32 TexDecoder_GetHash32(const u8 *src, int len, u32 samples)
{
const u32 m = 0x5bd1e995;
const int r = 24;
u32 h = seed ^ len;
u32 h = len;
const u32 * data = (const u32 *)src;
int Flen = len / 4;
while(len)
u32 Step = (len/4);
const u32 * End = data + Step;
const u8 * uEnd = (const u8 *)End;
if(samples == 0) samples = Step;
Step = Step / samples;
if(Step < 1) Step = 1;
while(data < End)
{
u32 k = data[0];
k *= m;
k ^= k >> r;
//k *= m;
//h *= m;
k *= m;
h *= m;
h ^= k;
data++;
len --;
data+=Step;
}
switch(len)
switch(len & 3)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
case 3: h ^= u32(uEnd[2]) << 16;
case 2: h ^= u32(uEnd[1]) << 8;
case 1: h ^= u32(uEnd[0]);
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
@ -125,29 +126,30 @@ u32 TexDecoder_GetFullHash32(const u8 *src, int len, u32 seed)
}
#ifdef _M_X64
u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed)
u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples)
{
const u64 m = 0xc6a4a7935bd1e995;
const int r = 47;
u64 h = seed ^ (len * m);
u64 h = len * m;
u32 Step = (len/8);
const u64 * data = (const u64 *)src;
const u64 * end = data + (len/8);
while(data != end)
const u64 * end = data + Step;
if(samples == 0) samples = Step;
Step = Step / samples;
if(Step < 1) Step = 1;
while(data < end)
{
u64 k = data[0];
data++;
data+=Step;
k *= m;
k ^= k >> r;
//k *= m;
k *= m;
h ^= k;
//h *= m;
h *= m;
}
const u8 * data2 = (const u8*)data;
const u8 * data2 = (const u8*)end;
switch(len & 7)
{
@ -169,51 +171,58 @@ u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed)
}
#else
u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed)
u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples)
{
const u32 m = 0x5bd1e995;
const int r = 24;
u32 h1 = seed ^ len;
u32 h1 = len;
u32 h2 = 0;
u32 Step = (len / 4);
const u32 * data = (const u32 *)src;
const u32 * end = data + Step;
const u8 * uEnd = (const u8 *)end;
if(samples == 0) samples = Step;
Step = Step / samples;
while(len >= 8)
if(Step < 2) Step = 2;
while(data < end)
{
u32 k1 = *data++;
u32 k1 = data[0];
k1 *= m;
k1 ^= k1 >> r;
//k1 *= m;
//h1 *= m;
k1 *= m;
h1 *= m;
h1 ^= k1;
len -= 4;
u32 k2 = *data++;
u32 k2 = data[1];
k2 *= m;
k2 ^= k2 >> r;
//k2 *= m;
//h2 *= m;
k2 *= m;
h2 *= m;
h2 ^= k2;
len -= 4;
data+=Step;
}
if(len >= 4)
if(len & 7 > 3)
{
u32 k1 = *data++;
u32 k1 = *(end - 1);
k1 *= m;
k1 ^= k1 >> r;
//k1 *= m;
//h1 *= m;
k1 *= m;
h1 *= m;
h1 ^= k1;
len -= 4;
}
switch(len)
switch(len & 3)
{
case 3: h2 ^= ((u8*)data)[2] << 16;
case 2: h2 ^= ((u8*)data)[1] << 8;
case 1: h2 ^= ((u8*)data)[0];
case 3: h2 ^= uEnd[2] << 16;
case 2: h2 ^= uEnd[1] << 8;
case 1: h2 ^= uEnd[0];
h2 *= m;
};
@ -232,18 +241,6 @@ u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed)
#endif
// Cheap, sampled 64-bit hash of a byte buffer.
//
// The buffer is viewed as an array of 64-bit words (len / 8 of them; any
// trailing bytes are not read). Words are visited with a stride of
// (word count / 37), clamped to a minimum of 1, so small buffers are hashed
// word by word while large ones are only sampled.
//
// src  - start of the data to hash
// len  - size of the data in bytes
// seed - initial hash state; a value of 0 selects a fixed built-in constant
//
// Returns the accumulated hash; when no full 64-bit word fits in len, this
// is just the initial state.
u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed)
{
    const u64 *words = (const u64 *)src;
    const int wordCount = len / 8;

    int stride = wordCount / 37;
    if (stride == 0)
        stride = 1;

    u64 result = (seed != 0) ? seed : 0x1337c0debeefbabeULL;

    int index = 0;
    while (index < wordCount)
    {
        // Rotate the running state, fold in the sampled word, then nudge the
        // state with a small constant to add a bit more entropy.
        result = _rotl64(result, 19) ^ words[index];
        result += 7;
        index += stride;
    }

    return result;
}
int TexDecoder_GetBlockWidthInTexels(u32 format)
{
switch (format)

View File

@ -89,9 +89,8 @@ PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt);
u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed = 0);
u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed = 0);
u32 TexDecoder_GetFullHash32(const u8 *src, int len, u32 seed = 0);
u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples = 0);
u32 TexDecoder_GetHash32(const u8 *src, int len, u32 samples = 0);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);

View File

@ -62,6 +62,14 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "AutoScale", &bAutoScale, true);
iniFile.Get("Settings", "SafeTextureCache", &bSafeTextureCache, false); // Settings
//Safe texture cache params
iniFile.Get("Settings", "SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples,37);
iniFile.Get("Settings", "SafeTextureCacheIndexedSamples", &iSafeTextureCache_IndexedSamples,0);
iniFile.Get("Settings", "SafeTextureCacheTlutSamples", &iSafeTextureCache_TlutSamples,0);
iniFile.Get("Settings", "SafeTextureCacheColorMaxSize", &iSafeTextureCache_ColorMaxSize,0);
iniFile.Get("Settings", "SafeTextureCacheIndexedMaxSize", &iSafeTextureCache_IndexedMaxSize,0);
iniFile.Get("Settings", "SafeTextureCacheTlutMaxSize", &iSafeTextureCache_TlutMaxSize,0);
iniFile.Get("Settings", "ShowFPS", &bShowFPS, false); // Settings
iniFile.Get("Settings", "OverlayStats", &bOverlayStats, false);
iniFile.Get("Settings", "OverlayProjStats", &bOverlayProjStats, false);
@ -126,6 +134,20 @@ void VideoConfig::GameIniLoad(const char *ini_file)
iniFile.Get("Video", "EFBScaledCopy", &bCopyEFBScaled, 0);
if (iniFile.Exists("Video", "SafeTextureCache"))
iniFile.Get("Video", "SafeTextureCache", &bSafeTextureCache, false);
//Safe texture cache params
if (iniFile.Exists("Video", "SafeTextureCacheColorSamples"))
iniFile.Get("Video", "SafeTextureCacheColorSamples", &iSafeTextureCache_ColorSamples,37);
if (iniFile.Exists("Video", "SafeTextureCacheIndexedSamples"))
iniFile.Get("Video", "SafeTextureCacheIndexedSamples", &iSafeTextureCache_IndexedSamples,0);
if (iniFile.Exists("Video", "SafeTextureCacheTlutSamples"))
iniFile.Get("Video", "SafeTextureCacheTlutSamples", &iSafeTextureCache_TlutSamples,0);
if (iniFile.Exists("Video", "SafeTextureCacheColorMaxSize"))
iniFile.Get("Video", "SafeTextureCacheColorMaxSize", &iSafeTextureCache_ColorMaxSize,0);
if (iniFile.Exists("Video", "SafeTextureCacheIndexedMaxSize"))
iniFile.Get("Video", "SafeTextureCacheIndexedMaxSize", &iSafeTextureCache_IndexedMaxSize,0);
if (iniFile.Exists("Video", "SafeTextureCacheTlutMaxSize"))
iniFile.Get("Video", "SafeTextureCacheTlutMaxSize", &iSafeTextureCache_TlutMaxSize,0);
if (iniFile.Exists("Video", "MSAA"))
iniFile.Get("Video", "MSAA", &iMultisampleMode, 0);
if (iniFile.Exists("Video", "DstAlphaPass"))
@ -157,6 +179,14 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "AutoScale", bAutoScale);
iniFile.Set("Settings", "SafeTextureCache", bSafeTextureCache);
//safe texture cache params
iniFile.Set("Settings", "SafeTextureCacheColorSamples", iSafeTextureCache_ColorSamples);
iniFile.Set("Settings", "SafeTextureCacheIndexedSamples", iSafeTextureCache_IndexedSamples);
iniFile.Set("Settings", "SafeTextureCacheTlutSamples", iSafeTextureCache_TlutSamples);
iniFile.Set("Settings", "SafeTextureCacheColorMaxSize", iSafeTextureCache_ColorMaxSize);
iniFile.Set("Settings", "SafeTextureCacheIndexedMaxSize", iSafeTextureCache_IndexedMaxSize);
iniFile.Set("Settings", "SafeTextureCacheTlutMaxSize", iSafeTextureCache_TlutMaxSize);
iniFile.Set("Settings", "ShowFPS", bShowFPS);
iniFile.Set("Settings", "OverlayStats", bOverlayStats);
iniFile.Set("Settings", "OverlayProjStats", bOverlayProjStats);

View File

@ -119,6 +119,12 @@ struct VideoConfig
bool bCopyEFBToTexture;
bool bCopyEFBScaled;
bool bSafeTextureCache;
int iSafeTextureCache_ColorSamples;
int iSafeTextureCache_IndexedSamples;
int iSafeTextureCache_TlutSamples;
int iSafeTextureCache_ColorMaxSize;
int iSafeTextureCache_IndexedMaxSize;
int iSafeTextureCache_TlutMaxSize;
bool bFIFOBPhack;
int iPhackvalue;
bool bPhackvalue1, bPhackvalue2;

View File

@ -1082,7 +1082,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE
D3D::dev->SetScissorRect(&sirc);
if (zEnable)
D3D::ChangeRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS);
D3D::drawClearQuad(color ,(z & 0xFFFFFF) / float(0xFFFFFF),PixelShaderCache::GetClearProgram(),VertexShaderCache::GetSimpleVertexShader());
D3D::drawClearQuad(color ,(z & 0xFFFFFF) / float(0xFFFFFF),PixelShaderCache::GetClearProgram(),VertexShaderCache::GetClearVertexShader());
if (zEnable)
D3D::RefreshRenderState(D3DRS_ZFUNC);
//D3D::dev->Clear(0, NULL, (colorEnable ? D3DCLEAR_TARGET : 0)| ( zEnable ? D3DCLEAR_ZBUFFER : 0), color | ((alphaEnable)?0:0xFF000000),(z & 0xFFFFFF) / float(0xFFFFFF), 0);

View File

@ -166,10 +166,14 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width,
// each other stored in a single texture, and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that texture with different tluts get different IDs.
texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
u32 tlutHash = TexDecoder_GetFullHash32(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format));
//texHash ^= tlutHash; //this line was the problem, as the hash changes with the tlut hash
//the textures where alway recreated
int tempsize = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_IndexedMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_IndexedMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_IndexedMaxSize : tempsize;
texHash = TexDecoder_GetHash64(ptr,tempsize,g_ActiveConfig.iSafeTextureCache_IndexedSamples);
tempsize = TexDecoder_GetPaletteSize(tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_TlutMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_TlutMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_TlutMaxSize : tempsize;
u32 tlutHash = TexDecoder_GetHash32(&texMem[tlutaddr], tempsize,g_ActiveConfig.iSafeTextureCache_TlutSamples);
texHash ^= tlutHash;
if (g_ActiveConfig.bSafeTextureCache)
{
texID = texID ^ tlutHash;
@ -177,7 +181,9 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width,
}
else
{
texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
int tempsize = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_ColorMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_ColorMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_ColorMaxSize : tempsize;
texHash = TexDecoder_GetHash64(ptr, tempsize,g_ActiveConfig.iSafeTextureCache_ColorSamples);
}
if (g_ActiveConfig.bSafeTextureCache)
hash_value = texHash;

View File

@ -41,6 +41,7 @@ const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS + 8][4]);
static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader;
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;
static LPDIRECT3DVERTEXSHADER9 FSAAVertexShader;
LinearDiskCache g_vs_disk_cache;
@ -50,6 +51,11 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader()
return SimpleVertexShader;
}
// Returns the vertex shader handle currently held in the file-static
// ClearVertexShader variable. No validation is done here; the caller gets
// whatever value that variable holds at the time of the call.
// NOTE(review): the handle appears to be created in VertexShaderCache::Init();
// confirm Init() has run before this accessor is used.
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
{
return ClearVertexShader;
}
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetFSAAVertexShader()
{
return FSAAVertexShader;
@ -155,12 +161,12 @@ public:
void VertexShaderCache::Init()
{
char *vSimpleProg = new char[2048];
sprintf(vSimpleProg,"struct VSOUTPUT\n"
char* vProg = new char[2048];
sprintf(vProg,"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vPosition : POSITION;\n"
"float4 vColor0 : COLOR0;\n"
"float2 vTexCoord : TEXCOORD0;\n"
"float2 vTexCoord : TEXCOORD0;\n"
"};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float4 inColor0: COLOR0)\n"
"{\n"
@ -171,10 +177,24 @@ void VertexShaderCache::Init()
"return OUT;\n"
"}\n");
SimpleVertexShader = D3D::CompileAndCreateVertexShader(vSimpleProg, (int)strlen(vSimpleProg));
SimpleVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
sprintf(vProg,"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vColor0 : COLOR0;\n"
"};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
"{\n"
"VSOUTPUT OUT;\n"
"OUT.vPosition = inPosition;\n"
"OUT.vColor0 = inColor0;\n"
"return OUT;\n"
"}\n");
ClearVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
char *vFSAAProg = new char[2048];
sprintf(vFSAAProg, "struct VSOUTPUT\n"
sprintf(vProg, "struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vTexCoord : TEXCOORD0;\n"
@ -196,12 +216,10 @@ void VertexShaderCache::Init()
"OUT.vTexCoord5 = inTEX2;\n"
"return OUT;\n"
"}\n");
FSAAVertexShader = D3D::CompileAndCreateVertexShader(vFSAAProg, (int)strlen(vFSAAProg));
FSAAVertexShader = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
Clear();
delete [] vFSAAProg;
delete [] vSimpleProg;
delete [] vProg;
if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

View File

@ -56,6 +56,7 @@ public:
static void Shutdown();
static bool SetShader(u32 components);
static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader();
static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader();
static LPDIRECT3DVERTEXSHADER9 GetFSAAVertexShader();
static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
#if defined(_DEBUG) || defined(DEBUGFAST)

View File

@ -274,16 +274,24 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
// each other stored in a single texture, and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that texture with different tluts get different IDs.
texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
u32 tlutHash = TexDecoder_GetFullHash32(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format));
//texHash ^= tlutHash;
int tempsize = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_IndexedMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_IndexedMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_IndexedMaxSize : tempsize;
texHash = TexDecoder_GetHash64(ptr,tempsize,g_ActiveConfig.iSafeTextureCache_IndexedSamples);
tempsize = TexDecoder_GetPaletteSize(tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_TlutMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_TlutMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_TlutMaxSize : tempsize;
u32 tlutHash = TexDecoder_GetHash32(&texMem[tlutaddr], tempsize,g_ActiveConfig.iSafeTextureCache_TlutSamples);
texHash ^= tlutHash;
if (g_ActiveConfig.bSafeTextureCache)
{
texID = texID ^ tlutHash;
//DebugLog("addr: %08x | texID: %08x | texHash: %08x", address, texID, hash_value);
}
}
else
{
texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
int tempsize = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format);
tempsize = (g_ActiveConfig.iSafeTextureCache_ColorMaxSize != 0 && g_ActiveConfig.iSafeTextureCache_ColorMaxSize < tempsize)?g_ActiveConfig.iSafeTextureCache_ColorMaxSize : tempsize;
texHash = TexDecoder_GetHash64(ptr, tempsize,g_ActiveConfig.iSafeTextureCache_ColorSamples);
}
if (g_ActiveConfig.bSafeTextureCache)
hash_value = texHash;