From 7947543d98327a082d6f7006339b265882c40801 Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Mon, 17 May 2010 22:17:46 +0000 Subject: [PATCH] a little optimization in shader constants setting, based on the same idea as nodchip's changes. a MAJOR change in pixel shader generation: please review all the games you can and leave comments. this should improve accuracy in graphics emulation a lot. for example: mario's eyes in super mario galaxy, water pod transparency and water transparency in the game intro of super mario sunshine, etc. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5457 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/LinearDiskCache.cpp | 2 +- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 70 +++++++++++-------- .../VideoCommon/Src/PixelShaderManager.cpp | 5 +- Source/Core/VideoCommon/Src/VideoConfig.cpp | 2 +- .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 25 ++++--- .../Plugin_VideoDX9/Src/VertexShaderCache.cpp | 14 +--- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 24 ++++--- .../Plugin_VideoOGL/Src/VertexShaderCache.cpp | 27 ++----- 8 files changed, 82 insertions(+), 87 deletions(-) diff --git a/Source/Core/Common/Src/LinearDiskCache.cpp b/Source/Core/Common/Src/LinearDiskCache.cpp index ae52433ba0..05ce5b1864 100644 --- a/Source/Core/Common/Src/LinearDiskCache.cpp +++ b/Source/Core/Common/Src/LinearDiskCache.cpp @@ -18,7 +18,7 @@ #include "LinearDiskCache.h" static const char ID[4] = {'D', 'C', 'A', 'C'}; -const int version = 4888; // TODO: Get from SVN_REV +const int version = 5457; // TODO: Get from SVN_REV LinearDiskCache::LinearDiskCache() : file_(NULL), num_entries_(0) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 031653d817..5d9d7b82e6 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -157,13 +157,13 @@ const float epsilon8bit = 1.0f / 255.0f; static const char *tevKSelTableC[] 
= // KCSEL { "1.0f,1.0f,1.0f", // 1 = 0x00 - "0.875f,0.875f,0.875f", // 7_8 = 0x01 - "0.75f,0.75f,0.75f", // 3_4 = 0x02 - "0.625f,0.625f,0.625f", // 5_8 = 0x03 - "0.5f,0.5f,0.5f", // 1_2 = 0x04 - "0.375f,0.375f,0.375f", // 3_8 = 0x05 - "0.25f,0.25f,0.25f", // 1_4 = 0x06 - "0.125f,0.125f,0.125f", // 1_8 = 0x07 + "0.8745098f,0.8745098f,0.8745098f", // 7_8 = 0x01 + "0.7490196f,0.7490196f,0.7490196f", // 3_4 = 0x02 + "0.6235294f,0.6235294f,0.6235294f", // 5_8 = 0x03 + "0.4980392f,0.4980392f,0.4980392f", // 1_2 = 0x04 + "0.372549f,0.372549f,0.372549f", // 3_8 = 0x05 + "0.2470588f,0.2470588f,0.2470588f", // 1_4 = 0x06 + "0.1215686f,0.1215686f,0.1215686f", // 1_8 = 0x07 "ERROR", // 0x08 "ERROR", // 0x09 "ERROR", // 0x0a @@ -193,13 +193,13 @@ static const char *tevKSelTableC[] = // KCSEL static const char *tevKSelTableA[] = // KASEL { "1.0f", // 1 = 0x00 - "0.875f",// 7_8 = 0x01 - "0.75f", // 3_4 = 0x02 - "0.625f",// 5_8 = 0x03 - "0.5f", // 1_2 = 0x04 - "0.375f",// 3_8 = 0x05 - "0.25f", // 1_4 = 0x06 - "0.125f",// 1_8 = 0x07 + "0.8745098f",// 7_8 = 0x01 + "0.7490196f", // 3_4 = 0x02 + "0.6235294f",// 5_8 = 0x03 + "0.4980392f", // 1_2 = 0x04 + "0.372549f",// 3_8 = 0x05 + "0.2470588f", // 1_4 = 0x06 + "0.1215686f",// 1_8 = 0x07 "ERROR", // 0x08 "ERROR", // 0x09 "ERROR", // 0x0a @@ -237,8 +237,8 @@ static const char *tevScaleTable[] = // CS static const char *tevBiasTable[] = // TB { "", // ZERO, - "+0.5f", // ADDHALF, - "-0.5f", // SUBHALF, + "+0.4980392f", // ADDHALF, + "-0.4980392f", // SUBHALF, "", }; @@ -269,7 +269,7 @@ static const char *tevCInputTable[] = // CC "rastemp.rgb", // RASC, "rastemp.aaa", // RASA, "float3(1.0f,1.0f,1.0f)", // ONE, - "float3(0.5f,0.5f,0.5f)", // HALF, + "float3(0.4980392f,0.4980392f,0.4980392f)", // HALF, "konsttemp.rgb", // KONST, "float3(0.0f,0.0f,0.0f)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", @@ -294,7 +294,7 @@ static const char *tevCInputTable2[] = // CC "rastemp", // RASC, "(rastemp.aaa)", // RASA, 
"float3(1.0f,1.0f,1.0f)", // ONE - "float3(0.5f,0.5f,0.5f)", // HALF + "float3(0.4980392f,0.4980392f,0.4980392f)", // HALF "konsttemp", //"konsttemp.rgb", // KONST "float3(0.0f,0.0f,0.0f)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", @@ -350,8 +350,8 @@ static const char *tevRasTable[] = static const char *alphaRef[2] = { - I_ALPHA"[0].x", - I_ALPHA"[0].y" + I_ALPHA"[0].r", + I_ALPHA"[0].g" }; //static const char *tevTexFunc[] = { "tex2D", "texRECT" }; @@ -521,8 +521,15 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 H for (int i = 0; i < numStages; i++) WriteStage(p, i, texture_mask,HLSL); //build the equation for this stage - WRITE(p, "prev = saturate(prev);\n"); - + // emulation of unsigned 8-bit overflow when casting + if(HLSL) + { + WRITE(p, "prev = ((((prev * 255.0f) %% 256.0f) + 256.0f) %% 256.0f) / 255.0f;\n"); + } + else + { + WRITE(p, "prev = mod(mod(prev * 255.0f,256.0f) + 256.0f,256.0f) / 255.0f;\n"); + } if (!WriteAlphaTest(p, HLSL)) { @@ -586,11 +593,11 @@ static const char *TEVCMPColorOPTable[16] = "float3(0.0f,0.0f,0.0f)",//5 "float3(0.0f,0.0f,0.0f)",//6 "float3(0.0f,0.0f,0.0f)",//7 - " %s + ((%s.r > %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8 + " %s + ((%s.r >= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8 " %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9 - " %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10 + " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11 - " %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? 
%s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12 + " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13 " %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14 " %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15 @@ -607,13 +614,13 @@ static const char *TEVCMPAlphaOPTable[16] = "0.0f",//5 "0.0f",//6 "0.0f",//7 - " %s + ((%s.r > (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8 + " %s + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8 " %s + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9 - " %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10 + " %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11 - " %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12 + " %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13 - " %s + ((%s.a > (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14 + " %s + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14 " %s + (abs(%s.a - %s.a) < (0.5f/255.0f) ? 
%s : 0.0f)"//#define TEVCMP_A8_EQ 15 }; @@ -777,7 +784,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL) } if (cc.clamp) WRITE(p,")"); - WRITE(p,";\n"); + WRITE(p,";\n"); // combine the alpha channel if (ac.clamp) @@ -813,6 +820,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL) if (ac.shift>0) WRITE(p, ")"); + } else { @@ -826,7 +834,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL) } if (ac.clamp) WRITE(p, ")"); - WRITE(p, ";\n\n"); + WRITE(p, ";\n\n"); } void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL) diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 4ec14ea948..83627a5277 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -396,10 +396,7 @@ void PixelShaderManager::SetFogParamChanged() void PixelShaderManager::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) { - SetPSConstant4fv(C_COLORMATRIX, pmatrix); - SetPSConstant4fv(C_COLORMATRIX+1, pmatrix+4); - SetPSConstant4fv(C_COLORMATRIX+2, pmatrix+8); - SetPSConstant4fv(C_COLORMATRIX+3, pmatrix+12); + SetMultiPSConstant4fv(C_COLORMATRIX,4,pmatrix); SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd); } diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 92f6b55b6e..a956a53cf2 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -53,7 +53,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Settings", "wideScreenHack", &bWidescreenHack, false); iniFile.Get("Settings", "AspectRatio", &iAspectRatio, (int)ASPECT_AUTO); iniFile.Get("Settings", "Crop", &bCrop, false); - iniFile.Get("Settings", "UseXFB", &bUseXFB, true); + iniFile.Get("Settings", "UseXFB", &bUseXFB, 0); iniFile.Get("Settings", 
"UseRealXFB", &bUseRealXFB, 0); iniFile.Get("Settings", "AutoScale", &bAutoScale, true); iniFile.Get("Settings", "UseNativeMips", &bUseNativeMips, true); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index c4008331ff..84985d8f4f 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -72,27 +72,30 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 || - lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4 ) + lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4) { - const float f[4] = {f1, f2, f3, f4}; - D3D::dev->SetPixelShaderConstantF(const_number, f, 1); lastPSconstants[const_number][0] = f1; lastPSconstants[const_number][1] = f2; lastPSconstants[const_number][2] = f3; lastPSconstants[const_number][3] = f4; - } + D3D::dev->SetPixelShaderConstantF(const_number, lastPSconstants[const_number], 1); + + } } void SetPSConstant4fv(int const_number, const float *f) { - if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] || - lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3] ) - { + if (memcmp(&lastPSconstants[const_number], f, sizeof(float) * 4)) { + memcpy(&lastPSconstants[const_number], f, sizeof(float) * 4); D3D::dev->SetPixelShaderConstantF(const_number, f, 1); - lastPSconstants[const_number][0] = f[0]; - lastPSconstants[const_number][1] = f[1]; - lastPSconstants[const_number][2] = f[2]; - lastPSconstants[const_number][3] = f[3]; + } +} + +void SetMultiPSConstant4fv(int const_number, int count, const float *f) +{ + if (memcmp(&lastPSconstants[const_number], f, count * sizeof(float) * 4)) { + 
memcpy(&lastPSconstants[const_number], f, count * sizeof(float) * 4); + D3D::dev->SetPixelShaderConstantF(const_number, f, count); } } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 8e0660d98c..ce84e66ace 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -68,7 +68,6 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) lastVSconstants[const_number][2] != f3 || lastVSconstants[const_number][3] != f4) { - const float f[4] = {f1, f2, f3, f4}; lastVSconstants[const_number][0] = f1; lastVSconstants[const_number][1] = f2; lastVSconstants[const_number][2] = f3; @@ -79,17 +78,10 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) void SetVSConstant4fv(int const_number, const float *f) { - if (lastVSconstants[const_number][0] != f[0] || - lastVSconstants[const_number][1] != f[1] || - lastVSconstants[const_number][2] != f[2] || - lastVSconstants[const_number][3] != f[3]) - { - lastVSconstants[const_number][0] = f[0]; - lastVSconstants[const_number][1] = f[1]; - lastVSconstants[const_number][2] = f[2]; - lastVSconstants[const_number][3] = f[3]; + if (memcmp(&lastVSconstants[const_number], f, sizeof(float) * 4)) { + memcpy(&lastVSconstants[const_number], f, sizeof(float) * 4); D3D::dev->SetVertexShaderConstantF(const_number, lastVSconstants[const_number], 1); - } + } } void SetMultiVSConstant3fv(int const_number, int count, const float *f) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index d35be5e8d2..cdcd552e2e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -53,24 +53,32 @@ void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) if 
(lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 || lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4) { - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f1, f2, f3, f4); lastPSconstants[const_number][0] = f1; lastPSconstants[const_number][1] = f2; lastPSconstants[const_number][2] = f3; lastPSconstants[const_number][3] = f4; + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, lastPSconstants[const_number]); + } } void SetPSConstant4fv(int const_number, const float *f) { - if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] || - lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3]) - { + if (memcmp(&lastPSconstants[const_number], f, sizeof(float) * 4)) { + memcpy(&lastPSconstants[const_number], f, sizeof(float) * 4); glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f); - lastPSconstants[const_number][0] = f[0]; - lastPSconstants[const_number][1] = f[1]; - lastPSconstants[const_number][2] = f[2]; - lastPSconstants[const_number][3] = f[3]; + } +} + +void SetMultiPSConstant4fv(int const_number, int count, const float *f) +{ + const float *f0 = f; + for (int i = 0; i < count ;i++,f0+=4) + { + if (memcmp(&lastPSconstants[const_number + i], f0, sizeof(float) * 4)) { + memcpy(&lastPSconstants[const_number + i], f0, sizeof(float) * 4); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number + i, lastPSconstants[const_number + i]); + } } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index 4cfe2caabf..c01ee7eb00 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -63,32 +63,19 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) void SetVSConstant4fv(int const_number, const float *f) { 
- if (lastVSconstants[const_number][0] != f[0] || - lastVSconstants[const_number][1] != f[1] || - lastVSconstants[const_number][2] != f[2] || - lastVSconstants[const_number][3] != f[3]) - { - lastVSconstants[const_number][0] = f[0]; - lastVSconstants[const_number][1] = f[1]; - lastVSconstants[const_number][2] = f[2]; - lastVSconstants[const_number][3] = f[3]; + if (memcmp(&lastVSconstants[const_number], f, sizeof(float) * 4)) { + memcpy(&lastVSconstants[const_number], f, sizeof(float) * 4); glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number, lastVSconstants[const_number]); - } + } } void SetMultiVSConstant4fv(int const_number, int count, const float *f) { - for (int i = 0; i < count; i++) + const float *f0 = f; + for (int i = 0; i < count; i++,f0+=4) { - if (lastVSconstants[const_number + i][0] != f[0 + i*4] || - lastVSconstants[const_number + i][1] != f[1 + i*4] || - lastVSconstants[const_number + i][2] != f[2 + i*4] || - lastVSconstants[const_number + i][3] != f[3 + i*4]) - { - lastVSconstants[const_number + i][0] = f[0 + i*4]; - lastVSconstants[const_number + i][1] = f[1 + i*4]; - lastVSconstants[const_number + i][2] = f[2 + i*4]; - lastVSconstants[const_number + i][3] = f[3 + i*4]; + if (memcmp(&lastVSconstants[const_number + i], f0, sizeof(float) * 4)) { + memcpy(&lastVSconstants[const_number + i], f0, sizeof(float) * 4); glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number + i, lastVSconstants[const_number + i]); } }