From 7a1940020d6689e19a18becc75e734669dceeaf2 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 12:04:56 +0200 Subject: [PATCH 1/9] VertexShaderGen: Optimize shader uid data order. --- Source/Core/VideoCommon/Src/VertexShaderGen.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index eef0646e6f..ab9a378107 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -69,12 +69,12 @@ struct vertex_shader_uid_data { u32 NumValues() const { return sizeof(vertex_shader_uid_data); } - u32 components; - u32 numColorChans : 2; + u32 components : 23; u32 numTexGens : 4; - + u32 numColorChans : 2; u32 dualTexTrans_enabled : 1; u32 pixel_lighting : 1; + u32 pad0 : 1; u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is 8 bits wide struct { From 69a5a79c0313f512493b7cdb90b6457f09c3dbac Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 18:21:35 +0200 Subject: [PATCH 2/9] PixelShaderGen: Optimize shader uid data order. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 3 +- Source/Core/VideoCommon/Src/PixelShaderGen.h | 40 ++++++++----------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 28e609112e..fcf87215db 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -331,7 +331,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api out.Write("VARYIN float4 colors_02;\n"); out.Write("VARYIN float4 colors_12;\n"); - + // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes if (xfregs.numTexGen.numTexGens < 7) @@ -500,7 +500,6 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - uid_data.xfregs_numTexGen_numTexGens = xfregs.numTexGen.numTexGens; if (xfregs.numTexGen.numTexGens < 7) { out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index c1ca009438..7763cbd574 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -63,18 +63,30 @@ struct pixel_shader_uid_data u32 num_values; // TODO: Shouldn't be a u32 u32 NumValues() const { return num_values; } - u32 components; + u32 components : 23; u32 dstAlphaMode : 2; u32 Pretest : 2; + u32 nIndirectStagesUsed : 4; + u32 pad0 : 1; u32 genMode_numtexgens : 4; u32 genMode_numtevstages : 4; u32 genMode_numindstages : 3; - - u32 nIndirectStagesUsed : 8; + u32 alpha_test_comp0 : 3; + u32 alpha_test_comp1 : 3; + u32 alpha_test_logic : 2; + u32 alpha_test_use_zcomploc_hack : 1; + u32 fog_proj : 1; + u32 fog_fsel : 3; + u32 fog_RangeBaseEnabled : 1; + u32 ztex_op : 2; + u32 
fast_depth_calc : 1; + u32 per_pixel_depth : 1; + u32 forced_early_z : 1; + u32 early_ztest : 1; + u32 pad1 : 1; u32 texMtxInfo_n_projection : 8; // 8x1 bit - u32 tevindref_bi0 : 3; u32 tevindref_bc0 : 3; u32 tevindref_bi1 : 3; @@ -83,6 +95,7 @@ struct pixel_shader_uid_data u32 tevindref_bc3 : 3; u32 tevindref_bi4 : 3; u32 tevindref_bc4 : 3; + inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap) { if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; } @@ -98,25 +111,6 @@ struct pixel_shader_uid_data else if (index == 3) { tevindref_bi4 = texmap; } } - u32 alpha_test_comp0 : 3; - u32 alpha_test_comp1 : 3; - u32 alpha_test_logic : 2; - - u32 alpha_test_use_zcomploc_hack : 1; - - u32 fog_proj : 1; - u32 fog_fsel : 3; - u32 fog_RangeBaseEnabled : 1; - - u32 ztex_op : 2; - - u32 fast_depth_calc : 1; - u32 per_pixel_depth : 1; - u32 forced_early_z : 1; - u32 early_ztest : 1; - - u32 xfregs_numTexGen_numTexGens : 4; - struct { // TODO: Can save a lot space by removing the padding bits u32 cc : 24; From 22d97367873f91efe0fa0d7e2883d2d53154ee7d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 12:52:28 +0200 Subject: [PATCH 3/9] ShaderGen: Static inline everything. 
--- Source/Core/VideoCommon/Src/BPMemory.cpp | 35 ------------------- Source/Core/VideoCommon/Src/BPMemory.h | 35 ++++++++++++++++++- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 18 +++++----- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 6 ++-- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPMemory.cpp b/Source/Core/VideoCommon/Src/BPMemory.cpp index 77f2f6bab3..7272ed0993 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.cpp +++ b/Source/Core/VideoCommon/Src/BPMemory.cpp @@ -299,38 +299,3 @@ void GetBPRegInfo(const u8* data, char* name, size_t name_size, char* desc, size #undef SetRegName } } - -AlphaTest::TEST_RESULT AlphaTest::TestResult() -{ - switch(logic) - { - case 0: // AND - if (comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) - return PASS; - if (comp0 == ALPHACMP_NEVER || comp1 == ALPHACMP_NEVER) - return FAIL; - break; - - case 1: // OR - if (comp0 == ALPHACMP_ALWAYS || comp1 == ALPHACMP_ALWAYS) - return PASS; - if (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER) - return FAIL; - break; - - case 2: // XOR - if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_NEVER) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_ALWAYS)) - return PASS; - if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER)) - return FAIL; - break; - - case 3: // XNOR - if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_NEVER) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_ALWAYS)) - return FAIL; - if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER)) - return PASS; - break; - } - return UNDETERMINED; -} diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 7ce19376f5..83dbefed50 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -885,7 +885,40 @@ union AlphaTest PASS = 2, }; - TEST_RESULT TestResult(); + inline 
TEST_RESULT TestResult() const + { + switch(logic) + { + case 0: // AND + if (comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) + return PASS; + if (comp0 == ALPHACMP_NEVER || comp1 == ALPHACMP_NEVER) + return FAIL; + break; + + case 1: // OR + if (comp0 == ALPHACMP_ALWAYS || comp1 == ALPHACMP_ALWAYS) + return PASS; + if (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER) + return FAIL; + break; + + case 2: // XOR + if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_NEVER) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_ALWAYS)) + return PASS; + if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER)) + return FAIL; + break; + + case 3: // XNOR + if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_NEVER) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_ALWAYS)) + return FAIL; + if ((comp0 == ALPHACMP_ALWAYS && comp1 == ALPHACMP_ALWAYS) || (comp0 == ALPHACMP_NEVER && comp1 == ALPHACMP_NEVER)) + return PASS; + break; + } + return UNDETERMINED; + } }; union UPE_Copy diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index fcf87215db..56b2667a48 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -216,7 +216,7 @@ static char swapModeTable[4][5]; static char text[16384]; -static void BuildSwapModeTable() +static inline void BuildSwapModeTable() { static const char *swapColors = "rgba"; for (int i = 0; i < 4; i++) @@ -229,13 +229,13 @@ static void BuildSwapModeTable() } } -template static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]); -template static void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -template static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template static void 
WriteFog(T& out, pixel_shader_uid_data& uid_data); +template static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]); +template static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +template static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data); template -static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; @@ -762,7 +762,7 @@ static const char *TEVCMPAlphaOPTable[16] = }; template -static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]) +static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -1160,7 +1160,7 @@ static const char *tevAlphaFunclogicTable[] = }; template -static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) +static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char *alphaRef[2] = { @@ -1231,7 +1231,7 @@ static const char *tevFogFuncsTable[] = }; template -static void WriteFog(T& out, pixel_shader_uid_data& uid_data) +static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) { uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; 
if(bpmem.fog.c_proj_fsel.fsel == 0) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index ec4fe27416..a804c08f13 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -173,7 +173,7 @@ private: }; template -static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num) +static inline void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num) { if (ApiType == API_OPENGL) return; // Nothing to do here @@ -182,7 +182,7 @@ static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const } template -static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos) +static inline void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos) { if (using_ubos) return; @@ -191,7 +191,7 @@ static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos) } template -static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name) +static inline void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name) { WriteLocation(object, api_type, using_ubos); object.Write("%s %s ", type, name); From fe2ca814c57483924dc8b8969ad1b17979f05bbf Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 12:52:42 +0200 Subject: [PATCH 4/9] LightingShaderGen: Use macro magic instead of snprintf. Should fix performance problems. 
--- .../Core/VideoCommon/Src/LightingShaderGen.h | 71 ++++++++----------- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 8 +-- 2 files changed, 32 insertions(+), 47 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index b7f596bdd3..fc107a2e2b 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -9,40 +9,22 @@ #include "NativeVertexFormat.h" #include "XFMemory.h" -static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle) -{ - static char result[32]; - snprintf(result, sizeof(result), "%s[5*%d].%s", lightsName, index, swizzle); - return result; -} -static const char* LightCosAtt(const char* lightsName, unsigned int index) -{ - static char result[32]; - snprintf(result, sizeof(result), "%s[5*%d+1]", lightsName, index); - return result; -} +#define LIGHT_COL "%s[5*%d].%s" +#define LIGHT_COL_PARAMS(lightsName, index, swizzle) (lightsName), (index), (swizzle) -static const char* LightDistAtt(const char* lightsName, unsigned int index) -{ - static char result[32]; - snprintf(result, sizeof(result), "%s[5*%d+2]", lightsName, index); - return result; -} +#define LIGHT_COSATT "%s[5*%d+1]" +#define LIGHT_COSATT_PARAMS(lightsName, index) (lightsName), (index) -static const char* LightPos(const char* lightsName, unsigned int index) -{ - static char result[32]; - snprintf(result, sizeof(result), "%s[5*%d+3]", lightsName, index); - return result; -} +#define LIGHT_DISTATT "%s[5*%d+2]" +#define LIGHT_DISTATT_PARAMS(lightsName, index) (lightsName), (index) + +#define LIGHT_POS "%s[5*%d+3]" +#define LIGHT_POS_PARAMS(lightsName, index) (lightsName), (index) + +#define LIGHT_DIR "%s[5*%d+4]" +#define LIGHT_DIR_PARAMS(lightsName, index) (lightsName), (index) -static const char* LightDir(const char* lightsName, unsigned int index) -{ - static char result[32]; - snprintf(result, sizeof(result), "%s[5*%d+4]", 
lightsName, index); - return result; -} template static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) @@ -62,13 +44,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle)); + object.Write("lacc.%s += " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index)); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", LightCol(lightsName, index, swizzle)); + object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index)); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * " LIGHT_COL";\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", LIGHT_COL_PARAMS(lightsName, index, swizzle)); break; default: _assert_(0); } @@ -77,31 +59,34 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, { if (chan.attnfunc == 3) { // spot - object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index)); + object.Write("ldir = " LIGHT_POS".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(lightsName, index)); object.Write("dist2 = dot(ldir, ldir);\n" "dist = sqrt(dist2);\n" "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index)); - object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); + "attn = max(0.0f, dot(ldir, " LIGHT_DIR".xyz));\n", + LIGHT_DIR_PARAMS(lightsName, index)); + object.Write("attn = max(0.0f, dot(" LIGHT_COSATT".xyz, float3(1.0f, attn, attn*attn))) / dot(" LIGHT_DISTATT".xyz, float3(1.0f,dist,dist2));\n", + LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); } else if (chan.attnfunc == 1) { // specular - object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index)); - object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index)); - object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); + object.Write("ldir = normalize(" LIGHT_POS".xyz);\n", LIGHT_POS_PARAMS(lightsName, index)); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, " LIGHT_DIR".xyz)) : 0.0f;\n", LIGHT_DIR_PARAMS(lightsName, index)); + object.Write("attn = max(0.0f, dot(" LIGHT_COSATT".xyz, float3(1,attn,attn*attn))) / dot(" LIGHT_DISTATT".xyz, float3(1,attn,attn*attn));\n", + LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); } switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle)); + object.Write("lacc.%s += attn * " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n", + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * " LIGHT_COL";\n", swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - LightCol(lightsName, index, swizzle)); + LIGHT_COL_PARAMS(lightsName, index, swizzle)); break; default: _assert_(0); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index a682b4ea21..c4088bf5fa 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -20,7 +20,7 @@ static char text[16768]; template -static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) +static inline void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) { object.Write(" %s %s", type, name); if (var_index != -1) @@ -38,7 +38,7 @@ static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* } template -static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) +static inline void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) { object.Write("struct VS_OUTPUT {\n"); 
DefineVSOutputStructMember(object, api_type, "float4", "pos", -1, "POSITION"); @@ -67,7 +67,7 @@ static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) } template -static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) +static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; @@ -353,7 +353,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) // transform the light dir into tangent space uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift; uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; - out.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(I_LIGHTS, texinfo.embosslightshift)); + out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift)); out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else From c05aa0141d6069b9fa1bcb9e7ef2fb69d5b4ec7b Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 13:31:29 +0200 Subject: [PATCH 5/9] ShaderGen: Optimize out most function calls for uid generation. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 26 ++++++++++--------- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 21 ++++++++------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 56b2667a48..7de5d12638 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -243,17 +243,19 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T ? 
out.template GetUidData() : dummy_data; out.SetBuffer(text); + const bool is_writing_shadercode = (out.GetBuffer() != NULL); #ifndef ANDROID locale_t locale; locale_t old_locale; - if (out.GetBuffer() != NULL) + if (is_writing_shadercode) { locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation old_locale = uselocale(locale); // Apply the locale for this thread } #endif - text[sizeof(text) - 1] = 0x7C; // canary + if (is_writing_shadercode) + text[sizeof(text) - 1] = 0x7C; // canary unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; @@ -372,7 +374,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // It just allows it, but it seems that all drivers do. out.Write("layout(early_fragment_tests) in;\n"); } - else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode) { static bool warn_once = true; if (warn_once) @@ -388,7 +390,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { out.Write("[earlydepthstencil]\n"); } - else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)) + else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode) { static bool warn_once = true; if (warn_once) @@ -705,16 +707,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("}\n"); - if (text[sizeof(text) - 1] != 0x7C) - PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); + if (is_writing_shadercode) + { + if (text[sizeof(text) - 1] != 0x7C) + PanicAlert("PixelShader generator - 
buffer too small, canary has been eaten!"); #ifndef ANDROID - if (out.GetBuffer() != NULL) - { uselocale(old_locale); // restore locale freelocale(locale); - } #endif + } } @@ -905,7 +907,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); uid_data.SetTevindrefTexmap(i, texmap); - + out.Write("textemp = "); SampleTexture(out, "tevcoord", texswap, texmap, ApiType); } @@ -1132,7 +1134,7 @@ template void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); - + if (ApiType == API_D3D11) out.Write("Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", texmap,texmap, texcoords, texmap, texswap); else @@ -1273,7 +1275,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) } else { - if (bpmem.fog.c_proj_fsel.fsel != 2) + if (bpmem.fog.c_proj_fsel.fsel != 2 && out.GetBuffer() != NULL) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index c4088bf5fa..e44e01a13b 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -20,7 +20,7 @@ static char text[16768]; template -static inline void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) +static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) { object.Write(" %s %s", type, name); if (var_index != -1) @@ -75,16 +75,19 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ ? 
out.template GetUidData() : dummy_data; out.SetBuffer(text); + const bool is_writing_shadercode = (out.GetBuffer() != NULL); #ifndef ANDROID locale_t locale; locale_t old_locale; - if (out.GetBuffer() != NULL) + if (is_writing_shadercode) { locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation old_locale = uselocale(locale); // Apply the locale for this thread } #endif - text[sizeof(text) - 1] = 0x7C; // canary + + if (is_writing_shadercode) + text[sizeof(text) - 1] = 0x7C; // canary _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); @@ -225,7 +228,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("int posmtx = int(fposmtx);\n"); } - if (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS)) + if (is_writing_shadercode && DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS)) { // This'll cause issues, but it can't be helped out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n"); @@ -547,16 +550,16 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("return o;\n}\n"); } - if (text[sizeof(text) - 1] != 0x7C) - PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); + if (is_writing_shadercode) + { + if (text[sizeof(text) - 1] != 0x7C) + PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); #ifndef ANDROID - if (out.GetBuffer() != NULL) - { uselocale(old_locale); // restore locale freelocale(locale); - } #endif + } } void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type) From 057551ada7eb5742e95ee9810086d4209998a66d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 12 Aug 2013 18:25:32 +0200 Subject: [PATCH 6/9] Software Renderer: Show each backend's display name instead of its short 
name in the config dialog. --- Source/Plugins/Plugin_VideoSoftware/Src/VideoConfigDialog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/VideoConfigDialog.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/VideoConfigDialog.cpp index 5f1f4aa2df..eea908edfc 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/VideoConfigDialog.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/VideoConfigDialog.cpp @@ -51,7 +51,7 @@ VideoConfigDialog::VideoConfigDialog(wxWindow* parent, const std::string& title, it = g_available_video_backends.begin(), itend = g_available_video_backends.end(); for (; it != itend; ++it) - choice_backend->AppendString(StrToWxStr((*it)->GetName())); + choice_backend->AppendString(StrToWxStr((*it)->GetDisplayName())); // TODO: How to get the translated plugin name? choice_backend->SetStringSelection(StrToWxStr(g_video_backend->GetName())); From 68e12407a5835687255e104019a75de9f8dfe4f5 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 12 Aug 2013 15:16:15 -0400 Subject: [PATCH 7/9] [Android] Remove unnecessary explicit indexing of entries in a List within AboutFragment.java Indexes are handled internally within a List object. 
--- .../Android/src/org/dolphinemu/dolphinemu/AboutFragment.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Source/Android/src/org/dolphinemu/dolphinemu/AboutFragment.java b/Source/Android/src/org/dolphinemu/dolphinemu/AboutFragment.java index 5c47b79a3e..906736b039 100644 --- a/Source/Android/src/org/dolphinemu/dolphinemu/AboutFragment.java +++ b/Source/Android/src/org/dolphinemu/dolphinemu/AboutFragment.java @@ -37,10 +37,9 @@ public class AboutFragment extends Fragment { mMainList = (ListView) rootView.findViewById(R.id.gamelist); List Input = new ArrayList(); - int a = 0; + Input.add(new GameListItem(m_activity, "Build Revision", NativeLibrary.GetVersionString(), "", true)); + Input.add(new GameListItem(m_activity, "Supports OpenGL ES 3", PrefsFragment.SupportsGLES3() ? "Yes" : "No", "", true)); - Input.add(a++, new GameListItem(m_activity, "Build Revision", NativeLibrary.GetVersionString(), "", true)); - Input.add(a++, new GameListItem(m_activity, "Supports OpenGL ES 3", PrefsFragment.SupportsGLES3() ? "Yes" : "No", "", true)); adapter = new FolderBrowserAdapter(m_activity, R.layout.folderbrowser, Input); mMainList.setAdapter(adapter); From 00b034f991ffedd2fb30040dea715b2287563cc0 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 12 Aug 2013 15:32:52 -0400 Subject: [PATCH 8/9] [Android] Seems like InputConfigFragment.java also had explicit list indexing. Removed it from here too. 
--- .../dolphinemu/InputConfigFragment.java | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/Source/Android/src/org/dolphinemu/dolphinemu/InputConfigFragment.java b/Source/Android/src/org/dolphinemu/dolphinemu/InputConfigFragment.java index 3a6e855c3b..91f550d2a2 100644 --- a/Source/Android/src/org/dolphinemu/dolphinemu/InputConfigFragment.java +++ b/Source/Android/src/org/dolphinemu/dolphinemu/InputConfigFragment.java @@ -47,29 +47,27 @@ public class InputConfigFragment extends Fragment Bundle savedInstanceState) { List Input = new ArrayList(); - int a = 0; - - Input.add(a++, new InputConfigItem("Draw on-screen controls", "Android-ScreenControls", "True")); - Input.add(a++, new InputConfigItem("Button A", "Android-InputA")); - Input.add(a++, new InputConfigItem("Button B", "Android-InputB")); - Input.add(a++, new InputConfigItem("Button Start", "Android-InputStart")); - Input.add(a++, new InputConfigItem("Button X", "Android-InputX")); - Input.add(a++, new InputConfigItem("Button Y", "Android-InputY")); - Input.add(a++, new InputConfigItem("Button Z", "Android-InputZ")); - Input.add(a++, new InputConfigItem("D-Pad Up", "Android-DPadUp")); - Input.add(a++, new InputConfigItem("D-Pad Down", "Android-DPadDown")); - Input.add(a++, new InputConfigItem("D-Pad Left", "Android-DPadLeft")); - Input.add(a++, new InputConfigItem("D-Pad Right", "Android-DPadRight")); - Input.add(a++, new InputConfigItem("Main Stick Up", "Android-MainUp")); - Input.add(a++, new InputConfigItem("Main Stick Down", "Android-MainDown")); - Input.add(a++, new InputConfigItem("Main Stick Left", "Android-MainLeft")); - Input.add(a++, new InputConfigItem("Main Stick Right", "Android-MainRight")); - Input.add(a++, new InputConfigItem("C Stick Up", "Android-CStickUp")); - Input.add(a++, new InputConfigItem("C Stick Down", "Android-CStickDown")); - Input.add(a++, new InputConfigItem("C Stick Left", "Android-CStickLeft")); - Input.add(a++, new InputConfigItem("C 
Stick Right", "Android-CStickRight")); - Input.add(a++, new InputConfigItem("Trigger L", "Android-InputL")); - Input.add(a++, new InputConfigItem("Trigger R", "Android-InputR")); + Input.add(new InputConfigItem("Draw on-screen controls", "Android-ScreenControls", "True")); + Input.add(new InputConfigItem("Button A", "Android-InputA")); + Input.add(new InputConfigItem("Button B", "Android-InputB")); + Input.add(new InputConfigItem("Button Start", "Android-InputStart")); + Input.add(new InputConfigItem("Button X", "Android-InputX")); + Input.add(new InputConfigItem("Button Y", "Android-InputY")); + Input.add(new InputConfigItem("Button Z", "Android-InputZ")); + Input.add(new InputConfigItem("D-Pad Up", "Android-DPadUp")); + Input.add(new InputConfigItem("D-Pad Down", "Android-DPadDown")); + Input.add(new InputConfigItem("D-Pad Left", "Android-DPadLeft")); + Input.add(new InputConfigItem("D-Pad Right", "Android-DPadRight")); + Input.add(new InputConfigItem("Main Stick Up", "Android-MainUp")); + Input.add(new InputConfigItem("Main Stick Down", "Android-MainDown")); + Input.add(new InputConfigItem("Main Stick Left", "Android-MainLeft")); + Input.add(new InputConfigItem("Main Stick Right", "Android-MainRight")); + Input.add(new InputConfigItem("C Stick Up", "Android-CStickUp")); + Input.add(new InputConfigItem("C Stick Down", "Android-CStickDown")); + Input.add(new InputConfigItem("C Stick Left", "Android-CStickLeft")); + Input.add(new InputConfigItem("C Stick Right", "Android-CStickRight")); + Input.add(new InputConfigItem("Trigger L", "Android-InputL")); + Input.add(new InputConfigItem("Trigger R", "Android-InputR")); adapter = new InputConfigAdapter(m_activity, R.layout.folderbrowser, Input); View rootView = inflater.inflate(R.layout.gamelist_listview, container, false); From 3b272d81b45f47d3fb0aab8ff0a1736fb1788f1e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 12 Aug 2013 19:41:23 -0400 Subject: [PATCH 9/9] [Android] Use a HashMap in PrefsFragment.java instead 
of two CharSequence arrays. This way, we hold the [key|value] pairs together in one object and reduce overall code clutter. --- .../dolphinemu/dolphinemu/PrefsFragment.java | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/Source/Android/src/org/dolphinemu/dolphinemu/PrefsFragment.java b/Source/Android/src/org/dolphinemu/dolphinemu/PrefsFragment.java index daf9aeb1c6..e16b2503c3 100644 --- a/Source/Android/src/org/dolphinemu/dolphinemu/PrefsFragment.java +++ b/Source/Android/src/org/dolphinemu/dolphinemu/PrefsFragment.java @@ -1,5 +1,7 @@ package org.dolphinemu.dolphinemu; +import java.util.HashMap; + import android.app.Activity; import android.os.Build; import android.os.Bundle; @@ -138,36 +140,28 @@ public class PrefsFragment extends PreferenceFragment { addPreferencesFromResource(R.layout.prefs); final ListPreference etp = new ListPreference(m_activity); - CharSequence[] _entries; - CharSequence[] _entryvalues; + final HashMap<CharSequence, CharSequence> entries = new HashMap<CharSequence, CharSequence>(); if (Build.CPU_ABI.contains("x86")) { - _entries = new CharSequence[] { - "Interpreter", - "JIT64 Recompiler", - "JITIL Recompiler", - }; - _entryvalues = new CharSequence[] {"0", "1", "2"}; + entries.put("Interpreter", "0"); + entries.put("JIT64 Recompiler", "1"); + entries.put("JITIL Recompiler", "2"); } else if (Build.CPU_ABI.contains("arm")) { - _entries = new CharSequence[] { - "Interpreter", - "JIT ARM Recompiler", - }; - _entryvalues = new CharSequence[] {"0", "3"}; + entries.put("Interpreter", "0"); + entries.put("JIT ARM Recompiler", "3"); } else { - _entries = new CharSequence[] { - "Interpreter", - }; - _entryvalues = new CharSequence[] {"0"}; + entries.put("Interpreter", "0"); } - - etp.setEntries(_entries); - etp.setEntryValues(_entryvalues); + + // Convert the key/value sections to arrays respectively so the list can be set. + // If Java had proper generics it wouldn't look this disgusting.
+ etp.setEntries(entries.keySet().toArray(new CharSequence[entries.size()])); + etp.setEntryValues(entries.values().toArray(new CharSequence[entries.size()])); etp.setKey("cpupref"); etp.setTitle("CPU Core"); etp.setSummary("Emulation core to use"); @@ -185,17 +179,16 @@ public class PrefsFragment extends PreferenceFragment { final ListPreference videobackend = new ListPreference(m_activity); - _entries = new CharSequence[] { - "Software Renderer", - }; - _entryvalues = new CharSequence[] {"Software Renderer"}; + // Add available graphics renderers to the hashmap to add to the list. + entries.clear(); + entries.put("Software Renderer", "Software Renderer"); // TODO: I think this is a bug? The value shouldn't be the same as the key? videobackend.setKey("gpupref"); videobackend.setTitle("Video Backend"); videobackend.setSummary("Video backend to use"); - videobackend.setEntries(_entries); - videobackend.setEntryValues(_entryvalues); + videobackend.setEntries(entries.keySet().toArray(new CharSequence[entries.size()])); + videobackend.setEntryValues(entries.values().toArray(new CharSequence[entries.size()])); mCategory.addPreference(videobackend); }