From 16ada5fa3d0d21171c00060d08433f3a53d2a55c Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sat, 22 Jun 2013 20:41:32 +0200 Subject: [PATCH 1/5] Compactify generated UID generator assembly and generally cleanup code by storing tev stage hash values in a struct. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 62 +++++--------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 80 ++++++------------- 2 files changed, 45 insertions(+), 97 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 1ec80ba56d..dce8bfbe96 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -706,13 +706,11 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("// TEV stage %d\n", n); - uid_data.bHasIndStage |= bHasIndStage << n; - uid_data.tevorders_n_texcoord |= (u64)texcoord << (3 * n); + uid_data.stagehash[n].hasindstage = bHasIndStage; + uid_data.stagehash[n].tevorders_texcoord = texcoord; if (bHasIndStage) { - uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); - uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n); - uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n); + uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF; out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords @@ -727,12 +725,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias - uid_data.Set_tevind_bias(n, bpmem.tevind[n].bias); if (bpmem.tevind[n].bias != ITB_NONE ) out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale - uid_data.Set_tevind_mid(n, bpmem.tevind[n].mid); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) @@ -769,9 +765,6 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE // --------- // Wrapping // --------- - uid_data.Set_tevind_sw(n, bpmem.tevind[n].sw); - uid_data.Set_tevind_tw(n, bpmem.tevind[n].tw); - uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) @@ -798,26 +791,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - uid_data.cc_n_d |= (u64)cc.d << (4*n); - uid_data.cc_n_c |= (u64)cc.c << (4*n); - uid_data.cc_n_b |= (u64)cc.b << (4*n); - uid_data.cc_n_a |= (u64)cc.a << (4*n); - uid_data.cc_n_bias |= cc.bias << (2*n); - uid_data.cc_n_op |= cc.op << n; - uid_data.cc_n_clamp |= cc.clamp << n; - uid_data.cc_n_shift |= cc.shift << (2*n); - uid_data.cc_n_dest |= cc.dest << (2*n); - uid_data.ac_n_rswap |= ac.rswap << (2*n); - uid_data.ac_n_tswap |= ac.tswap << (2*n); - uid_data.ac_n_d |= (u64)ac.d << (3*n); - uid_data.ac_n_c |= (u64)ac.c << (3*n); - uid_data.ac_n_b |= (u64)ac.b << (3*n); - uid_data.ac_n_a |= (u64)ac.a << (3*n); - uid_data.ac_n_bias |= ac.bias << (2*n); - uid_data.ac_n_op |= ac.op << n; - uid_data.ac_n_clamp |= ac.clamp << n; - uid_data.ac_n_shift |= ac.shift << (2*n); - uid_data.ac_n_dest |= ac.dest << (2*n); + uid_data.stagehash[n].cc = cc.hex & 0xFFFFFF; + uid_data.stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC @@ -827,17 +802,19 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); - uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); - uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); - uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap; + uid_data.stagehash[n].tevksel_swap1a = bpmem.tevksel[i*2].swap1; + uid_data.stagehash[n].tevksel_swap2a = bpmem.tevksel[i*2].swap2; + uid_data.stagehash[n].tevksel_swap1b = bpmem.tevksel[i*2+1].swap1; + uid_data.stagehash[n].tevksel_swap2b = bpmem.tevksel[i*2+1].swap2; + uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); } - + uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); if (bpmem.tevorders[n/2].getEnable(n&1)) { if (!bHasIndStage) @@ -850,10 +827,13 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE } const int i = bpmem.combiners[n].alphaC.tswap; - uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); - uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); - uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); - uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; + uid_data.stagehash[n].tevksel_swap1c = bpmem.tevksel[i*2].swap1; + uid_data.stagehash[n].tevksel_swap2c = bpmem.tevksel[i*2].swap2; + uid_data.stagehash[n].tevksel_swap1d = bpmem.tevksel[i*2+1].swap1; + uid_data.stagehash[n].tevksel_swap2d = bpmem.tevksel[i*2+1].swap2; + + uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1); char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); @@ -871,8 +851,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); - uid_data.set_tevksel_kcsel(n/2, n & 1, kc); - uid_data.set_tevksel_kasel(n/2, n & 1, ka); + uid_data.stagehash[n].tevksel_kc = kc; + uid_data.stagehash[n].tevksel_ka = ka; out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if(kc > 7 || ka > 7) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 826ec561d0..01b0ad3489 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -96,69 +96,38 @@ struct pixel_shader_uid_data else if (index == 3) { tevindref_bi4 = texmap; } } - u64 tevorders_n_texcoord : 48; // 16 x 3 bits + struct { + u32 cc : 24; + u32 ac : 24; - u64 tevind_n_sw : 48; // 16 x 3 bits - u64 tevind_n_tw : 48; // 16 x 3 bits - u32 tevind_n_fb_addprev : 16; // 16 x 1 bit - u32 tevind_n_bs : 32; // 16 x 2 bits - u32 tevind_n_fmt : 32; // 16 x 2 bits - u32 tevind_n_bt : 32; // 16 x 2 bits - u64 tevind_n_bias : 48; // 16 x 3 bits - u64 tevind_n_mid : 64; // 16 x 4 bits + u32 tevorders_texmap : 3; + u32 tevorders_texcoord : 3; + u32 tevorders_enable : 1; + u32 tevorders_colorchan : 3; + u32 pad1 : 6; - // NOTE: These assume that the affected bits are zero before calling - void Set_tevind_sw(int index, u64 val) - { - tevind_n_sw |= val << (3*index); - } - void Set_tevind_tw(int index, u64 val) - { - tevind_n_tw |= val << (3*index); - } - void Set_tevind_bias(int index, u64 val) - { - tevind_n_bias |= val << (3*index); - } - void Set_tevind_mid(int index, u64 val) - { - tevind_n_mid |= val << (4*index); - } + u32 hasindstage : 1; + u32 tevind : 21; + u32 tevksel_swap1a : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap2a : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap1b : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap2b : 2; // TODO: Doesn't fit here.. + u32 pad2 : 2; - u32 tevksel_n_swap1 : 16; // 8x2 bits - u32 tevksel_n_swap2 : 16; // 8x2 bits - u64 tevksel_n_kcsel0 : 40; // 8x5 bits - u64 tevksel_n_kasel0 : 40; // 8x5 bits - u64 tevksel_n_kcsel1 : 40; // 8x5 bits - u64 tevksel_n_kasel1 : 40; // 8x5 bits - void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); } - void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); } + u32 tevksel_swap1c : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap2c : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap1d : 2; // TODO: Doesn't fit here.. + u32 tevksel_swap2d : 2; // TODO: Doesn't fit here.. + u32 tevksel_kc : 5; + u32 tevksel_ka : 5; + u32 pad3 : 14; + } stagehash[16]; - u64 cc_n_d : 64; // 16x4 bits - u64 cc_n_c : 64; // 16x4 bits - u64 cc_n_b : 64; // 16x4 bits - u64 cc_n_a : 64; // 16x4 bits - u32 cc_n_bias : 32; // 16x2 bits - u32 cc_n_op : 16; // 16x1 bit - u32 cc_n_clamp : 16; // 16x1 bit - u32 cc_n_shift : 32; // 16x2 bits - u32 cc_n_dest : 32; // 16x2 bits - - u32 ac_n_rswap : 32; // 16x2 bits - u32 ac_n_tswap : 32; // 16x2 bits - u64 ac_n_d : 48; // 16x3 bits - u64 ac_n_c : 48; // 16x3 bits - u64 ac_n_b : 48; // 16x3 bits - u64 ac_n_a : 48; // 16x3 bits - u32 ac_n_bias : 32; // 16x2 bits - u32 ac_n_op : 16; // 16x1 bit - u32 ac_n_clamp : 16; // 16x1 bit - u32 ac_n_shift : 32; // 16x2 bits - u32 ac_n_dest : 32; // 16x2 bits u32 alpha_test_comp0 : 3; u32 alpha_test_comp1 : 3; u32 alpha_test_logic : 2; + u32 alpha_test_use_zcomploc_hack : 1; u32 fog_proj : 1; @@ -169,7 +138,6 @@ struct pixel_shader_uid_data u32 fast_depth_calc : 1; u32 per_pixel_depth : 1; - u32 bHasIndStage : 16; u32 xfregs_numTexGen_numTexGens : 4; From bdc28106eed06af0d9c0da4c8ee9672a59a7bf7e Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sat, 22 Jun 2013 21:24:21 +0200 Subject: [PATCH 2/5] Optimize shader uid checks by checking the number of uid values which are actually used. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 5 ++ Source/Core/VideoCommon/Src/PixelShaderGen.h | 59 ++++++++++--------- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 4 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 2 + Source/Core/VideoCommon/Src/VertexShaderGen.h | 3 + 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index dce8bfbe96..dd284ced7d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -535,6 +535,11 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api for (unsigned int i = 0; i < numStages; i++) WriteStage(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage +#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) + bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; + uid_data.num_values = (enable_pl) ? sizeof(uid_data)/sizeof(32) : MY_STRUCT_OFFSET(uid_data,stagehash[numStages])/sizeof(u32); + + if (numStages) { // The results of the last texenv stage are put onto the screen, diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 01b0ad3489..210dd9dd32 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -61,6 +61,9 @@ struct pixel_shader_uid_data { // TODO: Optimize field order for easy access! + u32 num_values; // TODO: Shouldn't be a u32 + u32 NumValues() const { return num_values; } // TODO: Can be optimized :) + u32 components; u32 dstAlphaMode : 2; u32 Pretest : 2; @@ -96,34 +99,6 @@ struct pixel_shader_uid_data else if (index == 3) { tevindref_bi4 = texmap; } } - struct { - u32 cc : 24; - u32 ac : 24; - - u32 tevorders_texmap : 3; - u32 tevorders_texcoord : 3; - u32 tevorders_enable : 1; - u32 tevorders_colorchan : 3; - u32 pad1 : 6; - - u32 hasindstage : 1; - u32 tevind : 21; - u32 tevksel_swap1a : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap2a : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap1b : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap2b : 2; // TODO: Doesn't fit here.. - u32 pad2 : 2; - - u32 tevksel_swap1c : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap2c : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap1d : 2; // TODO: Doesn't fit here.. - u32 tevksel_swap2d : 2; // TODO: Doesn't fit here.. - u32 tevksel_kc : 5; - u32 tevksel_ka : 5; - u32 pad3 : 14; - } stagehash[16]; - - u32 alpha_test_comp0 : 3; u32 alpha_test_comp1 : 3; u32 alpha_test_logic : 2; @@ -141,6 +116,34 @@ struct pixel_shader_uid_data u32 xfregs_numTexGen_numTexGens : 4; + struct { + u32 cc : 24; + u32 ac : 24; + + u32 tevorders_texmap : 3; + u32 tevorders_texcoord : 3; + u32 tevorders_enable : 1; + u32 tevorders_colorchan : 3; + u32 pad1 : 6; + + // TODO: Clean up the swapXY mess + u32 hasindstage : 1; + u32 tevind : 21; + u32 tevksel_swap1a : 2; + u32 tevksel_swap2a : 2; + u32 tevksel_swap1b : 2; + u32 tevksel_swap2b : 2; + u32 pad2 : 2; + + u32 tevksel_swap1c : 2; + u32 tevksel_swap2c : 2; + u32 tevksel_swap1d : 2; + u32 tevksel_swap2d : 2; + u32 tevksel_kc : 5; + u32 tevksel_ka : 5; + u32 pad3 : 14; + } stagehash[16]; + // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though.. LightingUidData lighting; }; diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index b8820e2e98..53f2405f8a 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -100,7 +100,7 @@ public: bool operator < (const ShaderUid& obj) const { // TODO: Store last frame used and order by that? makes much more sense anyway... - for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) + for (unsigned int i = 0; i < data.NumValues(); ++i) { if (this->values[i] < obj.values[i]) return true; @@ -212,6 +212,8 @@ struct LightingUidData u32 diffusefunc : 8; // 4x2 bits u32 attnfunc : 8; // 4x2 bits u32 light_mask : 32; // 4x8 bits + + u32 NumValues() const { return sizeof(LightingUidData) / sizeof(u32); } }; #pragma pack() diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 3edb51c505..8c157dc030 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -74,6 +74,8 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) vertex_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) ? out.template GetUidData() : dummy_data; + uid_data.num_values = sizeof(uid_data)/sizeof(u32); + out.SetBuffer(text); #ifndef ANDROID locale_t locale; diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index eb7236678a..3f3087783d 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -67,6 +67,9 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, struct vertex_shader_uid_data { + u32 num_values; // TODO: Shouldn't be a u32 + + u32 NumValues() const { return num_values; } u32 components; u32 numColorChans : 2; u32 numTexGens : 4; From 597a6b34cb146c9b3488af5006386c3a18109446 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 23 Jun 2013 19:28:36 +0200 Subject: [PATCH 3/5] Compactify VertexShader uid struct. --- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 8 ++++---- Source/Core/VideoCommon/Src/VertexShaderGen.h | 20 +++++++++---------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 8c157dc030..a7e3a3d923 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -376,7 +376,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) break; case XF_TEXGEN_REGULAR: default: - uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection; + uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (components & (VB_HAS_TEXMTXIDX0< Date: Tue, 25 Jun 2013 13:37:38 +0200 Subject: [PATCH 4/5] PixelShaderManager: Revert code introduced mainly in revision 0fdeb81038d9. The shader constant usage profile functionality is still buggy and the code using it wasn't ever meant to be merged to master. --- .../VideoCommon/Src/PixelShaderManager.cpp | 100 ++++-------------- 1 file changed, 22 insertions(+), 78 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 9fbe096607..aef4baa14c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -29,45 +29,19 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30 static u32 lastZBias; static int nMaterialsChanged; -static float s_constant_cache[C_PENVCONST_END*4]; - inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { -// if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 && -// s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4) -// return; - g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4); - s_constant_cache[const_number*4] = f1; - s_constant_cache[const_number*4+1] = f2; - s_constant_cache[const_number*4+2] = f3; - s_constant_cache[const_number*4+3] = f4; } inline void SetPSConstant4fv(unsigned int const_number, const float *f) { -// if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] && -// s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3]) -// return; - g_renderer->SetPSConstant4fv(const_number, f); - s_constant_cache[const_number*4] = f[0]; - s_constant_cache[const_number*4+1] = f[1]; - s_constant_cache[const_number*4+2] = f[2]; - s_constant_cache[const_number*4+3] = f[3]; } inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { -// for (unsigned int i = 0; i < 4*count; ++i) -// if (s_constant_cache[const_number*4+i] != f[i]) -// break; -// else if (i == 4*count-1) -// return; - g_renderer->SetMultiPSConstant4fv(const_number, count, f); - for (unsigned int i = 0; i < 4*count; ++i) - s_constant_cache[const_number*4+i] = f[i]; } void PixelShaderManager::Init() @@ -76,7 +50,6 @@ void PixelShaderManager::Init() memset(lastTexDims, 0, sizeof(lastTexDims)); lastZBias = 0; memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); - memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side.... Dirty(); } @@ -102,19 +75,6 @@ void PixelShaderManager::SetConstants(u32 components) if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO) Dirty(); - // TODO: Probably broken in the non-UBO path - PixelShaderConstantProfile constant_profile(C_PENVCONST_END); - /// TODO: dst alpha/api/components type parameter... - GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components); - - static int saved_updates = 0; - static int necessary_updates = 0; - -// TODO: Remove this! -#define IncStuff() { \ - saved_updates++; \ - /*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ } - for (int i = 0; i < 2; ++i) { if (s_nColorsChanged[i]) @@ -122,12 +82,11 @@ void PixelShaderManager::SetConstants(u32 components) int baseind = i ? C_KCOLORS : C_COLORS; for (int j = 0; j < 4; ++j) { - if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j)) + if ((s_nColorsChanged[i] & (1 << j))) { SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); s_nColorsChanged[i] &= ~(1<>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); s_bAlphaChanged = false; - } else if (s_bAlphaChanged) IncStuff(); + } - if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS)) + if (s_bZTextureTypeChanged) { float ftemp[4]; switch (bpmem.ztex2.type) @@ -170,12 +127,11 @@ void PixelShaderManager::SetConstants(u32 components) ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0; break; } - ++necessary_updates; SetPSConstant4fv(C_ZBIAS, ftemp); s_bZTextureTypeChanged = false; - } else if (s_bZTextureTypeChanged) IncStuff(); + } - if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1)) + if (s_bZBiasChanged || s_bDepthRangeChanged) { // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) // [0] = width/2 @@ -186,10 +142,9 @@ void PixelShaderManager::SetConstants(u32 components) // [5] = 16777215 * farz //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); - ++necessary_updates; SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f); s_bZBiasChanged = s_bDepthRangeChanged = false; - }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff(); + } // indirect incoming texture scales if (s_nIndTexScaleChanged) @@ -197,7 +152,7 @@ void PixelShaderManager::SetConstants(u32 components) // set as two sets of vec4s, each containing S and T of two ind stages. float f[8]; - if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE)) + if (s_nIndTexScaleChanged & 0x03) { for (u32 i = 0; i < 2; ++i) { @@ -205,13 +160,10 @@ void PixelShaderManager::SetConstants(u32 components) f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); } - ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE, f); - s_nIndTexScaleChanged &= ~0x03; } - else if ((s_nIndTexScaleChanged & 0x03)) IncStuff(); - if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1)) + if (s_nIndTexScaleChanged & 0x0c) { for (u32 i = 2; i < 4; ++i) { @@ -219,18 +171,16 @@ void PixelShaderManager::SetConstants(u32 components) f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); } - ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); - s_nIndTexScaleChanged &= ~0x0c; } - else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff(); + s_nIndTexScaleChanged = 0; } if (s_nIndTexMtxChanged) { for (int i = 0; i < 3; ++i) { - if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1))) + if (s_nIndTexMtxChanged & (1 << i)) { int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | ((u32)bpmem.indmtx[i].col1.s1 << 2) | @@ -240,8 +190,6 @@ void PixelShaderManager::SetConstants(u32 components) // xyz - static matrix // TODO w - dynamic matrix scale / 256...... somehow / 4 works better // rev 2972 - now using / 256.... verify that this works - ++necessary_updates; - ++necessary_updates; SetPSConstant4f(C_INDTEXMTX + 2 * i, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, @@ -259,20 +207,18 @@ void PixelShaderManager::SetConstants(u32 components) bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); s_nIndTexMtxChanged &= ~(1 << i); - }else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();} + } } } - if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG)) + if (s_bFogColorChanged) { - ++necessary_updates; SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0); s_bFogColorChanged = false; - }else if (s_bFogColorChanged) IncStuff(); + } - if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1)) + if (s_bFogParamChanged) { - ++necessary_updates; if(!g_ActiveConfig.bDisableFog) { //downscale magnitude to 0.24 bits @@ -285,11 +231,10 @@ void PixelShaderManager::SetConstants(u32 components) SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); s_bFogParamChanged = false; - }else if ( s_bFogParamChanged) IncStuff(); + } - if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2)) + if (s_bFogRangeAdjustChanged) { - ++necessary_updates; if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1) { //bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342; @@ -310,9 +255,8 @@ void PixelShaderManager::SetConstants(u32 components) } s_bFogRangeAdjustChanged = false; - }else if ( s_bFogRangeAdjustChanged) IncStuff(); + } - // TODO: use constant profile here! if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f { if (nLightsChanged[0] >= 0) @@ -372,7 +316,7 @@ void PixelShaderManager::SetConstants(u32 components) SetPSConstant4fv(C_PMATERIALS + i, material); } } - + for (int i = 0; i < 2; ++i) { if (nMaterialsChanged & (1 << (i + 2))) From 166a9c56371bd8d0f5f9673a541e5f9a05d91bcb Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 28 Jun 2013 17:43:53 +0200 Subject: [PATCH 5/5] Finishing touches. --- Source/Core/VideoCommon/CMakeLists.txt | 1 - .../VideoCommon/Src/LightingShaderGen.cpp | 7 ------ Source/Core/VideoCommon/Src/PixelShaderGen.h | 8 +++---- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 3 +-- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 4 ++-- Source/Core/VideoCommon/Src/VertexShaderGen.h | 22 +++++++++++-------- Source/Core/VideoCommon/VideoCommon.vcxproj | 1 - .../VideoCommon/VideoCommon.vcxproj.filters | 5 +---- 8 files changed, 21 insertions(+), 30 deletions(-) delete mode 100644 Source/Core/VideoCommon/Src/LightingShaderGen.cpp diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 1bbc69a150..a4dad8293b 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -11,7 +11,6 @@ set(SRCS Src/BPFunctions.cpp Src/HiresTextures.cpp Src/ImageWrite.cpp Src/IndexGenerator.cpp - Src/LightingShaderGen.cpp Src/MainBase.cpp Src/OnScreenDisplay.cpp Src/OpcodeDecoding.cpp diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp deleted file mode 100644 index 58da3612f5..0000000000 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include "LightingShaderGen.h" -#include "NativeVertexFormat.h" -#include "XFMemory.h" diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 210dd9dd32..150a69902e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -55,14 +55,13 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, {I_PMATERIALS, C_PMATERIALS, 4 }, }; -// TODO: Should compact packing be enabled? -//#pragma pack(4) +#pragma pack(1) struct pixel_shader_uid_data { // TODO: Optimize field order for easy access! u32 num_values; // TODO: Shouldn't be a u32 - u32 NumValues() const { return num_values; } // TODO: Can be optimized :) + u32 NumValues() const { return num_values; } u32 components; u32 dstAlphaMode : 2; @@ -117,6 +116,7 @@ struct pixel_shader_uid_data u32 xfregs_numTexGen_numTexGens : 4; struct { + // TODO: Can save a lot space by removing the padding bits u32 cc : 24; u32 ac : 24; @@ -147,7 +147,7 @@ struct pixel_shader_uid_data // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though.. LightingUidData lighting; }; -//#pragma pack() +#pragma pack() typedef ShaderUid PixelShaderUid; typedef ShaderCode PixelShaderCode; // TODO: Obsolete diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 53f2405f8a..c8f8ff8345 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -199,10 +199,9 @@ static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const object.Write(";\n"); } -#pragma pack(4) +#pragma pack(1) /** * Common uid data used for shader generators that use lighting calculations. - * Expected to be stored as a member called "lighting". */ struct LightingUidData { diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index a7e3a3d923..81d45cd6ad 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -401,7 +401,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { const PostMtxInfo& postInfo = xfregs.postMtxInfo[i]; - uid_data.postMtxInfo[i] = xfregs.postMtxInfo[i].index; + uid_data.postMtxInfo[i].index = xfregs.postMtxInfo[i].index; int postidx = postInfo.index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n" @@ -419,7 +419,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } else { - uid_data.postMtxInfo[i] |= xfregs.postMtxInfo[i].normalize << 6; + uid_data.postMtxInfo[i].normalize = xfregs.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 3b233a22fe..797fe7d108 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -71,22 +71,26 @@ struct vertex_shader_uid_data u32 NumValues() const { return num_values; } u32 components; - u32 num_values : 16; // TODO: Shouldn't be a u32 + u32 num_values : 16; // TODO: u8 might be enough, actually u32 numColorChans : 2; u32 numTexGens : 4; u32 dualTexTrans_enabled : 1; - u32 texMtxInfo_n_projection : 16; // XF_TEXPROJ_X + u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is 8 bits wide struct { - u32 inputform : 2; // XF_TEXINPUT_X - u32 texgentype : 3; // XF_TEXGEN_X - u32 sourcerow : 5; // XF_SRCGEOM_X - u32 embosssourceshift : 3; // what generated texcoord to use - u32 embosslightshift : 3; // light index that is used - } texMtxInfo[8]; // TODO: Wasting space + u32 inputform : 2; + u32 texgentype : 3; + u32 sourcerow : 5; + u32 embosssourceshift : 3; + u32 embosslightshift : 3; + } texMtxInfo[8]; - u8 postMtxInfo[8]; // index + normalize + 1 padding bit, TODO: Can be made a struct again.. + struct { + u32 index : 6; + u32 normalize : 1; + u32 pad : 1; + } postMtxInfo[8]; LightingUidData lighting; }; diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 617e4ec567..1e7a56f578 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -190,7 +190,6 @@ - diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index e988d34e12..785e55877f 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -113,9 +113,6 @@ Base - - Shader Generators - Util @@ -294,4 +291,4 @@ {e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6} - \ No newline at end of file +