From 1a831cfc7d4c43f418ae4d65b579bf430aeb278d Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 17 Jan 2016 00:34:06 +1300 Subject: [PATCH] Multithreaded Shadergen: Second Pass over vertex/lighting Shadergens As much as possible, the asserts have been moved out of the GetUID function. But there are some places where asserts depend on variables that aren't stored in the shader UID. --- .../VideoBackends/D3D/VertexShaderCache.cpp | 6 +- .../Core/VideoBackends/D3D12/ShaderCache.cpp | 6 +- Source/Core/VideoBackends/Null/ShaderCache.h | 4 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 6 +- Source/Core/VideoCommon/LightingShaderGen.h | 55 ++++++---- Source/Core/VideoCommon/PixelShaderGen.cpp | 6 +- Source/Core/VideoCommon/VertexShaderGen.cpp | 103 +++++++++++------- Source/Core/VideoCommon/VertexShaderGen.h | 4 +- 8 files changed, 114 insertions(+), 76 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 7b3f71da26..30367724de 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -199,10 +199,10 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader() { - VertexShaderUid uid = GetVertexShaderUid(API_D3D); + VertexShaderUid uid = GetVertexShaderUid(); if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); } @@ -227,7 +227,7 @@ bool VertexShaderCache::SetShader() return (entry.shader != nullptr); } - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData()); D3DBlob* pbytecode = nullptr; D3D::CompileVertexShader(code.GetBuffer(), &pbytecode); diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp 
b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp index 677d50f426..5c4cf17962 100644 --- a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -164,7 +164,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); - VertexShaderUid vs_uid = GetVertexShaderUid(API_D3D); + VertexShaderUid vs_uid = GetVertexShaderUid(); bool gs_changed = gs_uid != s_last_geometry_shader_uid; bool ps_changed = ps_uid != s_last_pixel_shader_uid; @@ -304,7 +304,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) if (g_ActiveConfig.bEnableShaderDebugging) { - ShaderCode code = GenerateVertexShaderCode(API_D3D); + ShaderCode code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData()); s_vertex_uid_checker.AddToIndexAndCheck(code, vs_uid, "Vertex", "v"); } @@ -316,7 +316,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) } else { - ShaderCode vs_code = GenerateVertexShaderCode(API_D3D); + ShaderCode vs_code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData()); ID3DBlob* vs_bytecode = nullptr; if (!D3D::CompileVertexShader(vs_code.GetBuffer(), &vs_bytecode)) diff --git a/Source/Core/VideoBackends/Null/ShaderCache.h b/Source/Core/VideoBackends/Null/ShaderCache.h index 3ef3479ed2..9cdc57c892 100644 --- a/Source/Core/VideoBackends/Null/ShaderCache.h +++ b/Source/Core/VideoBackends/Null/ShaderCache.h @@ -45,12 +45,12 @@ protected: VertexShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type) override { - return GetVertexShaderUid(api_type); + return GetVertexShaderUid(); } ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, VertexShaderUid uid) override { - return GenerateVertexShaderCode(api_type); + return GenerateVertexShaderCode(api_type, uid.GetUidData()); } }; diff --git 
a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 4f1a5deb11..5d9da051bb 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -209,7 +209,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_ last_entry = &newentry; newentry.in_cache = 0; - ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL); + ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && @@ -398,7 +398,7 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) { uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); - uid->vuid = GetVertexShaderUid(API_OPENGL); + uid->vuid = GetVertexShaderUid(); uid->guid = GetGeometryShaderUid(primitive_type); if (g_ActiveConfig.bEnableShaderDebugging) @@ -406,7 +406,7 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); - ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL); + ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); ShaderCode gcode = diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 7603d72dde..8cabb7fc57 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -46,9 +46,8 @@ static const char s_lighting_struct[] = "struct Light {\n" "\tfloat4 dir;\n" "};\n"; -template -static void GenerateLightShader(T& 
object, LightingUidData& uid_data, int index, int litchan_index, - bool alpha) +static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index, + int litchan_index, bool alpha) { const char* swizzle = alpha ? "a" : "rgb"; const char* swizzle_components = (alpha) ? "" : "3"; @@ -114,18 +113,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -template -static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, - const char* inColorName, const char* dest) +static void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, + int components, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++) { - const LitChannel& color = xfmem.color[j]; - const LitChannel& alpha = xfmem.alpha[j]; - object.Write("{\n"); - uid_data.matsource |= xfmem.color[j].matsource << j; bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex { @@ -141,10 +135,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j + 2); } - uid_data.enablelighting |= xfmem.color[j].enablelighting << j; if (uid_data.enablelighting & (1 << j)) { - uid_data.ambsource |= xfmem.color[j].ambsource << j; if (uid_data.ambsource & (1 << j)) // from vertex { if (components & (VB_HAS_COL0 << j)) @@ -168,7 +160,6 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } // check if alpha is different - uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2); bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2))); if (alphamatsource != colormatsource) { @@ -187,10 +178,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int 
com } } - uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); if (uid_data.enablelighting & (1 << (j + 2))) { - uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); if (uid_data.ambsource & (1 << (j + 2))) // from vertex { if (components & (VB_HAS_COL0 << j)) @@ -213,24 +202,44 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com if (uid_data.enablelighting & (1 << j)) // Color lights { - uid_data.attnfunc |= color.attnfunc << (2 * j); - uid_data.diffusefunc |= color.diffusefunc << (2 * j); - uid_data.light_mask |= color.GetFullLightMask() << (8 * j); for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * j))) - GenerateLightShader(object, uid_data, i, j, false); + GenerateLightShader(object, uid_data, i, j, false); } if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights { - uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2)); - uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2)); - uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2)); for (int i = 0; i < 8; ++i) if (uid_data.light_mask & (1 << (i + 8 * (j + 2)))) - GenerateLightShader(object, uid_data, i, j + 2, true); + GenerateLightShader(object, uid_data, i, j + 2, true); } object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("}\n"); } } + +static void GetLightingShaderUid(LightingUidData& uid_data) +{ + for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++) + { + uid_data.matsource |= xfmem.color[j].matsource << j; + uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2); + uid_data.enablelighting |= xfmem.color[j].enablelighting << j; + uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); + + if (uid_data.enablelighting & (1 << j)) // Color lights + { + uid_data.ambsource |= xfmem.color[j].ambsource << j; + uid_data.attnfunc |= xfmem.color[j].attnfunc << (2 * j); + uid_data.diffusefunc |= 
xfmem.color[j].diffusefunc << (2 * j); + uid_data.light_mask |= xfmem.color[j].GetFullLightMask() << (8 * j); + } + if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights + { + uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); + uid_data.attnfunc |= xfmem.alpha[j].attnfunc << (2 * (j + 2)); + uid_data.diffusefunc |= xfmem.alpha[j].diffusefunc << (2 * (j + 2)); + uid_data.light_mask |= xfmem.alpha[j].GetFullLightMask() << (8 * (j + 2)); + } + } +} diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 2d1df801db..006fa7584a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -488,8 +488,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, - "colors_", "col"); + + // FIXME: Disabled until pixelshadergen is split + // GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + // "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index f417988a21..77444e7dfc 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -14,20 +14,73 @@ #include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VideoConfig.h" -template -static T GenerateVertexShader(API_TYPE api_type) +VertexShaderUid GetVertexShaderUid() { - T out; - // Non-uid template parameters will write to the dummy data (=> gets optimized out) - vertex_shader_uid_data dummy_data; - vertex_shader_uid_data* uid_data = out.template 
GetUidData(); - if (uid_data == nullptr) - uid_data = &dummy_data; + VertexShaderUid out; + vertex_shader_uid_data* uid_data = out.GetUidData(); memset(uid_data, 0, sizeof(*uid_data)); _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); + uid_data->numTexGens = xfmem.numTexGen.numTexGens; + uid_data->components = VertexLoaderManager::g_current_components; + uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; + uid_data->msaa = g_ActiveConfig.iMultisamples > 1; + uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; + uid_data->numColorChans = xfmem.numChan.numColorChans; + + GetLightingShaderUid(uid_data->lighting); + + // transform texcoords + for (unsigned int i = 0; i < uid_data->numTexGens; ++i) + { + auto& texinfo = uid_data->texMtxInfo[i]; + + texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow; + texinfo.texgentype = xfmem.texMtxInfo[i].texgentype; + texinfo.inputform = xfmem.texMtxInfo[i].inputform; + + // first transformation + switch (texinfo.texgentype) + { + case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map + if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) + { + // transform the light dir into tangent space + texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + } + else + { + texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; + } + break; + case XF_TEXGEN_COLOR_STRGBC0: + case XF_TEXGEN_COLOR_STRGBC1: + break; + case XF_TEXGEN_REGULAR: + default: + uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; + break; + } + + uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; + // CHECKME: does this only work for regular tex gen types? 
+ if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) + { + auto& postInfo = uid_data->postMtxInfo[i]; + postInfo.index = xfmem.postMtxInfo[i].index; + postInfo.normalize = xfmem.postMtxInfo[i].normalize; + } + } + + return out; +} + +ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data) +{ + ShaderCode out; out.Write("%s", s_lighting_struct); // uniforms @@ -39,14 +92,8 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write(s_shader_uniforms); out.Write("};\n"); - uid_data->numTexGens = xfmem.numTexGen.numTexGens; - uid_data->components = VertexLoaderManager::g_current_components; - uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - uid_data->msaa = g_ActiveConfig.iMultisamples > 1; - uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); + GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); out.Write("};\n"); if (api_type == API_OPENGL) @@ -77,7 +124,7 @@ static T GenerateVertexShader(API_TYPE api_type) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("out VertexData {\n"); - GenerateVSOutputMembers( + GenerateVSOutputMembers( out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); out.Write("} vs;\n"); @@ -190,7 +237,6 @@ static T GenerateVertexShader(API_TYPE api_type) "float3 ldir, h, cosAttn, distAttn;\n" "float dist, dist2, attn;\n"); - uid_data->numColorChans = xfmem.numChan.numColorChans; if (uid_data->numColorChans == 0) { if (uid_data->components & VB_HAS_COL0) @@ -199,7 +245,7 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } - GenerateLightingShader(out, uid_data->lighting, uid_data->components, "color", "o.colors_"); 
+ GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "color", "o.colors_"); if (uid_data->numColorChans < 2) { @@ -217,8 +263,6 @@ static T GenerateVertexShader(API_TYPE api_type) out.Write("{\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); - texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow; - texinfo.texgentype = xfmem.texMtxInfo[i].texgentype; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: @@ -254,7 +298,7 @@ static T GenerateVertexShader(API_TYPE api_type) break; } // Input form of AB11 sets z element to 1.0 - uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform; + if (texinfo.inputform == XF_TEXINPUT_AB11) out.Write("coord.z = 1.0;\n"); @@ -266,8 +310,6 @@ static T GenerateVertexShader(API_TYPE api_type) if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) { // transform the light dir into tangent space - texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; - texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift)); out.Write( @@ -279,7 +321,6 @@ static T GenerateVertexShader(API_TYPE api_type) // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // Squadron 2 //_assert_(0); // should have normals - texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } @@ -292,7 +333,6 @@ static T GenerateVertexShader(API_TYPE api_type) break; case XF_TEXGEN_REGULAR: default: - uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) { out.Write("int tmp = int(tex%d.z);\n", i); @@ -321,19 +361,16 @@ static T GenerateVertexShader(API_TYPE api_type) break; } - uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; // CHECKME: does this only work for regular tex gen types? 
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) { auto& postInfo = uid_data->postMtxInfo[i]; - postInfo.index = xfmem.postMtxInfo[i].index; out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n", postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); - postInfo.normalize = xfmem.postMtxInfo[i].normalize; if (postInfo.normalize) out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); @@ -425,13 +462,3 @@ static T GenerateVertexShader(API_TYPE api_type) return out; } - -VertexShaderUid GetVertexShaderUid(API_TYPE api_type) -{ - return GenerateVertexShader(api_type); -} - -ShaderCode GenerateVertexShaderCode(API_TYPE api_type) -{ - return GenerateVertexShader(api_type); -} diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 16931ec26d..27ca46e19d 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -66,5 +66,5 @@ struct vertex_shader_uid_data typedef ShaderUid VertexShaderUid; -VertexShaderUid GetVertexShaderUid(API_TYPE api_type); -ShaderCode GenerateVertexShaderCode(API_TYPE api_type); +VertexShaderUid GetVertexShaderUid(); +ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data);