Multithreaded Shadergen: Second Pass over vertex/lighting Shadergens

As much as possible, the asserts have been moved out of the GetUID
function. But there are some places where asserts depend on variables
that aren't stored in the shader UID.
This commit is contained in:
Scott Mansell 2016-01-17 00:34:06 +13:00
parent 28c7113e41
commit 1a831cfc7d
8 changed files with 114 additions and 76 deletions

View File

@ -199,10 +199,10 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader()
{
VertexShaderUid uid = GetVertexShaderUid(API_D3D);
VertexShaderUid uid = GetVertexShaderUid();
if (g_ActiveConfig.bEnableShaderDebugging)
{
ShaderCode code = GenerateVertexShaderCode(API_D3D);
ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData());
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}
@ -227,7 +227,7 @@ bool VertexShaderCache::SetShader()
return (entry.shader != nullptr);
}
ShaderCode code = GenerateVertexShaderCode(API_D3D);
ShaderCode code = GenerateVertexShaderCode(API_D3D, uid.GetUidData());
D3DBlob* pbytecode = nullptr;
D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);

View File

@ -164,7 +164,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g
GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type);
PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D);
VertexShaderUid vs_uid = GetVertexShaderUid(API_D3D);
VertexShaderUid vs_uid = GetVertexShaderUid();
bool gs_changed = gs_uid != s_last_geometry_shader_uid;
bool ps_changed = ps_uid != s_last_pixel_shader_uid;
@ -304,7 +304,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid)
if (g_ActiveConfig.bEnableShaderDebugging)
{
ShaderCode code = GenerateVertexShaderCode(API_D3D);
ShaderCode code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData());
s_vertex_uid_checker.AddToIndexAndCheck(code, vs_uid, "Vertex", "v");
}
@ -316,7 +316,7 @@ void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid)
}
else
{
ShaderCode vs_code = GenerateVertexShaderCode(API_D3D);
ShaderCode vs_code = GenerateVertexShaderCode(API_D3D, vs_uid.GetUidData());
ID3DBlob* vs_bytecode = nullptr;
if (!D3D::CompileVertexShader(vs_code.GetBuffer(), &vs_bytecode))

View File

@ -45,12 +45,12 @@ protected:
VertexShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type,
API_TYPE api_type) override
{
return GetVertexShaderUid(api_type);
return GetVertexShaderUid();
}
ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type,
VertexShaderUid uid) override
{
return GenerateVertexShaderCode(api_type);
return GenerateVertexShaderCode(api_type, uid.GetUidData());
}
};

View File

@ -209,7 +209,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_
last_entry = &newentry;
newentry.in_cache = 0;
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL);
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData());
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL);
ShaderCode gcode;
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders &&
@ -398,7 +398,7 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type)
{
uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL);
uid->vuid = GetVertexShaderUid(API_OPENGL);
uid->vuid = GetVertexShaderUid();
uid->guid = GetGeometryShaderUid(primitive_type);
if (g_ActiveConfig.bEnableShaderDebugging)
@ -406,7 +406,7 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode,
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL);
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL);
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData());
vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
ShaderCode gcode =

View File

@ -46,9 +46,8 @@ static const char s_lighting_struct[] = "struct Light {\n"
"\tfloat4 dir;\n"
"};\n";
template <class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index,
bool alpha)
static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_data, int index,
int litchan_index, bool alpha)
{
const char* swizzle = alpha ? "a" : "rgb";
const char* swizzle_components = (alpha) ? "" : "3";
@ -114,18 +113,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
// materials name is I_MATERIALS in vs and I_PMATERIALS in ps
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
template <class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components,
const char* inColorName, const char* dest)
static void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data,
int components, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++)
{
const LitChannel& color = xfmem.color[j];
const LitChannel& alpha = xfmem.alpha[j];
object.Write("{\n");
uid_data.matsource |= xfmem.color[j].matsource << j;
bool colormatsource = !!(uid_data.matsource & (1 << j));
if (colormatsource) // from vertex
{
@ -141,10 +135,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
object.Write("int4 mat = %s[%d];\n", I_MATERIALS, j + 2);
}
uid_data.enablelighting |= xfmem.color[j].enablelighting << j;
if (uid_data.enablelighting & (1 << j))
{
uid_data.ambsource |= xfmem.color[j].ambsource << j;
if (uid_data.ambsource & (1 << j)) // from vertex
{
if (components & (VB_HAS_COL0 << j))
@ -168,7 +160,6 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
// check if alpha is different
uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2);
bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2)));
if (alphamatsource != colormatsource)
{
@ -187,10 +178,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
}
uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2);
if (uid_data.enablelighting & (1 << (j + 2)))
{
uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2);
if (uid_data.ambsource & (1 << (j + 2))) // from vertex
{
if (components & (VB_HAS_COL0 << j))
@ -213,24 +202,44 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
if (uid_data.enablelighting & (1 << j)) // Color lights
{
uid_data.attnfunc |= color.attnfunc << (2 * j);
uid_data.diffusefunc |= color.diffusefunc << (2 * j);
uid_data.light_mask |= color.GetFullLightMask() << (8 * j);
for (int i = 0; i < 8; ++i)
if (uid_data.light_mask & (1 << (i + 8 * j)))
GenerateLightShader<T>(object, uid_data, i, j, false);
GenerateLightShader(object, uid_data, i, j, false);
}
if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights
{
uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2));
uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2));
uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2));
for (int i = 0; i < 8; ++i)
if (uid_data.light_mask & (1 << (i + 8 * (j + 2))))
GenerateLightShader<T>(object, uid_data, i, j + 2, true);
GenerateLightShader(object, uid_data, i, j + 2, true);
}
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}\n");
}
}
// Accumulates the lighting-related XF state into the lighting part of a shader UID.
//
// For every active color channel (xfmem.numChan.numColorChans of them) the
// material source and lighting-enable flags of both the color and the alpha
// LitChannel are OR-ed into uid_data. The ambient source, attenuation function,
// diffuse function and light mask are only recorded for channels whose
// enablelighting bit is set in uid_data.
//
// Bits are only ever OR-ed in; nothing in uid_data is cleared here, so any
// zero-initialisation has to be done by the caller.
static void GetLightingShaderUid(LightingUidData& uid_data)
{
  for (unsigned int chan = 0; chan < xfmem.numChan.numColorChans; ++chan)
  {
    const auto& color_chan = xfmem.color[chan];
    const auto& alpha_chan = xfmem.alpha[chan];

    // The alpha half of a channel is packed two slots above its color half.
    const unsigned int color_slot = chan;
    const unsigned int alpha_slot = chan + 2;

    uid_data.matsource |= color_chan.matsource << color_slot;
    uid_data.matsource |= alpha_chan.matsource << alpha_slot;
    uid_data.enablelighting |= color_chan.enablelighting << color_slot;
    uid_data.enablelighting |= alpha_chan.enablelighting << alpha_slot;

    // Color lights: the per-light state only matters when lighting is enabled.
    if (uid_data.enablelighting & (1 << color_slot))
    {
      uid_data.ambsource |= color_chan.ambsource << color_slot;
      uid_data.attnfunc |= color_chan.attnfunc << (2 * color_slot);
      uid_data.diffusefunc |= color_chan.diffusefunc << (2 * color_slot);
      uid_data.light_mask |= color_chan.GetFullLightMask() << (8 * color_slot);
    }

    // Alpha lights: same fields, stored in the alpha slot of this channel.
    if (uid_data.enablelighting & (1 << alpha_slot))
    {
      uid_data.ambsource |= alpha_chan.ambsource << alpha_slot;
      uid_data.attnfunc |= alpha_chan.attnfunc << (2 * alpha_slot);
      uid_data.diffusefunc |= alpha_chan.diffusefunc << (2 * alpha_slot);
      uid_data.light_mask |= alpha_chan.GetFullLightMask() << (8 * alpha_slot);
    }
  }
}

View File

@ -488,8 +488,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
// out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
GenerateLightingShader<T>(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
"colors_", "col");
// FIXME: Disabled until pixelshadergen is split
// GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
// "colors_", "col");
}
// HACK to handle cases where the tex gen is not enabled

View File

@ -14,20 +14,73 @@
#include "VideoCommon/VertexShaderGen.h"
#include "VideoCommon/VideoConfig.h"
template <class T>
static T GenerateVertexShader(API_TYPE api_type)
VertexShaderUid GetVertexShaderUid()
{
T out;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data;
vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
if (uid_data == nullptr)
uid_data = &dummy_data;
VertexShaderUid out;
vertex_shader_uid_data* uid_data = out.GetUidData<vertex_shader_uid_data>();
memset(uid_data, 0, sizeof(*uid_data));
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->components = VertexLoaderManager::g_current_components;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->numColorChans = xfmem.numChan.numColorChans;
GetLightingShaderUid(uid_data->lighting);
// transform texcoords
for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
{
auto& texinfo = uid_data->texMtxInfo[i];
texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow;
texinfo.texgentype = xfmem.texMtxInfo[i].texgentype;
texinfo.inputform = xfmem.texMtxInfo[i].inputform;
// first transformation
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2))
{
// transform the light dir into tangent space
texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
}
else
{
texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
case XF_TEXGEN_COLOR_STRGBC1:
break;
case XF_TEXGEN_REGULAR:
default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
break;
}
uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{
auto& postInfo = uid_data->postMtxInfo[i];
postInfo.index = xfmem.postMtxInfo[i].index;
postInfo.normalize = xfmem.postMtxInfo[i].normalize;
}
}
return out;
}
ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data)
{
ShaderCode out;
out.Write("%s", s_lighting_struct);
// uniforms
@ -39,14 +92,8 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write(s_shader_uniforms);
out.Write("};\n");
uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->components = VertexLoaderManager::g_current_components;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, "");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, "");
out.Write("};\n");
if (api_type == API_OPENGL)
@ -77,7 +124,7 @@ static T GenerateVertexShader(API_TYPE api_type)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("out VertexData {\n");
GenerateVSOutputMembers<T>(
GenerateVSOutputMembers(
out, api_type, uid_data->numTexGens, uid_data->pixel_lighting,
GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true));
out.Write("} vs;\n");
@ -190,7 +237,6 @@ static T GenerateVertexShader(API_TYPE api_type)
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
uid_data->numColorChans = xfmem.numChan.numColorChans;
if (uid_data->numColorChans == 0)
{
if (uid_data->components & VB_HAS_COL0)
@ -199,7 +245,7 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
GenerateLightingShader<T>(out, uid_data->lighting, uid_data->components, "color", "o.colors_");
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "color", "o.colors_");
if (uid_data->numColorChans < 2)
{
@ -217,8 +263,6 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow;
texinfo.texgentype = xfmem.texMtxInfo[i].texgentype;
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
@ -254,7 +298,7 @@ static T GenerateVertexShader(API_TYPE api_type)
break;
}
// Input form of AB11 sets z element to 1.0
uid_data->texMtxInfo[i].inputform = xfmem.texMtxInfo[i].inputform;
if (texinfo.inputform == XF_TEXINPUT_AB11)
out.Write("coord.z = 1.0;\n");
@ -266,8 +310,6 @@ static T GenerateVertexShader(API_TYPE api_type)
if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2))
{
// transform the light dir into tangent space
texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
@ -279,7 +321,6 @@ static T GenerateVertexShader(API_TYPE api_type)
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
//_assert_(0); // should have normals
texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
}
@ -292,7 +333,6 @@ static T GenerateVertexShader(API_TYPE api_type)
break;
case XF_TEXGEN_REGULAR:
default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i))
{
out.Write("int tmp = int(tex%d.z);\n", i);
@ -321,19 +361,16 @@ static T GenerateVertexShader(API_TYPE api_type)
break;
}
uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{
auto& postInfo = uid_data->postMtxInfo[i];
postInfo.index = xfmem.postMtxInfo[i].index;
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n",
postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
postInfo.normalize = xfmem.postMtxInfo[i].normalize;
if (postInfo.normalize)
out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
@ -425,13 +462,3 @@ static T GenerateVertexShader(API_TYPE api_type)
return out;
}
VertexShaderUid GetVertexShaderUid(API_TYPE api_type)
{
return GenerateVertexShader<VertexShaderUid>(api_type);
}
ShaderCode GenerateVertexShaderCode(API_TYPE api_type)
{
return GenerateVertexShader<ShaderCode>(api_type);
}

View File

@ -66,5 +66,5 @@ struct vertex_shader_uid_data
typedef ShaderUid<vertex_shader_uid_data> VertexShaderUid;
VertexShaderUid GetVertexShaderUid(API_TYPE api_type);
ShaderCode GenerateVertexShaderCode(API_TYPE api_type);
VertexShaderUid GetVertexShaderUid();
ShaderCode GenerateVertexShaderCode(API_TYPE api_type, const vertex_shader_uid_data* uid_data);