Multithreaded Shadergen: First Pass over vertex/lighting Shadergens

The only code which touches xfmem is code which writes directly into
uid_data.

All the rest now read their parameters out of uid_data.

I also simplified the lighting code so it always generates separate
codepaths for alpha and color channels instead of trying to combine
them on the off-chance that the same equation works for all 4 channels.

As modern (post 2008) GPUs generally don't calculate all 4 channels
in a single vector, this optimisation is pointless. The shader compiler
will undo it during the GLSL/HLSL to IR step.

Bug Fix: The above optimisation was also broken, applying the color light
         equation to the alpha light channel instead of the alpha light
         equation. But it doesn't look like anything triggered this bug.
This commit is contained in:
Scott Mansell 2016-01-14 18:51:37 +13:00
parent 3a26167148
commit 53c402dbc5
3 changed files with 90 additions and 126 deletions

View File

@ -48,17 +48,15 @@ static const char s_lighting_struct[] = "struct Light {\n"
template <class T> template <class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index,
int coloralpha) bool alpha)
{ {
const LitChannel& chan = const char* swizzle = alpha ? "a" : "rgb";
(litchan_index > 1) ? xfmem.alpha[litchan_index - 2] : xfmem.color[litchan_index]; const char* swizzle_components = (alpha) ? "" : "3";
const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw";
const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4";
uid_data.attnfunc |= chan.attnfunc << (2 * litchan_index); int attnfunc = (uid_data.attnfunc >> (2 * litchan_index)) & 0x3;
uid_data.diffusefunc |= chan.diffusefunc << (2 * litchan_index); int diffusefunc = (uid_data.diffusefunc >> (2 * litchan_index)) & 0x3;
switch (chan.attnfunc) switch (attnfunc)
{ {
case LIGHTATTN_NONE: case LIGHTATTN_NONE:
case LIGHTATTN_DIR: case LIGHTATTN_DIR:
@ -73,8 +71,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
LIGHT_DIR_PARAMS(index)); LIGHT_DIR_PARAMS(index));
object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index)); object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n", object.Write("distAttn = %s(" LIGHT_DISTATT ".xyz);\n",
(chan.diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", (diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index));
LIGHT_DISTATT_PARAMS(index));
object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"); "float3(1.0, attn, attn*attn));\n");
break; break;
@ -91,11 +88,9 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_DISTATT_PARAMS(index)); LIGHT_DISTATT_PARAMS(index));
break; break;
default:
_assert_(0);
} }
switch (chan.diffusefunc) switch (diffusefunc)
{ {
case LIGHTDIF_NONE: case LIGHTDIF_NONE:
object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n", swizzle, object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL ")));\n", swizzle,
@ -104,7 +99,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
case LIGHTDIF_SIGN: case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP: case LIGHTDIF_CLAMP:
object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n", object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL ")));\n",
swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(", swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break; break;
default: default:
@ -131,7 +126,8 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
object.Write("{\n"); object.Write("{\n");
uid_data.matsource |= xfmem.color[j].matsource << j; uid_data.matsource |= xfmem.color[j].matsource << j;
if (color.matsource) // from vertex bool colormatsource = !!(uid_data.matsource & (1 << j));
if (colormatsource) // from vertex
{ {
if (components & (VB_HAS_COL0 << j)) if (components & (VB_HAS_COL0 << j))
object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j); object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j);
@ -146,10 +142,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
} }
uid_data.enablelighting |= xfmem.color[j].enablelighting << j; uid_data.enablelighting |= xfmem.color[j].enablelighting << j;
if (color.enablelighting) if (uid_data.enablelighting & (1 << j))
{ {
uid_data.ambsource |= xfmem.color[j].ambsource << j; uid_data.ambsource |= xfmem.color[j].ambsource << j;
if (color.ambsource) // from vertex if (uid_data.ambsource & (1 << j)) // from vertex
{ {
if (components & (VB_HAS_COL0 << j)) if (components & (VB_HAS_COL0 << j))
object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j); object.Write("lacc = int4(round(%s%d * 255.0));\n", inColorName, j);
@ -158,7 +154,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
else else
// TODO: this isn't verified. Here we want to read the ambient from the vertex, // TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read. // but the vertex itself has no color. So we don't know which value to read.
// Returing 1.0 is the same as disabled lightning, so this could be fine // Returning 1.0 is the same as disabled lightning, so this could be fine
object.Write("lacc = int4(255, 255, 255, 255);\n"); object.Write("lacc = int4(255, 255, 255, 255);\n");
} }
else // from color else // from color
@ -173,9 +169,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
// check if alpha is different // check if alpha is different
uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2); uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2);
if (alpha.matsource != color.matsource) bool alphamatsource = !!(uid_data.matsource & (1 << (j + 2)));
if (alphamatsource != colormatsource)
{ {
if (alpha.matsource) // from vertex if (alphamatsource) // from vertex
{ {
if (components & (VB_HAS_COL0 << j)) if (components & (VB_HAS_COL0 << j))
object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j); object.Write("mat.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
@ -191,10 +188,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
} }
uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2); uid_data.enablelighting |= xfmem.alpha[j].enablelighting << (j + 2);
if (alpha.enablelighting) if (uid_data.enablelighting & (1 << (j + 2)))
{ {
uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2); uid_data.ambsource |= xfmem.alpha[j].ambsource << (j + 2);
if (alpha.ambsource) // from vertex if (uid_data.ambsource & (1 << (j + 2))) // from vertex
{ {
if (components & (VB_HAS_COL0 << j)) if (components & (VB_HAS_COL0 << j))
object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j); object.Write("lacc.w = int(round(%s%d.w * 255.0));\n", inColorName, j);
@ -214,53 +211,23 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
object.Write("lacc.w = 255;\n"); object.Write("lacc.w = 255;\n");
} }
if (color.enablelighting && alpha.enablelighting) if (uid_data.enablelighting & (1 << j)) // Color lights
{ {
// both have lighting, test if they use the same lights
int mask = 0;
uid_data.attnfunc |= color.attnfunc << (2 * j); uid_data.attnfunc |= color.attnfunc << (2 * j);
uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2));
uid_data.diffusefunc |= color.diffusefunc << (2 * j); uid_data.diffusefunc |= color.diffusefunc << (2 * j);
uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2));
uid_data.light_mask |= color.GetFullLightMask() << (8 * j); uid_data.light_mask |= color.GetFullLightMask() << (8 * j);
for (int i = 0; i < 8; ++i)
if (uid_data.light_mask & (1 << (i + 8 * j)))
GenerateLightShader<T>(object, uid_data, i, j, false);
}
if (uid_data.enablelighting & (1 << (j + 2))) // Alpha lights
{
uid_data.attnfunc |= alpha.attnfunc << (2 * (j + 2));
uid_data.diffusefunc |= alpha.diffusefunc << (2 * (j + 2));
uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2)); uid_data.light_mask |= alpha.GetFullLightMask() << (8 * (j + 2));
if (color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if (mask)
{
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ if (uid_data.light_mask & (1 << (i + 8 * (j + 2))))
if (mask & (1 << i)) GenerateLightShader<T>(object, uid_data, i, j + 2, true);
{
GenerateLightShader<T>(object, uid_data, i, j, 3);
}
}
}
}
// no shared lights
for (int i = 0; i < 8; ++i)
{
if (!(mask & (1 << i)) && (color.GetFullLightMask() & (1 << i)))
GenerateLightShader<T>(object, uid_data, i, j, 1);
if (!(mask & (1 << i)) && (alpha.GetFullLightMask() & (1 << i)))
GenerateLightShader<T>(object, uid_data, i, j + 2, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
const int lit_index = color.enablelighting ? j : (j + 2);
int coloralpha = color.enablelighting ? 1 : 2;
uid_data.light_mask |= workingchannel.GetFullLightMask() << (8 * lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1 << i))
GenerateLightShader<T>(object, uid_data, i, lit_index, coloralpha);
}
} }
object.Write("lacc = clamp(lacc, 0, 255);\n"); object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);

View File

@ -319,7 +319,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
// can be made and // can be made and
// doesn't define what will happen if we discard the fragment. But the way modern graphics // doesn't define what will happen if we discard the fragment. But the way modern graphics
// hardware is implemented // hardware is implemented
// means it is not unreasonable to expect the the same behaviour as early_fragment_tests. // means it is not unreasonable to expect the same behaviour as early_fragment_tests.
// We can also assume that if a driver has gone out of its way to support conservative depth and // We can also assume that if a driver has gone out of its way to support conservative depth and
// not image_load_store // not image_load_store
// as required by OpenGL 4.2 that it will be doing the optimisation. // as required by OpenGL 4.2 that it will be doing the optimisation.
@ -579,7 +579,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
// Opengl has reversed vertical screenspace coordiantes // Opengl has reversed vertical screenspace coordinates
if (ApiType == API_OPENGL) if (ApiType == API_OPENGL)
out.Write("\tscreenpos.y = %i.0 - screenpos.y;\n", EFB_HEIGHT); out.Write("\tscreenpos.y = %i.0 - screenpos.y;\n", EFB_HEIGHT);

View File

@ -18,14 +18,12 @@ template <class T>
static T GenerateVertexShader(API_TYPE api_type) static T GenerateVertexShader(API_TYPE api_type)
{ {
T out; T out;
const u32 components = VertexLoaderManager::g_current_components;
// Non-uid template parameters will write to the dummy data (=> gets optimized out) // Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data; vertex_shader_uid_data dummy_data;
vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>(); vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
if (uid_data != nullptr) if (uid_data == nullptr)
memset(uid_data, 0, sizeof(*uid_data));
else
uid_data = &dummy_data; uid_data = &dummy_data;
memset(uid_data, 0, sizeof(*uid_data));
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
@ -46,30 +44,30 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("};\n"); out.Write("};\n");
uid_data->numTexGens = xfmem.numTexGen.numTexGens; uid_data->numTexGens = xfmem.numTexGen.numTexGens;
uid_data->components = components; uid_data->components = VertexLoaderManager::g_current_components;
uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting; uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;
if (api_type == API_OPENGL) if (api_type == API_OPENGL)
{ {
out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
if (components & VB_HAS_POSMTXIDX) if (uid_data->components & VB_HAS_POSMTXIDX)
out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
if (components & VB_HAS_NRM0) if (uid_data->components & VB_HAS_NRM0)
out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
if (components & VB_HAS_NRM1) if (uid_data->components & VB_HAS_NRM1)
out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
if (components & VB_HAS_NRM2) if (uid_data->components & VB_HAS_NRM2)
out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
if (components & VB_HAS_COL0) if (uid_data->components & VB_HAS_COL0)
out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
if (components & VB_HAS_COL1) if (uid_data->components & VB_HAS_COL1)
out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((components & (VB_HAS_UV0 << i)) || hastexmtx) if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i,
SHADER_TEXTURE0_ATTRIB + i); SHADER_TEXTURE0_ATTRIB + i);
} }
@ -85,13 +83,13 @@ static T GenerateVertexShader(API_TYPE api_type)
// Let's set up attributes // Let's set up attributes
for (u32 i = 0; i < 8; ++i) for (u32 i = 0; i < 8; ++i)
{ {
if (i < xfmem.numTexGen.numTexGens) if (i < uid_data->numTexGens)
{ {
out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i); out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(), i);
} }
} }
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier()); out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier());
if (g_ActiveConfig.bEnablePixelLighting) if (uid_data->pixel_lighting)
{ {
out.Write("%s out float3 Normal;\n", GetInterpolationQualifier()); out.Write("%s out float3 Normal;\n", GetInterpolationQualifier());
out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier()); out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier());
@ -107,23 +105,23 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("VS_OUTPUT main(\n"); out.Write("VS_OUTPUT main(\n");
// inputs // inputs
if (components & VB_HAS_NRM0) if (uid_data->components & VB_HAS_NRM0)
out.Write(" float3 rawnorm0 : NORMAL0,\n"); out.Write(" float3 rawnorm0 : NORMAL0,\n");
if (components & VB_HAS_NRM1) if (uid_data->components & VB_HAS_NRM1)
out.Write(" float3 rawnorm1 : NORMAL1,\n"); out.Write(" float3 rawnorm1 : NORMAL1,\n");
if (components & VB_HAS_NRM2) if (uid_data->components & VB_HAS_NRM2)
out.Write(" float3 rawnorm2 : NORMAL2,\n"); out.Write(" float3 rawnorm2 : NORMAL2,\n");
if (components & VB_HAS_COL0) if (uid_data->components & VB_HAS_COL0)
out.Write(" float4 color0 : COLOR0,\n"); out.Write(" float4 color0 : COLOR0,\n");
if (components & VB_HAS_COL1) if (uid_data->components & VB_HAS_COL1)
out.Write(" float4 color1 : COLOR1,\n"); out.Write(" float4 color1 : COLOR1,\n");
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i)); u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((components & (VB_HAS_UV0 << i)) || hastexmtx) if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i); out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
} }
if (components & VB_HAS_POSMTXIDX) if (uid_data->components & VB_HAS_POSMTXIDX)
out.Write(" int posmtx : BLENDINDICES,\n"); out.Write(" int posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {\n"); out.Write(" float4 rawpos : POSITION) {\n");
} }
@ -131,26 +129,26 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("VS_OUTPUT o;\n"); out.Write("VS_OUTPUT o;\n");
// transforms // transforms
if (components & VB_HAS_POSMTXIDX) if (uid_data->components & VB_HAS_POSMTXIDX)
{ {
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES
"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n"); "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL) if (uid_data->components & VB_HAS_NRMALL)
{ {
out.Write("int normidx = posmtx & 31;\n"); out.Write("int normidx = posmtx & 31;\n");
out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
} }
if (components & VB_HAS_NRM0) if (uid_data->components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n"); "rawnorm0)));\n");
if (components & VB_HAS_NRM1) if (uid_data->components & VB_HAS_NRM1)
out.Write( out.Write(
"float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
if (components & VB_HAS_NRM2) if (uid_data->components & VB_HAS_NRM2)
out.Write( out.Write(
"float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
} }
@ -158,21 +156,21 @@ static T GenerateVertexShader(API_TYPE api_type)
{ {
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
if (components & VB_HAS_NRM0) if (uid_data->components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1) if (uid_data->components & VB_HAS_NRM1)
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
if (components & VB_HAS_NRM2) if (uid_data->components & VB_HAS_NRM2)
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
} }
if (!(components & VB_HAS_NRM0)) if (!(uid_data->components & VB_HAS_NRM0))
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
@ -183,19 +181,19 @@ static T GenerateVertexShader(API_TYPE api_type)
"float dist, dist2, attn;\n"); "float dist, dist2, attn;\n");
uid_data->numColorChans = xfmem.numChan.numColorChans; uid_data->numColorChans = xfmem.numChan.numColorChans;
if (xfmem.numChan.numColorChans == 0) if (uid_data->numColorChans == 0)
{ {
if (components & VB_HAS_COL0) if (uid_data->components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n"); out.Write("o.colors_0 = color0;\n");
else else
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
} }
GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_"); GenerateLightingShader<T>(out, uid_data->lighting, uid_data->components, "color", "o.colors_");
if (xfmem.numChan.numColorChans < 2) if (uid_data->numColorChans < 2)
{ {
if (components & VB_HAS_COL1) if (uid_data->components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n"); out.Write("o.colors_1 = color1;\n");
else else
out.Write("o.colors_1 = o.colors_0;\n"); out.Write("o.colors_1 = o.colors_0;\n");
@ -203,20 +201,21 @@ static T GenerateVertexShader(API_TYPE api_type)
// transform texcoords // transform texcoords
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
{ {
TexMtxInfo& texinfo = xfmem.texMtxInfo[i]; auto& texinfo = uid_data->texMtxInfo[i];
out.Write("{\n"); out.Write("{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow; texinfo.sourcerow = xfmem.texMtxInfo[i].sourcerow;
texinfo.texgentype = xfmem.texMtxInfo[i].texgentype;
switch (texinfo.sourcerow) switch (texinfo.sourcerow)
{ {
case XF_SRCGEOM_INROW: case XF_SRCGEOM_INROW:
out.Write("coord.xyz = rawpos.xyz;\n"); out.Write("coord.xyz = rawpos.xyz;\n");
break; break;
case XF_SRCNORMAL_INROW: case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0) if (uid_data->components & VB_HAS_NRM0)
{ {
out.Write("coord.xyz = rawnorm0.xyz;\n"); out.Write("coord.xyz = rawnorm0.xyz;\n");
} }
@ -226,20 +225,20 @@ static T GenerateVertexShader(API_TYPE api_type)
texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1); texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
break; break;
case XF_SRCBINORMAL_T_INROW: case XF_SRCBINORMAL_T_INROW:
if (components & VB_HAS_NRM1) if (uid_data->components & VB_HAS_NRM1)
{ {
out.Write("coord.xyz = rawnorm1.xyz;\n"); out.Write("coord.xyz = rawnorm1.xyz;\n");
} }
break; break;
case XF_SRCBINORMAL_B_INROW: case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2) if (uid_data->components & VB_HAS_NRM2)
{ {
out.Write("coord.xyz = rawnorm2.xyz;\n"); out.Write("coord.xyz = rawnorm2.xyz;\n");
} }
break; break;
default: default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) if (uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break; break;
@ -250,16 +249,15 @@ static T GenerateVertexShader(API_TYPE api_type)
out.Write("coord.z = 1.0;\n"); out.Write("coord.z = 1.0;\n");
// first transformation // first transformation
uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
switch (texinfo.texgentype) switch (texinfo.texgentype)
{ {
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
if (components & (VB_HAS_NRM1 | VB_HAS_NRM2)) if (uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2))
{ {
// transform the light dir into tangent space // transform the light dir into tangent space
uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift; texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift)); LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write( out.Write(
@ -271,7 +269,7 @@ static T GenerateVertexShader(API_TYPE api_type)
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2 // Squadron 2
//_assert_(0); // should have normals //_assert_(0); // should have normals
uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift; texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
} }
@ -285,10 +283,10 @@ static T GenerateVertexShader(API_TYPE api_type)
case XF_TEXGEN_REGULAR: case XF_TEXGEN_REGULAR:
default: default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i; uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (components & (VB_HAS_TEXMTXIDX0 << i)) if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i))
{ {
out.Write("int tmp = int(tex%d.z);\n", i); out.Write("int tmp = int(tex%d.z);\n", i);
if (texinfo.projection == XF_TEXPROJ_STQ) if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp]), dot(coord, " I_TRANSFORMMATRICES
"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
@ -300,7 +298,7 @@ static T GenerateVertexShader(API_TYPE api_type)
} }
else else
{ {
if (texinfo.projection == XF_TEXPROJ_STQ) if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES
"[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES "[%d]), dot(coord, " I_TEXMATRICES
"[%d]));\n", "[%d]));\n",
@ -315,18 +313,17 @@ static T GenerateVertexShader(API_TYPE api_type)
uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled; uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types? // CHECKME: does this only work for regular tex gen types?
if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{ {
const PostMtxInfo& postInfo = xfmem.postMtxInfo[i]; auto& postInfo = uid_data->postMtxInfo[i];
uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index; postInfo.index = xfmem.postMtxInfo[i].index;
int postidx = postInfo.index;
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n" out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES "[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n", "float4 P2 = " I_POSTTRANSFORMMATRICES "[%d];\n",
postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f); postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize; postInfo.normalize = xfmem.postMtxInfo[i].normalize;
if (postInfo.normalize) if (postInfo.normalize)
out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
@ -342,15 +339,15 @@ static T GenerateVertexShader(API_TYPE api_type)
// clipPos/w needs to be done in pixel shader, not here // clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n"); out.Write("o.clipPos = o.pos;\n");
if (g_ActiveConfig.bEnablePixelLighting) if (uid_data->pixel_lighting)
{ {
out.Write("o.Normal = _norm0;\n"); out.Write("o.Normal = _norm0;\n");
out.Write("o.WorldPos = pos.xyz;\n"); out.Write("o.WorldPos = pos.xyz;\n");
if (components & VB_HAS_COL0) if (uid_data->components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n"); out.Write("o.colors_0 = color0;\n");
if (components & VB_HAS_COL1) if (uid_data->components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n"); out.Write("o.colors_1 = color1;\n");
} }
@ -396,10 +393,10 @@ static T GenerateVertexShader(API_TYPE api_type)
{ {
// TODO: Pass interface blocks between shader stages even if geometry shaders // TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support. // are not supported, however that will require at least OpenGL 3.2 support.
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
out.Write("uv%d.xyz = o.tex%d;\n", i, i); out.Write("uv%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n"); out.Write("clipPos = o.clipPos;\n");
if (g_ActiveConfig.bEnablePixelLighting) if (uid_data->pixel_lighting)
{ {
out.Write("Normal = o.Normal;\n"); out.Write("Normal = o.Normal;\n");
out.Write("WorldPos = o.WorldPos;\n"); out.Write("WorldPos = o.WorldPos;\n");