UberShaderVertex: Migrate over to fmt

Continues migration of the shader generators over to fmt.

With this, all that's left to move over are the pixel shaders (regular
and ubershader variants).
This commit is contained in:
Lioncash 2020-10-20 09:00:39 -04:00
parent dc5ae5ee66
commit 751d67a59a
1 changed file with 296 additions and 288 deletions

View File

@ -23,193 +23,197 @@ VertexShaderUid GetVertexShaderUid()
return out; return out;
} }
static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out); static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out);
ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config,
const vertex_ubershader_uid_data* uid_data) const vertex_ubershader_uid_data* uid_data)
{ {
const bool msaa = host_config.msaa; const bool msaa = host_config.msaa;
const bool ssaa = host_config.ssaa; const bool ssaa = host_config.ssaa;
const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool per_pixel_lighting = host_config.per_pixel_lighting;
const bool vertex_rounding = host_config.vertex_rounding; const bool vertex_rounding = host_config.vertex_rounding;
const u32 numTexgen = uid_data->num_texgens; const u32 num_texgen = uid_data->num_texgens;
ShaderCode out; ShaderCode out;
out.Write("// Vertex UberShader\n\n"); out.WriteFmt("// Vertex UberShader\n\n");
out.Write("%s", s_lighting_struct); out.WriteFmt("{}", s_lighting_struct);
// uniforms // uniforms
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else else
out.Write("cbuffer VSBlock {\n"); out.WriteFmt("cbuffer VSBlock {{\n");
out.Write(s_shader_uniforms); out.WriteFmt("{}", s_shader_uniforms);
out.Write("};\n"); out.WriteFmt("}};\n");
out.Write("struct VS_OUTPUT {\n"); out.WriteFmt("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, ""); GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
out.Write("};\n\n"); out.WriteFmt("}};\n\n");
WriteUberShaderCommonHeader(out, ApiType, host_config); WriteUberShaderCommonHeader(out, api_type, host_config);
WriteLightingFunction(out); WriteLightingFunction(out);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{ {
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i, i); out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_geometry_shaders) if (host_config.backend_geometry_shaders)
{ {
out.Write("VARYING_LOCATION(0) out VertexData {\n"); out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false)); GetInterpolationQualifier(msaa, ssaa, true, false));
out.Write("} vs;\n"); out.WriteFmt("}} vs;\n");
} }
else else
{ {
// Let's set up attributes // Let's set up attributes
u32 counter = 0; u32 counter = 0;
out.Write("VARYING_LOCATION(%u) %s out float4 colors_0;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa)); GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION(%u) %s out float4 colors_1;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa)); GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < numTexgen; ++i) for (u32 i = 0; i < num_texgen; ++i)
{ {
out.Write("VARYING_LOCATION(%u) %s out float3 tex%u;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i); GetInterpolationQualifier(msaa, ssaa), i);
} }
if (!host_config.fast_depth_calc) if (!host_config.fast_depth_calc)
{ {
out.Write("VARYING_LOCATION(%u) %s out float4 clipPos;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa)); GetInterpolationQualifier(msaa, ssaa));
} }
if (per_pixel_lighting) if (per_pixel_lighting)
{ {
out.Write("VARYING_LOCATION(%u) %s out float3 Normal;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa)); GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION(%u) %s out float3 WorldPos;\n", counter++, out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa)); GetInterpolationQualifier(msaa, ssaa));
} }
} }
out.Write("void main()\n{\n"); out.WriteFmt("void main()\n{{\n");
} }
else // D3D else // D3D
{ {
out.Write("VS_OUTPUT main(\n"); out.WriteFmt("VS_OUTPUT main(\n");
// inputs // inputs
out.Write(" float3 rawnorm0 : NORMAL0,\n"); out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n"
out.Write(" float3 rawnorm1 : NORMAL1,\n"); " float3 rawnorm1 : NORMAL1,\n"
out.Write(" float3 rawnorm2 : NORMAL2,\n"); " float3 rawnorm2 : NORMAL2,\n"
out.Write(" float4 rawcolor0 : COLOR0,\n"); " float4 rawcolor0 : COLOR0,\n"
out.Write(" float4 rawcolor1 : COLOR1,\n"); " float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
out.Write(" float3 rawtex%d : TEXCOORD%d,\n", i, i); out.WriteFmt(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.Write(" uint posmtx : BLENDINDICES,\n"); out.WriteFmt(" uint posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {\n"); out.WriteFmt(" float4 rawpos : POSITION) {{\n");
} }
out.Write("VS_OUTPUT o;\n" out.WriteFmt("VS_OUTPUT o;\n"
"\n"); "\n");
// Transforms // Transforms
out.Write("// Position matrix\n" out.WriteFmt("// Position matrix\n"
"float4 P0;\n" "float4 P0;\n"
"float4 P1;\n" "float4 P1;\n"
"float4 P2;\n" "float4 P2;\n"
"\n" "\n"
"// Normal matrix\n" "// Normal matrix\n"
"float3 N0;\n" "float3 N0;\n"
"float3 N1;\n" "float3 N1;\n"
"float3 N2;\n" "float3 N2;\n"
"\n" "\n"
"if ((components & %uu) != 0u) {// VB_HAS_POSMTXIDX\n", "if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX); VB_HAS_POSMTXIDX);
out.Write(" // Vertex format has a per-vertex matrix\n" out.WriteFmt(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n" " int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n" " P0 = " I_TRANSFORMMATRICES "[posidx];\n"
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
" P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
"\n" "\n"
" int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
" N0 = " I_NORMALMATRICES "[normidx].xyz;\n" " N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
" N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
" N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
"} else {\n" "}} else {{\n"
" // One shared matrix\n" " // One shared matrix\n"
" P0 = " I_POSNORMALMATRIX "[0];\n" " P0 = " I_POSNORMALMATRIX "[0];\n"
" P1 = " I_POSNORMALMATRIX "[1];\n" " P1 = " I_POSNORMALMATRIX "[1];\n"
" P2 = " I_POSNORMALMATRIX "[2];\n" " P2 = " I_POSNORMALMATRIX "[2];\n"
" N0 = " I_POSNORMALMATRIX "[3].xyz;\n" " N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
" N1 = " I_POSNORMALMATRIX "[4].xyz;\n" " N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
" N2 = " I_POSNORMALMATRIX "[5].xyz;\n" " N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
"}\n" "}}\n"
"\n" "\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n" "\n"
"// Only the first normal gets normalized (TODO: why?)\n" "// Only the first normal gets normalized (TODO: why?)\n"
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n" "float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM0\n", "if ((components & {}u) != 0u) // VB_HAS_NRM0\n",
VB_HAS_NRM0); VB_HAS_NRM0);
out.Write( out.WriteFmt(
" _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n" " _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
"\n" "\n"
"float3 _norm1 = float3(0.0, 0.0, 0.0);\n" "float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM1\n", "if ((components & {}u) != 0u) // VB_HAS_NRM1\n",
VB_HAS_NRM1); VB_HAS_NRM1);
out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" out.WriteFmt(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
"\n" "\n"
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n" "float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM2\n", "if ((components & {}u) != 0u) // VB_HAS_NRM2\n",
VB_HAS_NRM2); VB_HAS_NRM2);
out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" out.WriteFmt(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
"\n"); "\n");
// Hardware Lighting // Hardware Lighting
WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0", WriteVertexLighting(out, api_type, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
"o.colors_1"); "o.colors_1");
// Texture Coordinates // Texture Coordinates
if (numTexgen > 0) if (num_texgen > 0)
GenVertexShaderTexGens(ApiType, numTexgen, out); GenVertexShaderTexGens(api_type, num_texgen, out);
out.Write("if (xfmem_numColorChans == 0u) {\n"); out.WriteFmt("if (xfmem_numColorChans == 0u) {{\n"
out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL0); " if ((components & {}u) != 0u)\n"
out.Write(" o.colors_0 = rawcolor0;\n"); " o.colors_0 = rawcolor0;\n"
out.Write(" else\n"); " else\n"
out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"); " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
out.Write("}\n"); "}}\n",
out.Write("if (xfmem_numColorChans < 2u) {\n"); VB_HAS_COL0);
out.Write(" if ((components & %uu) != 0u)\n", VB_HAS_COL1); out.WriteFmt("if (xfmem_numColorChans < 2u) {{\n"
out.Write(" o.colors_0 = rawcolor1;\n"); " if ((components & {}u) != 0u)\n"
out.Write(" else\n"); " o.colors_0 = rawcolor1;\n"
out.Write(" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"); " else\n"
out.Write("}\n"); " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL1);
if (!host_config.fast_depth_calc) if (!host_config.fast_depth_calc)
{ {
// clipPos/w needs to be done in pixel shader, not here // clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n"); out.WriteFmt("o.clipPos = o.pos;\n");
} }
if (per_pixel_lighting) if (per_pixel_lighting)
{ {
out.Write("o.Normal = _norm0;\n"); out.WriteFmt("o.Normal = _norm0;\n"
out.Write("o.WorldPos = pos.xyz;\n"); "o.WorldPos = pos.xyz;\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL0\n"
out.Write(" o.colors_0 = rawcolor0;\n"); " o.colors_0 = rawcolor0;\n",
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1); VB_HAS_COL0);
out.Write(" o.colors_1 = rawcolor1;\n"); out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL1\n"
" o.colors_1 = rawcolor1;\n",
VB_HAS_COL1);
} }
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
@ -221,13 +225,13 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the // We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games. // software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"); out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
out.Write("float clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
out.Write("float clipDist1 = -clipDepth;\n"); // Far: z > 0 "float clipDist1 = -clipDepth;\n"); // Far: z > 0
if (host_config.backend_geometry_shaders) if (host_config.backend_geometry_shaders)
{ {
out.Write("o.clipDist0 = clipDist0;\n"); out.WriteFmt("o.clipDist0 = clipDist0;\n"
out.Write("o.clipDist1 = clipDist1;\n"); "o.clipDist1 = clipDist1;\n");
} }
} }
@ -242,20 +246,20 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
// divide, because some games will use a depth range larger than what is allowed by the // divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer. // games effectively add a depth bias to the values written to the depth buffer.
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control) if (!host_config.backend_clip_control)
{ {
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error. // operation that can introduce a round-trip error.
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
} }
// Correct for negative viewports by mirroring all vertices. We need to negate the height here, // Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend. // since the viewport height is already negated by the render backend.
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless // The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
@ -263,7 +267,7 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
// which in turn can be critical if it happens for clear quads. // which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives // Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly. // get rasterized correctly.
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding) if (vertex_rounding)
{ {
@ -271,219 +275,223 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
// cause an additional pixel offset. Due to a higher pixel density we need to correct this // cause an additional pixel offset. Due to a higher pixel density we need to correct this
// by converting our clip-space position into the Wii's screen-space. // by converting our clip-space position into the Wii's screen-space.
// Acquire the right pixel and then convert it back. // Acquire the right pixel and then convert it back.
out.Write("if (o.pos.w == 1.0f)\n"); out.WriteFmt("if (o.pos.w == 1.0f)\n"
out.Write("{\n"); "{{\n");
out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"); out.WriteFmt("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
out.Write("\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"); "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
out.Write("\tss_pixel_x = round(ss_pixel_x);\n"); out.WriteFmt("\tss_pixel_x = round(ss_pixel_x);\n"
out.Write("\tss_pixel_y = round(ss_pixel_y);\n"); "\tss_pixel_y = round(ss_pixel_y);\n");
out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"); out.WriteFmt("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
out.Write("\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"); "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
out.Write("}\n"); "}}\n");
} }
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{ {
if (host_config.backend_geometry_shaders) if (host_config.backend_geometry_shaders)
{ {
AssignVSOutputMembers(out, "vs", "o", numTexgen, host_config); AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config);
} }
else else
{ {
// TODO: Pass interface blocks between shader stages even if geometry shaders // TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support. // are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < numTexgen; ++i) for (u32 i = 0; i < num_texgen; ++i)
out.Write("tex%d.xyz = o.tex%d;\n", i, i); out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc) if (!host_config.fast_depth_calc)
out.Write("clipPos = o.clipPos;\n"); out.WriteFmt("clipPos = o.clipPos;\n");
if (per_pixel_lighting) if (per_pixel_lighting)
{ {
out.Write("Normal = o.Normal;\n"); out.WriteFmt("Normal = o.Normal;\n"
out.Write("WorldPos = o.WorldPos;\n"); "WorldPos = o.WorldPos;\n");
} }
out.Write("colors_0 = o.colors_0;\n"); out.WriteFmt("colors_0 = o.colors_0;\n"
out.Write("colors_1 = o.colors_1;\n"); "colors_1 = o.colors_1;\n");
} }
if (host_config.backend_depth_clamp) if (host_config.backend_depth_clamp)
{ {
out.Write("gl_ClipDistance[0] = clipDist0;\n"); out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n"
out.Write("gl_ClipDistance[1] = clipDist1;\n"); "gl_ClipDistance[1] = clipDist1;\n");
} }
// Vulkan NDC space has Y pointing down (right-handed NDC space). // Vulkan NDC space has Y pointing down (right-handed NDC space).
if (ApiType == APIType::Vulkan) if (api_type == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else else
out.Write("gl_Position = o.pos;\n"); out.WriteFmt("gl_Position = o.pos;\n");
} }
else // D3D else // D3D
{ {
out.Write("return o;\n"); out.WriteFmt("return o;\n");
} }
out.Write("}\n"); out.WriteFmt("}}\n");
return out; return out;
} }
void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out) static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out)
{ {
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying // The HLSL compiler complains that the output texture coordinates are uninitialized when trying
// to dynamically index them. // to dynamically index them.
for (u32 i = 0; i < numTexgen; i++) for (u32 i = 0; i < num_texgen; i++)
out.Write("o.tex%u = float3(0.0, 0.0, 0.0);\n", i); out.WriteFmt("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
out.Write("// Texture coordinate generation\n"); out.WriteFmt("// Texture coordinate generation\n");
if (numTexgen == 1) if (num_texgen == 1)
out.Write("{ const uint texgen = 0u;\n"); {
out.WriteFmt("{{ const uint texgen = 0u;\n");
}
else else
out.Write("%sfor (uint texgen = 0u; texgen < %uu; texgen++) {\n", {
ApiType == APIType::D3D ? "[loop] " : "", numTexgen); out.WriteFmt("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", num_texgen);
}
out.Write(" // Texcoord transforms\n"); out.WriteFmt(" // Texcoord transforms\n");
out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n" out.WriteFmt(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n"); " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.Write(" switch (%s) {\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow).c_str()); out.WriteFmt(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.Write(" case %uu: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW); out.WriteFmt(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.Write(" coord.xyz = rawpos.xyz;\n"); out.WriteFmt(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n"); out.WriteFmt(" break;\n\n");
out.Write(" case %uu: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW); out.WriteFmt(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.Write( out.WriteFmt(
" coord.xyz = ((components & %uu /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;", " coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
VB_HAS_NRM0); VB_HAS_NRM0);
out.Write(" break;\n\n"); out.WriteFmt(" break;\n\n");
out.Write(" case %uu: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW); out.WriteFmt(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.Write( out.WriteFmt(
" coord.xyz = ((components & %uu /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;", " coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
VB_HAS_NRM1); VB_HAS_NRM1);
out.Write(" break;\n\n"); out.WriteFmt(" break;\n\n");
out.Write(" case %uu: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW); out.WriteFmt(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.Write( out.WriteFmt(
" coord.xyz = ((components & %uu /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;", " coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
VB_HAS_NRM2); VB_HAS_NRM2);
out.Write(" break;\n\n"); out.WriteFmt(" break;\n\n");
for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
{ {
out.Write(" case %uu: // XF_SRCTEX%u_INROW\n", XF_SRCTEX0_INROW + i, i); out.WriteFmt(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.Write( out.WriteFmt(
" coord = ((components & %uu /* VB_HAS_UV%u */) != 0u) ? float4(rawtex%u.x, rawtex%u.y, " " coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, "
"1.0, 1.0) : coord;\n", "1.0, 1.0) : coord;\n",
VB_HAS_UV0 << i, i, i, i); VB_HAS_UV0 << i, i, i, i);
out.Write(" break;\n\n"); out.WriteFmt(" break;\n\n");
} }
out.Write(" }\n"); out.WriteFmt(" }}\n"
out.Write("\n"); "\n");
out.Write(" // Input form of AB11 sets z element to 1.0\n"); out.WriteFmt(" // Input form of AB11 sets z element to 1.0\n");
out.Write(" if (%s == %uu) // inputform == XF_TEXINPUT_AB11\n", out.WriteFmt(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform).c_str(), XF_TEXINPUT_AB11); BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11);
out.Write(" coord.z = 1.0f;\n"); out.WriteFmt(" coord.z = 1.0f;\n"
out.Write("\n"); "\n");
out.Write(" // first transformation\n"); out.WriteFmt(" // first transformation\n");
out.Write(" uint texgentype = %s;\n", out.WriteFmt(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype));
BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype).c_str()); out.WriteFmt(" float3 output_tex;\n"
out.Write(" float3 output_tex;\n" " switch (texgentype)\n"
" switch (texgentype)\n" " {{\n");
" {\n"); out.WriteFmt(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.Write(" case %uu: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP); out.WriteFmt(" {{\n");
out.Write(" {\n"); out.WriteFmt(" uint light = {};\n",
out.Write(" uint light = %s;\n", BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift));
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift).c_str()); out.WriteFmt(" uint source = {};\n",
out.Write(" uint source = %s;\n", BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift));
BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift).c_str()); out.WriteFmt(" switch (source) {{\n");
out.Write(" switch (source) {\n"); for (u32 i = 0; i < num_texgen; i++)
for (u32 i = 0; i < numTexgen; i++) out.WriteFmt(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.Write(" case %uu: output_tex.xyz = o.tex%u; break;\n", i, i); out.WriteFmt(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" " }}\n");
" }\n"); out.WriteFmt(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n",
out.Write(" if ((components & %uu) != 0u) { // VB_HAS_NRM1 | VB_HAS_NRM2\n", VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 out.WriteFmt(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
" output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" " }}\n"
" }\n" " }}\n"
" }\n" " break;\n\n");
" break;\n\n"); out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0); out.WriteFmt(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n" " break;\n\n");
" break;\n\n"); out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1); out.WriteFmt(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n" " break;\n\n");
" break;\n\n"); out.WriteFmt(" default: // Also XF_TEXGEN_REGULAR\n"
out.Write(" default: // Also XF_TEXGEN_REGULAR\n" " {{\n");
" {\n"); out.WriteFmt(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
out.Write(" if ((components & (%uu /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {\n", VB_HAS_TEXMTXIDX0);
VB_HAS_TEXMTXIDX0); out.WriteFmt(
out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n" " // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n" " int tmp = 0;\n"
" switch (texgen) {\n"); " switch (texgen) {{\n");
for (u32 i = 0; i < numTexgen; i++) for (u32 i = 0; i < num_texgen; i++)
out.Write(" case %uu: tmp = int(rawtex%u.z); break;\n", i, i); out.WriteFmt(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }\n" out.WriteFmt(" }}\n"
"\n"); "\n");
out.Write(" if (%s == %uu) {\n", out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ); BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" out.WriteFmt(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n" " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
" } else {\n" " }} else {{\n"
" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" 1.0);\n" " 1.0);\n"
" }\n" " }}\n"
" } else {\n"); " }} else {{\n");
out.Write(" if (%s == %uu) {\n", out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ); BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" out.WriteFmt(
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n" " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" } else {\n" " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" " }} else {{\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" 1.0);\n" " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" }\n" " 1.0);\n"
" }\n" " }}\n"
" }\n" " }}\n"
" break;\n\n" " }}\n"
" }\n" " break;\n\n"
"\n"); " }}\n"
"\n");
out.Write(" if (xfmem_dualTexInfo != 0u) {\n"); out.WriteFmt(" if (xfmem_dualTexInfo != 0u) {{\n");
out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);"); out.WriteFmt(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.Write(" uint base_index = %s;\n", out.WriteFmt(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index));
BitfieldExtract("postMtxInfo", PostMtxInfo().index).c_str()); out.WriteFmt(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n" " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
" float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n" " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
" float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n" "\n");
"\n"); out.WriteFmt(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize));
out.Write(" if (%s != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize).c_str()); out.WriteFmt(" output_tex.xyz = normalize(output_tex.xyz);\n"
out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n" "\n"
"\n" " // multiply by postmatrix\n"
" // multiply by postmatrix\n" " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
" output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n" " dot(P1.xyz, output_tex.xyz) + P1.w,\n"
" dot(P1.xyz, output_tex.xyz) + P1.w,\n" " dot(P2.xyz, output_tex.xyz) + P2.w);\n"
" dot(P2.xyz, output_tex.xyz) + P2.w);\n" " }}\n\n");
" }\n\n");
// When q is 0, the GameCube appears to have a special case // When q is 0, the GameCube appears to have a special case
// This can be seen in devkitPro's neheGX Lesson08 example for Wii // This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling) // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
out.Write(" if (texgentype == %uu && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n", out.WriteFmt(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR); XF_TEXGEN_REGULAR);
out.Write( out.WriteFmt(
" output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n" " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
"\n"); "\n");
out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"); out.WriteFmt(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.Write(" switch (texgen) {\n"); out.WriteFmt(" switch (texgen) {{\n");
for (u32 i = 0; i < numTexgen; i++) for (u32 i = 0; i < num_texgen; i++)
out.Write(" case %uu: o.tex%u = output_tex; break;\n", i, i); out.WriteFmt(" case {}u: o.tex{} = output_tex; break;\n", i, i);
out.Write(" }\n" out.WriteFmt(" }}\n"
"}\n"); "}}\n");
} }
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback) void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)