Merge pull request #9234 from lioncash/rename-fmt

ShaderGenCommon: Rename WriteFmt() to Write()
This commit is contained in:
LC 2020-11-09 22:08:42 -05:00 committed by GitHub
commit 0a2564a89d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2340 additions and 2381 deletions

View File

@ -27,9 +27,9 @@ APIType GetAPIType()
void EmitUniformBufferDeclaration(ShaderCode& code)
{
if (GetAPIType() == APIType::D3D)
code.WriteFmt("cbuffer PSBlock : register(b0)\n");
code.Write("cbuffer PSBlock : register(b0)\n");
else
code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock\n");
code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n");
}
void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
@ -43,8 +43,8 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
for (u32 i = start; i < end; i++)
{
code.WriteFmt("{} tex{} : register(t{});\n", array_type, i, i);
code.WriteFmt("SamplerState samp{} : register(s{});\n", i, i);
code.Write("{} tex{} : register(t{});\n", array_type, i, i);
code.Write("SamplerState samp{} : register(s{});\n", i, i);
}
}
break;
@ -56,7 +56,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
for (u32 i = start; i < end; i++)
{
code.WriteFmt("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i);
code.Write("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i);
}
}
break;
@ -70,12 +70,12 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.WriteFmt("tex{}.Sample(samp{}, {})", n, n, coords);
code.Write("tex{}.Sample(samp{}, {})", n, n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.WriteFmt("texture(samp{}, {})", n, coords);
code.Write("texture(samp{}, {})", n, coords);
break;
default:
@ -90,12 +90,12 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.WriteFmt("tex{}.Load({})", n, coords);
code.Write("tex{}.Load({})", n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.WriteFmt("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
break;
default:
@ -111,19 +111,19 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
{
case APIType::D3D:
{
code.WriteFmt("void main(");
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("in float3 rawtex{} : TEXCOORD{}, ", i, i);
code.Write("in float3 rawtex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("in float4 rawcolor{} : COLOR{}, ", i, i);
code.Write("in float4 rawcolor{} : COLOR{}, ", i, i);
if (position_input)
code.WriteFmt("in float4 rawpos : POSITION, ");
code.WriteFmt("{}", extra_inputs);
code.Write("in float4 rawpos : POSITION, ");
code.Write("{}", extra_inputs);
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt("out float3 v_tex{} : TEXCOORD{}, ", i, i);
code.Write("out float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt("out float4 v_col{} : COLOR{}, ", i, i);
code.WriteFmt("out float4 opos : SV_Position)\n");
code.Write("out float4 v_col{} : COLOR{}, ", i, i);
code.Write("out float4 opos : SV_Position)\n");
}
break;
@ -133,35 +133,35 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
for (u32 i = 0; i < num_tex_inputs; i++)
{
const auto attribute = SHADER_TEXTURE0_ATTRIB + i;
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i);
code.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i);
}
for (u32 i = 0; i < num_color_inputs; i++)
{
const auto attribute = SHADER_COLOR0_ATTRIB + i;
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i);
code.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i);
}
if (position_input)
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
code.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
code.Write("VARYING_LOCATION(0) out VertexData {{\n");
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}};\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}};\n");
}
else
{
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i);
code.Write("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i);
code.Write("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i);
}
code.WriteFmt("#define opos gl_Position\n");
code.WriteFmt("{}\n", extra_inputs);
code.WriteFmt("void main()\n");
code.Write("#define opos gl_Position\n");
code.Write("{}\n", extra_inputs);
code.Write("void main()\n");
}
break;
default:
@ -177,14 +177,14 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
{
case APIType::D3D:
{
code.WriteFmt("void main(");
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("in float3 v_tex{} : TEXCOORD{}, ", i, i);
code.Write("in float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("in float4 v_col{} : COLOR{}, ", i, i);
code.Write("in float4 v_col{} : COLOR{}, ", i, i);
if (emit_frag_coord)
code.WriteFmt("in float4 frag_coord : SV_Position, ");
code.WriteFmt("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type);
code.Write("in float4 frag_coord : SV_Position, ");
code.Write("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type);
}
break;
@ -193,26 +193,26 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
code.Write("VARYING_LOCATION(0) in VertexData {{\n");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt(" in float3 v_tex{};\n", i);
code.Write(" in float3 v_tex{};\n", i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt(" in float4 v_col{};\n", i);
code.WriteFmt("}};\n");
code.Write(" in float4 v_col{};\n", i);
code.Write("}};\n");
}
else
{
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i);
code.Write("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i);
code.Write("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i);
}
code.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type);
code.WriteFmt("{}\n", extra_vars);
code.Write("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type);
code.Write("{}\n", extra_vars);
if (emit_frag_coord)
code.WriteFmt("#define frag_coord gl_FragCoord\n");
code.WriteFmt("void main()\n");
code.Write("#define frag_coord gl_FragCoord\n");
code.Write("void main()\n");
}
break;
@ -228,16 +228,16 @@ std::string GenerateScreenQuadVertexShader()
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
code.WriteFmt(
code.Write(
"{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -247,26 +247,26 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
ShaderCode code;
if (GetAPIType() == APIType::D3D)
{
code.WriteFmt("struct VS_OUTPUT\n"
code.Write("struct VS_OUTPUT\n"
"{{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i);
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 color{} : COLOR{};\n", i, i);
code.WriteFmt(" float4 position : SV_Position;\n"
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 position : SV_Position;\n"
"}};\n");
code.WriteFmt("struct GS_OUTPUT\n"
code.Write("struct GS_OUTPUT\n"
"{{");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i);
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 color{} : COLOR{};\n", i, i);
code.WriteFmt(" float4 position : SV_Position;\n"
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 position : SV_Position;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"}};\n\n");
code.WriteFmt("[maxvertexcount(6)]\n"
code.Write("[maxvertexcount(6)]\n"
"void main(triangle VS_OUTPUT vso[3], inout TriangleStream<GS_OUTPUT> output)\n"
"{{\n"
" for (uint slice = 0; slice < 2u; slice++)\n"
@ -276,10 +276,10 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
" GS_OUTPUT gso;\n"
" gso.position = vso[i].position;\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i);
code.Write(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" gso.color{} = vso[i].color{};\n", i, i);
code.WriteFmt(" gso.slice = slice;\n"
code.Write(" gso.color{} = vso[i].color{};\n", i, i);
code.Write(" gso.slice = slice;\n"
" output.Append(gso);\n"
" }}\n"
" output.RestartStrip();\n"
@ -288,26 +288,26 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
}
else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan)
{
code.WriteFmt("layout(triangles) in;\n"
code.Write("layout(triangles) in;\n"
"layout(triangle_strip, max_vertices = 6) out;\n");
if (num_tex > 0 || num_colors > 0)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
code.Write("VARYING_LOCATION(0) in VertexData {{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}} v_in[];\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}} v_in[];\n");
code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
code.Write("VARYING_LOCATION(0) out VertexData {{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}} v_out;\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}} v_out;\n");
}
code.WriteFmt("\n"
code.Write("\n"
"void main()\n"
"{{\n"
" for (int j = 0; j < 2; j++)\n"
@ -317,16 +317,16 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
// We have to explicitly unroll this loop otherwise the GL compiler gets cranky.
for (u32 v = 0; v < 3; v++)
{
code.WriteFmt(" gl_Position = gl_in[{}].gl_Position;\n", v);
code.Write(" gl_Position = gl_in[{}].gl_Position;\n", v);
for (u32 i = 0; i < num_tex; i++)
{
code.WriteFmt(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i);
code.Write(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i);
}
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i);
code.WriteFmt(" EmitVertex();\n\n");
code.Write(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i);
code.Write(" EmitVertex();\n\n");
}
code.WriteFmt(" EndPrimitive();\n"
code.Write(" EndPrimitive();\n"
" }}\n"
"}}\n");
}
@ -338,7 +338,7 @@ std::string GenerateTextureCopyVertexShader()
{
ShaderCode code;
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{"
code.Write("{{"
" float2 src_offset;\n"
" float2 src_size;\n"
"}};\n\n");
@ -346,17 +346,16 @@ std::string GenerateTextureCopyVertexShader()
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID");
code.WriteFmt(
"{{\n"
code.Write("{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -366,10 +365,10 @@ std::string GenerateTextureCopyPixelShader()
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 0);
code.WriteFmt("{{\n"
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "v_tex0");
code.WriteFmt(";\n"
code.Write(";\n"
"}}\n");
return code.GetBuffer();
}
@ -378,7 +377,7 @@ std::string GenerateColorPixelShader()
{
ShaderCode code;
EmitPixelMainDeclaration(code, 0, 1);
code.WriteFmt("{{\n"
code.Write("{{\n"
" ocol0 = v_col0;\n"
"}}\n");
return code.GetBuffer();
@ -390,25 +389,25 @@ std::string GenerateResolveDepthPixelShader(u32 samples)
EmitSamplerDeclarations(code, 0, 1, true);
EmitPixelMainDeclaration(code, 1, 0, "float",
GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : "");
code.WriteFmt("{{\n"
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n");
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
// Take the minimum of all depth samples.
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" ocol0 = tex0.Load(coords, 0).r;\n");
code.Write(" ocol0 = tex0.Load(coords, 0).r;\n");
else
code.WriteFmt(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.WriteFmt(" for (int i = 1; i < {}; i++)\n", samples);
code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.Write(" for (int i = 1; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n");
code.Write(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n");
else
code.WriteFmt(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -416,7 +415,7 @@ std::string GenerateClearVertexShader()
{
ShaderCode code;
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{\n"
code.Write("{{\n"
" float4 clear_color;\n"
" float clear_depth;\n"
"}};\n");
@ -424,7 +423,7 @@ std::string GenerateClearVertexShader()
EmitVertexMainDeclaration(code, 0, 0, false, 0, 1,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
code.WriteFmt(
code.Write(
"{{\n"
" float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"
" opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n"
@ -432,9 +431,9 @@ std::string GenerateClearVertexShader()
// NDC space is flipped in Vulkan
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -443,17 +442,17 @@ std::string GenerateEFBPokeVertexShader()
{
ShaderCode code;
EmitVertexMainDeclaration(code, 0, 1, true, 0, 1);
code.WriteFmt("{{\n"
code.Write("{{\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.xyz, 1.0f);\n");
if (g_ActiveConfig.backend_info.bSupportsLargePoints)
code.WriteFmt(" gl_PointSize = rawpos.w;\n");
code.Write(" gl_PointSize = rawpos.w;\n");
// NDC space is flipped in Vulkan.
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -468,45 +467,45 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
"in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " :
"in float4 ipos : SV_Position, ") :
"");
code.WriteFmt("{{\n"
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n");
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
if (samples == 1)
{
// No MSAA at all.
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" float4 val = tex0.Load(int4(coords, 0));\n");
code.Write(" float4 val = tex0.Load(int4(coords, 0));\n");
else
code.WriteFmt(" float4 val = texelFetch(samp0, coords, 0);\n");
code.Write(" float4 val = texelFetch(samp0, coords, 0);\n");
}
else if (g_ActiveConfig.bSSAA)
{
// Sample shading, shader runs once per sample
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" float4 val = tex0.Load(coords, isample);");
code.Write(" float4 val = tex0.Load(coords, isample);");
else
code.WriteFmt(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
}
else
{
// MSAA without sample shading, average out all samples.
code.WriteFmt(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
code.WriteFmt(" for (int i = 0; i < {}; i++)\n", samples);
code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" val += tex0.Load(coords, i);\n");
code.Write(" val += tex0.Load(coords, i);\n");
else
code.WriteFmt(" val += texelFetch(samp0, coords, i);\n");
code.WriteFmt(" val /= float({});\n", samples);
code.Write(" val += texelFetch(samp0, coords, i);\n");
code.Write(" val /= float({});\n", samples);
}
switch (convtype)
{
case EFBReinterpretType::RGB8ToRGBA6:
code.WriteFmt(" int4 src8 = int4(round(val * 255.f));\n"
code.Write(" int4 src8 = int4(round(val * 255.f));\n"
" int4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
@ -516,11 +515,11 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
break;
case EFBReinterpretType::RGB8ToRGB565:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGBA6ToRGB8:
code.WriteFmt(" int4 src6 = int4(round(val * 63.f));\n"
code.Write(" int4 src6 = int4(round(val * 63.f));\n"
" int4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
@ -530,20 +529,20 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
break;
case EFBReinterpretType::RGBA6ToRGB565:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGB565ToRGB8:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGB565ToRGBA6:
//
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
}
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -552,58 +551,57 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 0, "float4", "", true);
code.WriteFmt("{{\n"
code.Write("{{\n"
" int layer = int(v_tex0.z);\n"
" int4 coords = int4(int2(frag_coord.xy), layer, 0);\n");
// Convert to a 32-bit value encompassing all channels, filling the most significant bits with
// zeroes.
code.WriteFmt(" uint raw_value;\n");
code.Write(" uint raw_value;\n");
switch (from_format)
{
case TextureFormat::I8:
case TextureFormat::C8:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
code.Write(";\n"
" raw_value = uint(temp_value.r * 255.0);\n");
}
break;
case TextureFormat::IA8:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(
";\n"
code.Write(";\n"
" raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n");
}
break;
case TextureFormat::I4:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
code.Write(";\n"
" raw_value = uint(temp_value.r * 15.0);\n");
}
break;
case TextureFormat::IA4:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
code.Write(";\n"
" raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n");
}
break;
case TextureFormat::RGB565:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
code.Write(";\n"
" raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n"
" (uint(temp_value.r * 31.0) << 11);\n");
}
@ -611,12 +609,12 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
case TextureFormat::RGB5A3:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n");
code.Write(";\n");
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
code.WriteFmt(
code.Write(
" if (temp_value.a > 0.878f) {{\n"
" raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n"
" (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n"
@ -638,28 +636,28 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
case TextureFormat::I8:
case TextureFormat::C8:
{
code.WriteFmt(" float orgba = float(raw_value & 0xFFu) / 255.0;\n"
code.Write(" float orgba = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgba, orgba, orgba, orgba);\n");
}
break;
case TextureFormat::IA8:
{
code.WriteFmt(" float orgb = float(raw_value & 0xFFu) / 255.0;\n"
code.Write(" float orgb = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n");
}
break;
case TextureFormat::IA4:
{
code.WriteFmt(" float orgb = float(raw_value & 0xFu) / 15.0;\n"
code.Write(" float orgb = float(raw_value & 0xFu) / 15.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n");
}
break;
case TextureFormat::RGB565:
{
code.WriteFmt(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
code.Write(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n");
}
@ -667,7 +665,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
case TextureFormat::RGB5A3:
{
code.WriteFmt(" if ((raw_value & 0x8000u) != 0u) {{\n"
code.Write(" if ((raw_value & 0x8000u) != 0u) {{\n"
" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n"
@ -684,7 +682,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
return "{}\n";
}
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -694,13 +692,13 @@ std::string GenerateEFBRestorePixelShader()
EmitSamplerDeclarations(code, 0, 2, false);
EmitPixelMainDeclaration(code, 1, 0, "float4",
GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : "");
code.WriteFmt("{{\n"
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "v_tex0");
code.WriteFmt(";\n");
code.WriteFmt(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth");
code.Write(";\n");
code.Write(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth");
EmitSampleTexture(code, 1, "v_tex0");
code.WriteFmt(".r;\n"
code.Write(".r;\n"
"}}\n");
return code.GetBuffer();
}
@ -711,12 +709,12 @@ std::string GenerateImGuiVertexShader()
// Uniform buffer contains the viewport size, and we transform in the vertex shader.
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{\n"
code.Write("{{\n"
"float2 u_rcp_viewport_size_mul2;\n"
"}};\n\n");
EmitVertexMainDeclaration(code, 1, 1, true, 1, 1);
code.WriteFmt("{{\n"
code.Write("{{\n"
" v_tex0 = float3(rawtex0.xy, 0.0);\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0,"
@ -724,9 +722,9 @@ std::string GenerateImGuiVertexShader()
// NDC space is flipped in Vulkan.
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -735,10 +733,10 @@ std::string GenerateImGuiPixelShader()
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 1);
code.WriteFmt("{{\n"
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "float3(v_tex0.xy, 0.0)");
code.WriteFmt(" * v_col0;\n"
code.Write(" * v_col0;\n"
"}}\n");
return code.GetBuffer();

View File

@ -73,104 +73,102 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// Insert layout parameters
if (host_config.backend_gs_instancing)
{
out.WriteFmt("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
stereo ? 2 : 1);
out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
}
else
{
out.WriteFmt("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
}
}
out.WriteFmt("{}", s_lighting_struct);
out.Write("{}", s_lighting_struct);
// uniforms
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
else
out.WriteFmt("cbuffer GSBlock {{\n");
out.Write("cbuffer GSBlock {{\n");
out.WriteFmt("\tfloat4 " I_STEREOPARAMS ";\n"
out.Write("\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config, "");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
if (host_config.backend_gs_instancing)
out.WriteFmt("#define InstanceID gl_InvocationID\n");
out.Write("#define InstanceID gl_InvocationID\n");
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
out.WriteFmt("}} vs[{}];\n", vertex_in);
out.Write("}} vs[{}];\n", vertex_in);
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
if (stereo)
out.WriteFmt("\tflat int layer;\n");
out.Write("\tflat int layer;\n");
out.WriteFmt("}} ps;\n");
out.Write("}} ps;\n");
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("struct VertexData {{\n");
out.WriteFmt("\tVS_OUTPUT o;\n");
out.Write("struct VertexData {{\n");
out.Write("\tVS_OUTPUT o;\n");
if (stereo)
out.WriteFmt("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (host_config.backend_gs_instancing)
{
out.WriteFmt("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in,
wireframe ? "Line" : "Triangle");
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
}
else
{
out.WriteFmt("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in,
wireframe ? "Line" : "Triangle");
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
}
out.WriteFmt("\tVertexData ps;\n");
out.Write("\tVertexData ps;\n");
}
if (primitive_type == PrimitiveType::Lines)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT start, end;\n");
out.Write("\tVS_OUTPUT start, end;\n");
AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, host_config);
AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tVS_OUTPUT start = o[0];\n"
out.Write("\tVS_OUTPUT start = o[0];\n"
"\tVS_OUTPUT end = o[1];\n");
}
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending the slope of the line.
out.WriteFmt("\tfloat2 offset;\n"
out.Write("\tfloat2 offset;\n"
"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
@ -188,17 +186,17 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT center;\n");
out.Write("\tVS_OUTPUT center;\n");
AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tVS_OUTPUT center = o[0];\n");
out.Write("\tVS_OUTPUT center = o[0];\n");
}
// Offset from center to upper right vertex
// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
out.WriteFmt("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
}
@ -207,19 +205,19 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// If the GPU supports invocation we don't need a for loop and can simply use the
// invocation identifier to determine which layer we're rendering.
if (host_config.backend_gs_instancing)
out.WriteFmt("\tint eye = InstanceID;\n");
out.Write("\tint eye = InstanceID;\n");
else
out.WriteFmt("\tfor (int eye = 0; eye < 2; ++eye) {{\n");
out.Write("\tfor (int eye = 0; eye < 2; ++eye) {{\n");
}
if (wireframe)
out.WriteFmt("\tVS_OUTPUT first;\n");
out.Write("\tVS_OUTPUT first;\n");
out.WriteFmt("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in);
out.Write("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT f;\n");
out.Write("\tVS_OUTPUT f;\n");
AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, host_config);
if (host_config.backend_depth_clamp &&
@ -227,21 +225,21 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
{
// On certain GPUs we have to consume the clip distance from the vertex shader
// or else the other vertex shader outputs will get corrupted.
out.WriteFmt("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n"
out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n"
"\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
}
}
else
{
out.WriteFmt("\tVS_OUTPUT f = o[i];\n");
out.Write("\tVS_OUTPUT f = o[i];\n");
}
if (stereo)
{
// Select the output layer
out.WriteFmt("\tps.layer = eye;\n");
out.Write("\tps.layer = eye;\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tgl_Layer = eye;\n");
out.Write("\tgl_Layer = eye;\n");
// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
@ -250,56 +248,56 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// the depth value. This results in objects at a distance smaller than the convergence
// distance to seemingly appear in front of the screen.
// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
out.WriteFmt("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
out.WriteFmt("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
}
if (primitive_type == PrimitiveType::Lines)
{
out.WriteFmt("\tVS_OUTPUT l = f;\n"
out.Write("\tVS_OUTPUT l = f;\n"
"\tVS_OUTPUT r = f;\n");
out.WriteFmt("\tl.pos.xy -= offset * l.pos.w;\n"
out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
"\tr.pos.xy += offset * r.pos.w;\n");
out.WriteFmt("\tif (" I_TEXOFFSET "[2] != 0) {{\n");
out.WriteFmt("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
out.Write("\tif (" I_TEXOFFSET "[2] != 0) {{\n");
out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.WriteFmt("\t\tr.tex{}.x += texOffset;\n", i);
out.Write("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.Write("\t\tr.tex{}.x += texOffset;\n", i);
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EmitVertex(out, host_config, uid_data, "l", ApiType, wireframe, true);
EmitVertex(out, host_config, uid_data, "r", ApiType, wireframe);
}
else if (primitive_type == PrimitiveType::Points)
{
out.WriteFmt("\tVS_OUTPUT ll = f;\n"
out.Write("\tVS_OUTPUT ll = f;\n"
"\tVS_OUTPUT lr = f;\n"
"\tVS_OUTPUT ul = f;\n"
"\tVS_OUTPUT ur = f;\n");
out.WriteFmt("\tll.pos.xy += float2(-1,-1) * offset;\n"
out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
"\tlr.pos.xy += float2(1,-1) * offset;\n"
"\tul.pos.xy += float2(-1,1) * offset;\n"
"\tur.pos.xy += offset;\n");
out.WriteFmt("\tif (" I_TEXOFFSET "[3] != 0) {{\n");
out.WriteFmt("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
out.Write("\tif (" I_TEXOFFSET "[3] != 0) {{\n");
out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
"[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i);
out.WriteFmt("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i);
out.WriteFmt("\t\tur.tex{}.xy += texOffset;\n", i);
out.WriteFmt("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i);
out.WriteFmt("\t}}\n");
out.Write("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i);
out.Write("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i);
out.Write("\t\tur.tex{}.xy += texOffset;\n", i);
out.Write("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i);
out.Write("\t}}\n");
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EmitVertex(out, host_config, uid_data, "ll", ApiType, wireframe, true);
EmitVertex(out, host_config, uid_data, "lr", ApiType, wireframe);
@ -311,14 +309,14 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
EmitVertex(out, host_config, uid_data, "f", ApiType, wireframe, true);
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EndPrimitive(out, host_config, uid_data, ApiType, wireframe);
if (stereo && !host_config.backend_gs_instancing)
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -328,34 +326,34 @@ static void EmitVertex(ShaderCode& out, const ShaderHostConfig& host_config,
APIType ApiType, bool wireframe, bool first_vertex)
{
if (wireframe && first_vertex)
out.WriteFmt("\tif (i == 0) first = {};\n", vertex);
out.Write("\tif (i == 0) first = {};\n", vertex);
if (ApiType == APIType::OpenGL)
{
out.WriteFmt("\tgl_Position = {}.pos;\n", vertex);
out.Write("\tgl_Position = {}.pos;\n", vertex);
if (host_config.backend_depth_clamp)
{
out.WriteFmt("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex);
out.WriteFmt("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex);
out.Write("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex);
out.Write("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex);
}
AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config);
}
else if (ApiType == APIType::Vulkan)
{
// Vulkan NDC space has Y pointing down (right-handed NDC space).
out.WriteFmt("\tgl_Position = {}.pos;\n", vertex);
out.WriteFmt("\tgl_Position.y = -gl_Position.y;\n");
out.Write("\tgl_Position = {}.pos;\n", vertex);
out.Write("\tgl_Position.y = -gl_Position.y;\n");
AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tps.o = {};\n", vertex);
out.Write("\tps.o = {};\n", vertex);
}
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tEmitVertex();\n");
out.Write("\tEmitVertex();\n");
else
out.WriteFmt("\toutput.Append(ps);\n");
out.Write("\toutput.Append(ps);\n");
}
static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
@ -365,9 +363,9 @@ static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
EmitVertex(out, host_config, uid_data, "first", ApiType, wireframe);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tEndPrimitive();\n");
out.Write("\tEndPrimitive();\n");
else
out.WriteFmt("\toutput.RestartStrip();\n");
out.Write("\toutput.RestartStrip();\n");
}
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback)

View File

@ -24,46 +24,45 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d
{
case LIGHTATTN_NONE:
case LIGHTATTN_DIR:
object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("attn = 1.0;\n");
object.WriteFmt("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = 1.0;\n");
object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
break;
case LIGHTATTN_SPEC:
object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR
".xyz)) : 0.0;\n",
LIGHT_DIR_PARAMS(index));
object.WriteFmt("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.WriteFmt("distAttn = {}(" LIGHT_DISTATT ".xyz);\n",
object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.Write("distAttn = {}(" LIGHT_DISTATT ".xyz);\n",
(diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index));
object.WriteFmt("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n");
break;
case LIGHTATTN_SPOT:
object.WriteFmt("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("dist2 = dot(ldir, ldir);\n"
object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.Write("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
LIGHT_DIR_PARAMS(index));
// attn*attn may overflow
object.WriteFmt("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT
object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT
".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_DISTATT_PARAMS(index));
break;
}
switch (diffusefunc)
{
case LIGHTDIF_NONE:
object.WriteFmt("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle,
object.Write("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle,
swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
object.WriteFmt("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL
")));\n",
object.Write("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL ")));\n",
swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
@ -71,7 +70,7 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d
ASSERT(0);
}
object.WriteFmt("\n");
object.Write("\n");
}
// vertex shader
@ -84,21 +83,21 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
{
object.WriteFmt("{{\n");
object.Write("{{\n");
const bool colormatsource = !!(uid_data.matsource & (1 << j));
if (colormatsource) // from vertex
{
if ((components & (VB_HAS_COL0 << j)) != 0)
object.WriteFmt("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j);
object.Write("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j);
else if ((components & VB_HAS_COL0) != 0)
object.WriteFmt("int4 mat = int4(round({}0 * 255.0));\n", in_color_name);
object.Write("int4 mat = int4(round({}0 * 255.0));\n", in_color_name);
else
object.WriteFmt("int4 mat = int4(255, 255, 255, 255);\n");
object.Write("int4 mat = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.WriteFmt("int4 mat = {}[{}];\n", I_MATERIALS, j + 2);
object.Write("int4 mat = {}[{}];\n", I_MATERIALS, j + 2);
}
if ((uid_data.enablelighting & (1 << j)) != 0)
@ -107,28 +106,28 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
if ((components & (VB_HAS_COL0 << j)) != 0)
{
object.WriteFmt("lacc = int4(round({}{} * 255.0));\n", in_color_name, j);
object.Write("lacc = int4(round({}{} * 255.0));\n", in_color_name, j);
}
else if ((components & VB_HAS_COL0) != 0)
{
object.WriteFmt("lacc = int4(round({}0 * 255.0));\n", in_color_name);
object.Write("lacc = int4(round({}0 * 255.0));\n", in_color_name);
}
else
{
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returning 1.0 is the same as disabled lighting, so this could be fine
object.WriteFmt("lacc = int4(255, 255, 255, 255);\n");
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
}
else // from color
{
object.WriteFmt("lacc = {}[{}];\n", I_MATERIALS, j);
object.Write("lacc = {}[{}];\n", I_MATERIALS, j);
}
}
else
{
object.WriteFmt("lacc = int4(255, 255, 255, 255);\n");
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
// check if alpha is different
@ -138,15 +137,15 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
if (alphamatsource) // from vertex
{
if ((components & (VB_HAS_COL0 << j)) != 0)
object.WriteFmt("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
object.Write("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
else if ((components & VB_HAS_COL0) != 0)
object.WriteFmt("mat.w = int(round({}0.w * 255.0));\n", in_color_name);
object.Write("mat.w = int(round({}0.w * 255.0));\n", in_color_name);
else
object.WriteFmt("mat.w = 255;\n");
object.Write("mat.w = 255;\n");
}
else // from color
{
object.WriteFmt("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
object.Write("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
}
}
@ -156,26 +155,26 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
if ((components & (VB_HAS_COL0 << j)) != 0)
{
object.WriteFmt("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
object.Write("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
}
else if ((components & VB_HAS_COL0) != 0)
{
object.WriteFmt("lacc.w = int(round({}0.w * 255.0));\n", in_color_name);
object.Write("lacc.w = int(round({}0.w * 255.0));\n", in_color_name);
}
else
{
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
object.WriteFmt("lacc.w = 255;\n");
object.Write("lacc.w = 255;\n");
}
}
else // from color
{
object.WriteFmt("lacc.w = {}[{}].w;\n", I_MATERIALS, j);
object.Write("lacc.w = {}[{}].w;\n", I_MATERIALS, j);
}
}
else
{
object.WriteFmt("lacc.w = 255;\n");
object.Write("lacc.w = 255;\n");
}
if ((uid_data.enablelighting & (1 << j)) != 0) // Color lights
@ -194,9 +193,9 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
GenerateLightShader(object, uid_data, i, j + 2, true);
}
}
object.WriteFmt("lacc = clamp(lacc, 0, 255);\n");
object.WriteFmt("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.WriteFmt("}}\n");
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}}\n");
}
}

View File

@ -365,43 +365,43 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_tex
const ShaderHostConfig& host_config, bool bounding_box)
{
// dot product for integer vectors
out.WriteFmt("int idot(int3 x, int3 y)\n"
out.Write("int idot(int3 x, int3 y)\n"
"{{\n"
"\tint3 tmp = x * y;\n"
"\treturn tmp.x + tmp.y + tmp.z;\n"
"}}\n");
out.WriteFmt("int idot(int4 x, int4 y)\n"
out.Write("int idot(int4 x, int4 y)\n"
"{{\n"
"\tint4 tmp = x * y;\n"
"\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
"}}\n\n");
// rounding + casting to integer at once in a single function
out.WriteFmt("int iround(float x) {{ return int (round(x)); }}\n"
out.Write("int iround(float x) {{ return int (round(x)); }}\n"
"int2 iround(float2 x) {{ return int2(round(x)); }}\n"
"int3 iround(float3 x) {{ return int3(round(x)); }}\n"
"int4 iround(float4 x) {{ return int4(round(x)); }}\n\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
}
else // D3D
{
// Declare samplers
out.WriteFmt("SamplerState samp[8] : register(s0);\n"
out.Write("SamplerState samp[8] : register(s0);\n"
"\n"
"Texture2DArray Tex[8] : register(t0);\n");
}
out.WriteFmt("\n");
out.Write("\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n");
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n");
else
out.WriteFmt("cbuffer PSBlock : register(b0) {{\n");
out.Write("cbuffer PSBlock : register(b0) {{\n");
out.WriteFmt("\tint4 " I_COLORS "[4];\n"
out.Write("\tint4 " I_COLORS "[4];\n"
"\tint4 " I_KCOLORS "[4];\n"
"\tint4 " I_ALPHA ";\n"
"\tfloat4 " I_TEXDIMS "[8];\n"
@ -435,7 +435,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_tex
"\tbool blend_subtract;\n"
"\tbool blend_subtract_alpha;\n"
"}};\n\n");
out.WriteFmt("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
"#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n"
"#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n"
@ -443,20 +443,20 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_tex
if (host_config.per_pixel_lighting)
{
out.WriteFmt("{}", s_lighting_struct);
out.Write("{}", s_lighting_struct);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.WriteFmt("cbuffer VSBlock : register(b1) {{\n");
out.Write("cbuffer VSBlock : register(b1) {{\n");
out.WriteFmt("{}", s_shader_uniforms);
out.WriteFmt("}};\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
}
if (bounding_box)
{
out.WriteFmt(R"(
out.Write(R"(
#ifdef API_D3D
globallycoherent RWBuffer<int> bbox_data : register(u2);
#define atomicMin InterlockedMin
@ -541,8 +541,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
const bool stereo = host_config.stereo;
const u32 numStages = uid_data->genMode_numtevstages + 1;
out.WriteFmt("// Pixel Shader for TEV stages\n");
out.WriteFmt("// {} TEV stages, {} texgens, {} IND stages\n", numStages,
out.Write("// Pixel Shader for TEV stages\n");
out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages,
uid_data->genMode_numtexgens, uid_data->genMode_numindstages);
// Stuff that is shared between ubershaders and pixelgen.
@ -588,11 +588,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
// This is a #define which signals whatever early-z method the driver supports.
out.WriteFmt("FORCE_EARLY_Z; \n");
out.Write("FORCE_EARLY_Z; \n");
}
else
{
out.WriteFmt("[earlydepthstencil]\n");
out.Write("[earlydepthstencil]\n");
}
}
@ -610,12 +610,12 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
{
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n");
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
@ -627,119 +627,118 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// shader
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
}
if (uid_data->per_pixel_depth)
out.WriteFmt("#define depth gl_FragDepth\n");
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
if (stereo)
out.WriteFmt("\tflat int layer;\n");
out.Write("\tflat int layer;\n");
out.WriteFmt("}};\n");
out.Write("}};\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.WriteFmt("void main()\n{{\n");
out.WriteFmt("\tfloat4 rawpos = gl_FragCoord;\n");
out.Write("void main()\n{{\n");
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.WriteFmt("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
"\tfloat4 ocol0;\n"
"\tfloat4 ocol1;\n");
}
}
else // D3D
{
out.WriteFmt("void main(\n");
out.Write("void main(\n");
if (uid_data->uint_output)
{
out.WriteFmt(" out uint4 ocol0 : SV_Target,\n");
out.Write(" out uint4 ocol0 : SV_Target,\n");
}
else
{
out.WriteFmt(" out float4 ocol0 : SV_Target0,\n"
out.Write(" out float4 ocol0 : SV_Target0,\n"
" out float4 ocol1 : SV_Target1,\n");
}
out.WriteFmt("{}"
out.Write("{}"
" in float4 rawpos : SV_Position,\n",
uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : "");
out.WriteFmt(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa));
out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.Write(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa));
// compute window position if needed because binding semantic WPOS is not widely supported
for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.WriteFmt(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
out.Write(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens);
}
if (per_pixel_lighting)
{
out.WriteFmt(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens + 1);
out.WriteFmt(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens + 2);
}
if (host_config.backend_geometry_shaders)
{
out.WriteFmt(",\n in float clipDist0 : SV_ClipDistance0\n"
out.Write(",\n in float clipDist0 : SV_ClipDistance0\n"
",\n in float clipDist1 : SV_ClipDistance1\n");
}
if (stereo)
out.WriteFmt(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.WriteFmt(" ) {{\n");
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write(" ) {{\n");
}
out.WriteFmt(
"\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS "[3], prev = " I_COLORS
"[0];\n"
out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS
"[3], prev = " I_COLORS "[0];\n"
"\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, "
"0, 0);\n"
"\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
@ -752,15 +751,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// On GLSL, input variables must not be assigned to.
// This is why we declare these variables locally instead.
out.WriteFmt("\tfloat4 col0 = colors_0;\n"
out.Write("\tfloat4 col0 = colors_0;\n"
"\tfloat4 col1 = colors_1;\n");
if (per_pixel_lighting)
{
out.WriteFmt("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"
out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"
"\tfloat3 pos = WorldPos;\n");
out.WriteFmt("\tint4 lacc;\n"
out.Write("\tint4 lacc;\n"
"\tfloat3 ldir, h, cosAttn, distAttn;\n"
"\tfloat dist, dist2, attn;\n");
@ -776,16 +775,16 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// HACK to handle cases where the tex gen is not enabled
if (uid_data->genMode_numtexgens == 0)
{
out.WriteFmt("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
}
else
{
out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1);
for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.WriteFmt("\tint2 fixpoint_uv{} = int2(", i);
out.WriteFmt("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.WriteFmt(" * " I_TEXDIMS "[{}].zw);\n", i);
out.Write("\tint2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
// TODO: S24 overflows here?
}
}
@ -800,15 +799,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
if (texcoord < uid_data->genMode_numtexgens)
{
out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
out.WriteFmt("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) != 0 ? "zw" : "xy");
}
else
{
out.WriteFmt("\ttempcoord = int2(0, 0);\n");
out.Write("\ttempcoord = int2(0, 0);\n");
}
out.WriteFmt("\tint3 iindtex{} = ", i);
out.Write("\tint3 iindtex{} = ", i);
SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type);
}
}
@ -828,14 +827,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac;
if (last_cc.dest != 0)
{
out.WriteFmt("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]);
}
if (last_ac.dest != 0)
{
out.WriteFmt("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]);
}
}
out.WriteFmt("\tprev = prev & 255;\n");
out.Write("\tprev = prev & 255;\n");
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha
@ -852,13 +851,13 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);
out.WriteFmt("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
// OpenGL has reversed vertical screenspace coordinates
if (api_type == APIType::OpenGL)
out.WriteFmt("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT);
out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT);
out.WriteFmt("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
".y * screenpos.y);\n");
}
else if (!host_config.fast_depth_calc)
@ -870,17 +869,17 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// the host GPU driver from performing any early depth test optimizations.
out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1);
// the screen space depth value = far z + (clip z / clip w) * z range
out.WriteFmt("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
"[1].y));\n");
}
else
{
if (!host_config.backend_reversed_depth_range)
out.WriteFmt("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
else
out.WriteFmt("\tint zCoord = int(rawpos.z * 16777216.0);\n");
out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
}
out.WriteFmt("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
// depth texture can safely be ignored if the result won't be written to the depth buffer
// (early_ztest) and isn't used for fog either
@ -890,9 +889,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
if (uid_data->per_pixel_depth && uid_data->early_ztest)
{
if (!host_config.backend_reversed_depth_range)
out.WriteFmt("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
else
out.WriteFmt("\tdepth = float(zCoord) / 16777216.0;\n");
out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
}
// Note: depth texture output is only written to depth buffer if late depth test is used
@ -903,17 +902,17 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// use the texture input of the last texture stage (textemp), hopefully this has been read and
// is in correct format...
out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1);
out.WriteFmt("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n",
out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n",
(uid_data->ztex_op == ZTEXTURE_ADD) ? "+ zCoord" : "");
out.WriteFmt("\tzCoord = zCoord & 0xFFFFFF;\n");
out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
}
if (uid_data->per_pixel_depth && uid_data->late_ztest)
{
if (!host_config.backend_reversed_depth_range)
out.WriteFmt("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
else
out.WriteFmt("\tdepth = float(zCoord) / 16777216.0;\n");
out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
}
// No dithering for RGB8 mode
@ -921,8 +920,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
{
// Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
// Here the matrix is encoded into the two factor constants
out.WriteFmt("\tint2 dither = int2(rawpos.xy) & 1;\n");
out.WriteFmt("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n");
out.Write("\tint2 dither = int2(rawpos.xy) & 1;\n");
out.Write("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n");
}
WriteFog(out, uid_data);
@ -935,9 +934,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
WriteBlend(out, uid_data);
if (uid_data->bounding_box)
out.WriteFmt("\tUpdateBoundingBox(rawpos.xy);\n");
out.Write("\tUpdateBoundingBox(rawpos.xy);\n");
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -946,7 +945,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
APIType api_type, bool stereo)
{
const auto& stage = uid_data->stagehash[n];
out.WriteFmt("\n\t// TEV stage {}\n", n);
out.Write("\n\t// TEV stage {}\n", n);
// HACK to handle cases where the tex gen is not enabled
u32 texcoord = stage.tevorders_texcoord;
@ -959,7 +958,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
TevStageIndirect tevind;
tevind.hex = stage.tevind;
out.WriteFmt("\t// indirect op\n");
out.Write("\t// indirect op\n");
// Perform the indirect op on the incoming regular coordinates
// using iindtex{} as the offset coords
@ -980,8 +979,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
"248",
};
out.WriteFmt("alphabump = iindtex{}.{} & {};\n", tevind.bt.Value(),
tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_mask[tevind.fmt]);
out.Write("alphabump = iindtex{}.{} & {};\n", tevind.bt.Value(), tev_ind_alpha_sel[tevind.bs],
tev_ind_alpha_mask[tevind.fmt]);
}
else
{
@ -997,7 +996,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
"15",
"7",
};
out.WriteFmt("\tint3 iindtevcrd{} = iindtex{} & {};\n", n, tevind.bt.Value(),
out.Write("\tint3 iindtevcrd{} = iindtex{} & {};\n", n, tevind.bt.Value(),
tev_ind_fmt_mask[tevind.fmt]);
// bias - TODO: Check if this needs to be this complicated...
@ -1016,17 +1015,17 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (tevind.bias == ITB_S || tevind.bias == ITB_T || tevind.bias == ITB_U)
{
out.WriteFmt("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias],
out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_add[tevind.fmt]);
}
else if (tevind.bias == ITB_ST || tevind.bias == ITB_SU || tevind.bias == ITB_TU)
{
out.WriteFmt("\tiindtevcrd{}.{} += int2({}, {});\n", n, tev_ind_bias_field[tevind.bias],
out.Write("\tiindtevcrd{}.{} += int2({}, {});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt]);
}
else if (tevind.bias == ITB_STU)
{
out.WriteFmt("\tiindtevcrd{}.{} += int3({}, {}, {});\n", n, tev_ind_bias_field[tevind.bias],
out.Write("\tiindtevcrd{}.{} += int3({}, {}, {});\n", n, tev_ind_bias_field[tevind.bias],
tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt],
tev_ind_bias_add[tevind.fmt]);
}
@ -1038,26 +1037,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const u32 mtxidx = 2 * (tevind.mid - 1);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.WriteFmt("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX
"[{}].xyz, iindtevcrd{}), idot(" I_INDTEXMTX
"[{}].xyz, iindtevcrd{})) >> 3;\n",
out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX
"[{}].xyz, iindtevcrd{}), idot(" I_INDTEXMTX "[{}].xyz, iindtevcrd{})) >> 3;\n",
n, mtxidx, n, mtxidx + 1, n);
// TODO: should use a shader uid branch for this for better performance
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION))
{
out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= indtexmtx_w_inverse_{};\n", n, n);
out.Write("\telse indtevtrans{} <<= indtexmtx_w_inverse_{};\n", n, n);
}
else
{
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 7 && has_tex_coord)
@ -1066,22 +1062,20 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const u32 mtxidx = 2 * (tevind.mid - 5);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.WriteFmt("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n,
out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n,
texcoord, n);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION))
{
out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n);
out.Write("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n);
}
else
{
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 11 && has_tex_coord)
@ -1090,33 +1084,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const u32 mtxidx = 2 * (tevind.mid - 9);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.WriteFmt("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n,
out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n,
texcoord, n);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION))
{
out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx);
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n);
out.Write("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n);
}
else
{
out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX
"[{}].w;\n",
out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n",
mtxidx, n, mtxidx);
out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else
{
out.WriteFmt("\tint2 indtevtrans{} = int2(0, 0);\n", n);
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
}
}
else
{
out.WriteFmt("\tint2 indtevtrans{} = int2(0, 0);\n", n);
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
}
// ---------
@ -1131,40 +1123,40 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// wrap S
if (tevind.sw == ITW_OFF)
{
out.WriteFmt("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord);
out.Write("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord);
}
else if (tevind.sw == ITW_0)
{
out.WriteFmt("\twrappedcoord.x = 0;\n");
out.Write("\twrappedcoord.x = 0;\n");
}
else
{
out.WriteFmt("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord,
out.Write("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord,
tev_ind_wrap_start[tevind.sw]);
}
// wrap T
if (tevind.tw == ITW_OFF)
{
out.WriteFmt("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord);
out.Write("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord);
}
else if (tevind.tw == ITW_0)
{
out.WriteFmt("\twrappedcoord.y = 0;\n");
out.Write("\twrappedcoord.y = 0;\n");
}
else
{
out.WriteFmt("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord,
out.Write("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord,
tev_ind_wrap_start[tevind.tw]);
}
if (tevind.fb_addprev) // add previous tevcoord
out.WriteFmt("\ttevcoord.xy += wrappedcoord + indtevtrans{};\n", n);
out.Write("\ttevcoord.xy += wrappedcoord + indtevtrans{};\n", n);
else
out.WriteFmt("\ttevcoord.xy = wrappedcoord + indtevtrans{};\n", n);
out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans{};\n", n);
// Emulate s24 overflows
out.WriteFmt("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
}
TevStageCombiner::ColorCombiner cc;
@ -1186,7 +1178,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0',
};
out.WriteFmt("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap);
out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap);
}
if (stage.tevorders_enable)
@ -1204,23 +1196,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
{
// calc tevcoord
if (has_tex_coord)
out.WriteFmt("\ttevcoord.xy = fixpoint_uv{};\n", texcoord);
out.Write("\ttevcoord.xy = fixpoint_uv{};\n", texcoord);
else
out.WriteFmt("\ttevcoord.xy = int2(0, 0);\n");
out.Write("\ttevcoord.xy = int2(0, 0);\n");
}
out.WriteFmt("\ttextemp = ");
out.Write("\ttextemp = ");
SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type);
}
else
{
out.WriteFmt("\ttextemp = int4(255, 255, 255, 255);\n");
out.Write("\ttextemp = int4(255, 255, 255, 255);\n");
}
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST ||
cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
out.WriteFmt("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc],
out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc],
tev_ksel_table_a[stage.tevksel_ka]);
if (stage.tevksel_kc > 7)
@ -1250,16 +1242,16 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
if (ac.dest >= GX_TEVREG0)
out.SetConstantsUsed(C_COLORS + ac.dest, C_COLORS + ac.dest);
out.WriteFmt("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a],
out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a],
tev_a_input_table[ac.a]);
out.WriteFmt("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b],
out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b],
tev_a_input_table[ac.b]);
out.WriteFmt("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c],
out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c],
tev_a_input_table[ac.c]);
out.WriteFmt("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]);
out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]);
out.WriteFmt("\t// color combine\n");
out.WriteFmt("\t{} = clamp(", tev_c_output_table[cc.dest]);
out.Write("\t// color combine\n");
out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]);
if (cc.bias != TEVBIAS_COMPARE)
{
WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.shift, false);
@ -1282,17 +1274,17 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
};
const u32 mode = (cc.shift << 1) | cc.op;
out.WriteFmt(" tevin_d.rgb + ");
out.WriteFmt("{}", function_table[mode]);
out.Write(" tevin_d.rgb + ");
out.Write("{}", function_table[mode]);
}
if (cc.clamp)
out.WriteFmt(", int3(0,0,0), int3(255,255,255))");
out.Write(", int3(0,0,0), int3(255,255,255))");
else
out.WriteFmt(", int3(-1024,-1024,-1024), int3(1023,1023,1023))");
out.WriteFmt(";\n");
out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))");
out.Write(";\n");
out.WriteFmt("\t// alpha combine\n");
out.WriteFmt("\t{} = clamp(", tev_a_output_table[ac.dest]);
out.Write("\t// alpha combine\n");
out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]);
if (ac.bias != TEVBIAS_COMPARE)
{
WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.shift, true);
@ -1311,15 +1303,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
};
const u32 mode = (ac.shift << 1) | ac.op;
out.WriteFmt(" tevin_d.a + ");
out.WriteFmt("{}", function_table[mode]);
out.Write(" tevin_d.a + ");
out.Write("{}", function_table[mode]);
}
if (ac.clamp)
out.WriteFmt(", 0, 255)");
out.Write(", 0, 255)");
else
out.WriteFmt(", -1024, 1023)");
out.Write(", -1024, 1023)");
out.WriteFmt(";\n");
out.Write(";\n");
}
static void WriteTevRegular(ShaderCode& out, std::string_view components, int bias, int op,
@ -1364,14 +1356,12 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, int bi
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
// - a rounding bias is added before dividing by 256
out.WriteFmt("(((tevin_d.{}{}){})", components, tev_bias_table[bias],
tev_scale_table_left[shift]);
out.WriteFmt(" {} ", tev_op_table[op]);
out.WriteFmt(
"(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)",
out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[shift]);
out.Write(" {} ", tev_op_table[op]);
out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)",
components, components, components, components, components, tev_scale_table_left[shift],
tev_lerp_bias[2 * op + ((shift == 3) == alpha)]);
out.WriteFmt("){}", tev_scale_table_right[shift]);
out.Write("){}", tev_scale_table_right[shift]);
}
static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap,
@ -1381,14 +1371,13 @@ static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::stri
if (api_type == APIType::D3D)
{
out.WriteFmt("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS
out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS
"[{}].xy, {}))).{};\n",
texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
else
{
out.WriteFmt("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS
"[{}].xy, {}))).{};\n",
out.Write("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS "[{}].xy, {}))).{};\n",
texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
}
@ -1422,52 +1411,52 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
const auto write_alpha_func = [&out](int index, std::string_view ref) {
const bool has_no_arguments = index == 0 || index == tev_alpha_funcs_table.size() - 1;
if (has_no_arguments)
out.WriteFmt("{}", tev_alpha_funcs_table[index]);
out.Write("{}", tev_alpha_funcs_table[index]);
else
out.WriteFmt(tev_alpha_funcs_table[index], ref);
out.Write(tev_alpha_funcs_table[index], ref);
};
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_NEGATED_BOOLEAN))
out.WriteFmt("\tif(( ");
out.Write("\tif(( ");
else
out.WriteFmt("\tif(!( ");
out.Write("\tif(!( ");
// Lookup the first component from the alpha function table
const int comp0_index = uid_data->alpha_test_comp0;
write_alpha_func(comp0_index, alpha_ref[0]);
// Lookup the logic op
out.WriteFmt("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]);
out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]);
// Lookup the second component from the alpha function table
const int comp1_index = uid_data->alpha_test_comp1;
write_alpha_func(comp1_index, alpha_ref[1]);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_NEGATED_BOOLEAN))
out.WriteFmt(") == false) {{\n");
out.Write(") == false) {{\n");
else
out.WriteFmt(")) {{\n");
out.Write(")) {{\n");
out.WriteFmt("\t\tocol0 = float4(0.0, 0.0, 0.0, 0.0);\n");
out.Write("\t\tocol0 = float4(0.0, 0.0, 0.0, 0.0);\n");
if (use_dual_source && !(api_type == APIType::D3D && uid_data->uint_output))
out.WriteFmt("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n");
out.Write("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n");
if (per_pixel_depth)
{
out.WriteFmt("\t\tdepth = {};\n",
out.Write("\t\tdepth = {};\n",
!g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0");
}
// ZCOMPLOC HACK:
if (!uid_data->alpha_test_use_zcomploc_hack)
{
out.WriteFmt("\t\tdiscard;\n");
out.Write("\t\tdiscard;\n");
if (api_type == APIType::D3D)
out.WriteFmt("\t\treturn;\n");
out.Write("\t\treturn;\n");
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
}
constexpr std::array<const char*, 8> tev_fog_funcs_table{
@ -1497,14 +1486,14 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
// renderer)
// Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead?
// That's equivalent, but keeps the lower bits of Zs.
out.WriteFmt("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI
".y - (zCoord >> " I_FOGI ".w));\n");
out.Write("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
".w));\n");
}
else
{
// orthographic
// ze = a*Zs (here, no B_SHF)
out.WriteFmt("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n");
out.Write("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n");
}
// x_adjust = sqrt((x-center)^2 + k^2)/k
@ -1512,7 +1501,7 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
if (uid_data->fog_RangeBaseEnabled)
{
out.SetConstantsUsed(C_FOGF, C_FOGF);
out.WriteFmt("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
"\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
"\tuint indexlower = uint(floatindex);\n"
"\tuint indexupper = indexlower + 1u;\n"
@ -1523,11 +1512,11 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"\tze *= x_adjust;\n");
}
out.WriteFmt("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n");
out.Write("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n");
if (uid_data->fog_fsel > 3)
{
out.WriteFmt("{}", tev_fog_funcs_table[uid_data->fog_fsel]);
out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]);
}
else
{
@ -1535,8 +1524,8 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
WARN_LOG(VIDEO, "Unknown Fog Type! %08x", uid_data->fog_fsel);
}
out.WriteFmt("\tint ifog = iround(fog * 256.0);\n");
out.WriteFmt("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
out.Write("\tint ifog = iround(fog * 256.0);\n");
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
}
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
@ -1546,33 +1535,33 @@ static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid
if (api_type == APIType::D3D && uid_data->uint_output)
{
if (uid_data->rgba6_format)
out.WriteFmt("\tocol0 = uint4(prev & 0xFC);\n");
out.Write("\tocol0 = uint4(prev & 0xFC);\n");
else
out.WriteFmt("\tocol0 = uint4(prev);\n");
out.Write("\tocol0 = uint4(prev);\n");
return;
}
if (uid_data->rgba6_format)
out.WriteFmt("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n");
out.Write("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n");
else
out.WriteFmt("\tocol0.rgb = float3(prev.rgb) / 255.0;\n");
out.Write("\tocol0.rgb = float3(prev.rgb) / 255.0;\n");
// Colors will be blended against the 8-bit alpha from ocol1 and
// the 6-bit alpha from ocol0 will be written to the framebuffer
if (uid_data->useDstAlpha)
{
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
out.WriteFmt("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n");
out.Write("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n");
// Use dual-source color blending to perform dst alpha in a single pass
if (use_dual_source)
out.WriteFmt("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
}
else
{
out.WriteFmt("\tocol0.a = float(prev.a >> 2) / 63.0;\n");
out.Write("\tocol0.a = float(prev.a >> 2) / 63.0;\n");
if (use_dual_source)
out.WriteFmt("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n");
}
}
@ -1620,34 +1609,34 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
"initial_ocol0.a;", // DSTALPHA
"1.0 - initial_ocol0.a;", // INVDSTALPHA
};
out.WriteFmt("\tfloat4 blend_src;\n");
out.WriteFmt("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]);
out.WriteFmt("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]);
out.WriteFmt("\tfloat4 blend_dst;\n");
out.WriteFmt("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]);
out.WriteFmt("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]);
out.Write("\tfloat4 blend_src;\n");
out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]);
out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]);
out.Write("\tfloat4 blend_dst;\n");
out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]);
out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]);
out.WriteFmt("\tfloat4 blend_result;\n");
out.Write("\tfloat4 blend_result;\n");
if (uid_data->blend_subtract)
{
out.WriteFmt("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * "
out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * "
"blend_src.rgb;\n");
}
else
{
out.WriteFmt(
out.Write(
"\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n");
}
if (uid_data->blend_subtract_alpha)
out.WriteFmt("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n");
out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n");
else
out.WriteFmt("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
}
else
{
out.WriteFmt("\tfloat4 blend_result = ocol0;\n");
out.Write("\tfloat4 blend_result = ocol0;\n");
}
out.WriteFmt("\treal_ocol0 = blend_result;\n");
out.Write("\treal_ocol0 = blend_result;\n");
}

View File

@ -91,20 +91,20 @@ static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string
std::string_view type, std::string_view name, int var_index,
std::string_view semantic = {}, int semantic_index = -1)
{
object.WriteFmt("\t{} {} {}", qualifier, type, name);
object.Write("\t{} {} {}", qualifier, type, name);
if (var_index != -1)
object.WriteFmt("{}", var_index);
object.Write("{}", var_index);
if (api_type == APIType::D3D && !semantic.empty())
{
if (semantic_index != -1)
object.WriteFmt(" : {}{}", semantic, semantic_index);
object.Write(" : {}{}", semantic, semantic_index);
else
object.WriteFmt(" : {}", semantic);
object.Write(" : {}", semantic);
}
object.WriteFmt(";\n");
object.Write(";\n");
}
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
@ -138,26 +138,26 @@ void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
// Emits shader statements that copy vertex-shader output members from `b` into `a`.
//
// object:      shader text buffer that receives the generated assignments.
// a:           expression naming the destination struct (left-hand side).
// b:           expression naming the source struct (right-hand side).
// texgens:     number of texN members to copy (tex0 .. tex{texgens-1}).
// host_config: selects which optional members are copied (see the checks below).
//
// pos, colors_0 and colors_1 are always copied; every emitted line is tab-indented.
void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens,
const ShaderHostConfig& host_config)
{
object.WriteFmt("\t{}.pos = {}.pos;\n", a, b);
object.WriteFmt("\t{}.colors_0 = {}.colors_0;\n", a, b);
object.WriteFmt("\t{}.colors_1 = {}.colors_1;\n", a, b);
object.Write("\t{}.pos = {}.pos;\n", a, b);
object.Write("\t{}.colors_0 = {}.colors_0;\n", a, b);
object.Write("\t{}.colors_1 = {}.colors_1;\n", a, b);
// One assignment per texture coordinate generator.
for (unsigned int i = 0; i < texgens; ++i)
object.WriteFmt("\t{}.tex{} = {}.tex{};\n", a, i, b, i);
object.Write("\t{}.tex{} = {}.tex{};\n", a, i, b, i);
// clipPos is only copied when fast depth calculation is disabled.
if (!host_config.fast_depth_calc)
object.WriteFmt("\t{}.clipPos = {}.clipPos;\n", a, b);
object.Write("\t{}.clipPos = {}.clipPos;\n", a, b);
// Normal and WorldPos are only copied when per-pixel lighting is enabled.
if (host_config.per_pixel_lighting)
{
object.WriteFmt("\t{}.Normal = {}.Normal;\n", a, b);
object.WriteFmt("\t{}.WorldPos = {}.WorldPos;\n", a, b);
object.Write("\t{}.Normal = {}.Normal;\n", a, b);
object.Write("\t{}.WorldPos = {}.WorldPos;\n", a, b);
}
// clipDist0/clipDist1 are only copied when the backend reports geometry shader support.
if (host_config.backend_geometry_shaders)
{
object.WriteFmt("\t{}.clipDist0 = {}.clipDist0;\n", a, b);
object.WriteFmt("\t{}.clipDist1 = {}.clipDist1;\n", a, b);
object.Write("\t{}.clipDist0 = {}.clipDist0;\n", a, b);
object.Write("\t{}.clipDist1 = {}.clipDist1;\n", a, b);
}
}

View File

@ -4,7 +4,6 @@
#pragma once
#include <cstdarg>
#include <cstring>
#include <iterator>
#include <string>
@ -104,21 +103,9 @@ public:
// Reserves 16384 characters of capacity in m_buffer up front, so that appending generated
// shader text does not have to grow the buffer until that size is exceeded.
ShaderCode() { m_buffer.reserve(16384); }
// Returns a read-only reference to the shader text accumulated so far (no copy is made).
const std::string& GetBuffer() const { return m_buffer; }
// Deprecated: Writes format strings using traditional printf format strings.
//
// Formats `fmt` and the variadic arguments with StringFromFormatV() and appends the result
// to m_buffer.
//
// On GCC-compatible compilers the format attribute enables printf-style argument checking.
// The indices are (2, 3) rather than (1, 2) because the implicit `this` pointer of a
// non-static member function counts as argument 1.
void Write(const char* fmt, ...)
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
{
va_list arglist;
va_start(arglist, fmt);
m_buffer += StringFromFormatV(fmt, arglist);
va_end(arglist);
}
// Writes format strings using fmtlib format strings.
//
// format: fmtlib format string; each `{}` placeholder is replaced by the next argument.
//         Braces that must appear literally in the output are written as `{{` and `}}`.
// args:   values substituted into the placeholders, perfectly forwarded to fmt.
//
// The formatted text is appended to m_buffer through a back-insert iterator.
template <typename... Args>
void WriteFmt(std::string_view format, Args&&... args)
void Write(std::string_view format, Args&&... args)
{
fmt::format_to(std::back_inserter(m_buffer), format, std::forward<Args>(args)...);
}

View File

@ -60,7 +60,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
{
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
@ -69,20 +69,20 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
"}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
code.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
code.WriteFmt("VARYING_LOCATION(0) in float3 v_tex0;\n");
code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n");
}
code.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
}
else // D3D
{
code.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
code.Write("cbuffer PSBlock : register(b0) {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
@ -97,10 +97,10 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
// This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL
// we need to use roundEven().
if (api_type == APIType::D3D)
code.WriteFmt("#define roundEven(x) round(x)\n");
code.Write("#define roundEven(x) round(x)\n");
// Alpha channel in the copy is set to 1 when the EFB format does not have an alpha channel.
code.WriteFmt("float4 RGBA8ToRGB8(float4 src)\n"
code.Write("float4 RGBA8ToRGB8(float4 src)\n"
"{{\n"
" return float4(src.xyz, 1.0);\n"
"}}\n"
@ -127,16 +127,16 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
switch (params.efb_format)
{
case PEControl::RGB8_Z24:
code.WriteFmt("RGBA8ToRGB8(");
code.Write("RGBA8ToRGB8(");
break;
case PEControl::RGBA6_Z24:
code.WriteFmt("RGBA8ToRGBA6(");
code.Write("RGBA8ToRGBA6(");
break;
case PEControl::RGB565_Z16:
code.WriteFmt("RGBA8ToRGB565(");
code.Write("RGBA8ToRGB565(");
break;
default:
code.WriteFmt("(");
code.Write("(");
break;
}
}
@ -144,49 +144,49 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
{
// Handle D3D depth inversion.
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
code.WriteFmt("1.0 - (");
code.Write("1.0 - (");
else
code.WriteFmt("(");
code.Write("(");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
code.WriteFmt("texture(samp0, float3(");
code.Write("texture(samp0, float3(");
else
code.WriteFmt("Tex0.Sample(samp0, float3(");
code.Write("Tex0.Sample(samp0, float3(");
code.WriteFmt("uv.x + float(xoffset) * pixel_size.x, ");
code.Write("uv.x + float(xoffset) * pixel_size.x, ");
// Reverse the direction for OpenGL, since positive numbers are distance from the bottom row.
if (yoffset != 0)
{
if (api_type == APIType::OpenGL)
code.WriteFmt("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
code.Write("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
else
code.WriteFmt("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
code.Write("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
}
else
{
code.WriteFmt("uv.y");
code.Write("uv.y");
}
code.WriteFmt(", 0.0)))");
code.Write(", 0.0)))");
};
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
code.WriteFmt("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
"{{\n");
if (params.copy_filter)
{
code.WriteFmt(" float4 prev_row = ");
code.Write(" float4 prev_row = ");
WriteSampleOp(-1);
code.WriteFmt(";\n"
code.Write(";\n"
" float4 current_row = ");
WriteSampleOp(0);
code.WriteFmt(";\n"
code.Write(";\n"
" float4 next_row = ");
WriteSampleOp(1);
code.WriteFmt(";\n"
code.Write(";\n"
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
@ -194,13 +194,13 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
}
else
{
code.WriteFmt(" float4 current_row = ");
code.Write(" float4 current_row = ");
WriteSampleOp(0);
code.WriteFmt(";\n"
code.Write(";\n"
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
}
code.WriteFmt("}}\n");
code.Write("}}\n");
}
// Block dimensions : widthStride, heightStride
@ -213,14 +213,14 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
code.WriteFmt("void main()\n"
code.Write("void main()\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n");
}
else // D3D
{
code.WriteFmt("void main(\n"
code.Write("void main(\n"
" in float3 v_tex0 : TEXCOORD0,\n"
" in float4 rawpos : SV_Position,\n"
" out float4 ocol0 : SV_Target)\n"
@ -233,81 +233,81 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy
const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
int samples = GetEncodedSampleCount(format);
code.WriteFmt(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
IntLog2(blkW));
code.WriteFmt(" int y_block_position = uv1.y << {};\n", IntLog2(blkH));
code.Write(" int y_block_position = uv1.y << {};\n", IntLog2(blkH));
if (samples == 1)
{
// With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
code.WriteFmt(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2);
code.Write(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2);
samples = 2;
}
code.WriteFmt(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1);
code.WriteFmt(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples));
code.WriteFmt(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1,
code.Write(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1);
code.Write(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples));
code.Write(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1,
IntLog2(samples));
code.WriteFmt(" sampleUv.x = x_block_position + x_offset_in_block;\n"
code.Write(" sampleUv.x = x_block_position + x_offset_in_block;\n"
" sampleUv.y = y_block_position + y_offset_in_block;\n");
// sampleUv is the sample position in (int)gx_coords
code.WriteFmt(" float2 uv0 = float2(sampleUv);\n");
code.Write(" float2 uv0 = float2(sampleUv);\n");
// Move to center of pixel
code.WriteFmt(" uv0 += float2(0.5, 0.5);\n");
code.Write(" uv0 += float2(0.5, 0.5);\n");
// Scale by two if needed (also move to pixel borders
// so that linear filtering will average adjacent
// pixels).
code.WriteFmt(" uv0 *= float(position.w);\n");
code.Write(" uv0 *= float(position.w);\n");
// Move to copied rect
code.WriteFmt(" uv0 += float2(position.xy);\n");
code.Write(" uv0 += float2(position.xy);\n");
// Normalize to [0:1]
code.WriteFmt(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT);
code.Write(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT);
// Apply the y scaling
code.WriteFmt(" uv0 /= float2(1, y_scale);\n");
code.Write(" uv0 /= float2(1, y_scale);\n");
// OGL has to flip up and down
if (api_type == APIType::OpenGL)
{
code.WriteFmt(" uv0.y = 1.0-uv0.y;\n");
code.Write(" uv0.y = 1.0-uv0.y;\n");
}
code.WriteFmt(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n",
EFB_WIDTH, EFB_HEIGHT);
code.Write(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n", EFB_WIDTH,
EFB_HEIGHT);
}
// Emits one shader statement of the form
//   <dest> = SampleEFB(uv0, pixel_size, <x_offset>).<color_comp>;
// i.e. it samples the EFB at the current uv0 with the given x offset and stores the
// requested swizzle (`color_comp`, e.g. "rgb" or "r") into `dest`.
//
// code:       shader text being built.
// color_comp: component swizzle appended after the SampleEFB() call.
// dest:       shader-side lvalue that receives the sample.
// x_offset:   offset argument passed through to SampleEFB().
// api_type / params: not referenced by the visible body.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; the
// WriteFmt() line is the pre-rename form and the Write() line directly below it is its
// replacement. They are the same statement, not two separate emissions.
static void WriteSampleColor(ShaderCode& code, std::string_view color_comp, std::string_view dest,
int x_offset, APIType api_type, const EFBCopyParams& params)
{
code.WriteFmt(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp);
code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp);
}
// Emits shader code that converts the RGB value named by `src` into a single intensity
// value stored in `dest`, computed as dot(IntensityConst.rgb, src.rgb).
//
// The first call while IntensityConstantAdded is false also emits the declaration of the
// shader-side constant `IntensityConst` and sets the flag, so the declaration is written
// only once until the flag is cleared again (WriteEncoderEnd() resets it).
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; each
// WriteFmt() line is the pre-rename form of the Write() line directly below it.
static void WriteColorToIntensity(ShaderCode& code, std::string_view src, std::string_view dest)
{
if (!IntensityConstantAdded)
{
code.WriteFmt(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n");
code.Write(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n");
IntensityConstantAdded = true;
}
code.WriteFmt(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src);
code.Write(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src);
// don't add IntensityConst.a yet, because doing it later is faster and uses less instructions,
// due to vectorization
// (callers add IntensityConst.a themselves after all intensities have been computed)
}
// Emits a shader statement that quantizes `src` into `dest`:
//   <dest> = floor(<src> * 255.0 / exp2(8.0 - <depth>.0));
// The ".0" after the depth placeholder turns the inserted number into a float literal in
// the generated shader. `dest` is inserted verbatim, so callers may pass a declaration
// such as "float2 gInt" as well as an existing variable.
// NOTE(review): presumably `src` is a normalized [0, 1] value and the result is an
// integer-valued float in [0, 2^depth) - confirm against callers.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; the
// WriteFmt() line is the pre-rename form of the Write() line directly below it.
static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest)
{
code.WriteFmt(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
}
// Finishes an encoder shader: emits the closing brace text and clears
// IntensityConstantAdded so that the next encoder emits its own IntensityConst
// declaration again (see WriteColorToIntensity()).
// The doubled "}}" is presumably the format-string escape for a single literal '}'.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; the
// WriteFmt() line is the pre-rename form of the Write() line directly below it.
static void WriteEncoderEnd(ShaderCode& code)
{
code.WriteFmt("}}\n");
code.Write("}}\n");
IntensityConstantAdded = false;
}
static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R8, api_type);
code.WriteFmt(" float3 texSample;\n");
code.Write(" float3 texSample;\n");
WriteSampleColor(code, "rgb", "texSample", 0, api_type, params);
WriteColorToIntensity(code, "texSample", "ocol0.b");
@ -322,7 +322,7 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
WriteColorToIntensity(code, "texSample", "ocol0.a");
// See WriteColorToIntensity
code.WriteFmt(" ocol0.rgba += IntensityConst.aaaa;\n");
code.Write(" ocol0.rgba += IntensityConst.aaaa;\n");
WriteEncoderEnd(code);
}
@ -330,7 +330,7 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
code.WriteFmt(" float3 texSample;\n"
code.Write(" float3 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
@ -358,30 +358,30 @@ static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
WriteSampleColor(code, "rgb", "texSample", 7, api_type, params);
WriteColorToIntensity(code, "texSample", "color1.a");
code.WriteFmt(" color0.rgba += IntensityConst.aaaa;\n"
code.Write(" color0.rgba += IntensityConst.aaaa;\n"
" color1.rgba += IntensityConst.aaaa;\n");
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
// Emits the body of the IA8 (intensity + alpha, 8 bits each) encoder shader.
//
// Two EFB samples (x offsets 0 and 1) are packed into one output texel:
//   sample 0: alpha -> ocol0.b, intensity -> ocol0.g
//   sample 1: alpha -> ocol0.r, intensity -> ocol0.a
// The IntensityConst.a bias is added to both intensity channels in one step at the end.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; each
// WriteFmt() line is the pre-rename form of the Write() line directly below it.
static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
// Common prologue that computes the sample coordinates for this format.
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
code.WriteFmt(" float4 texSample;\n");
code.Write(" float4 texSample;\n");
// First sample: alpha and intensity.
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" ocol0.b = texSample.a;\n");
code.Write(" ocol0.b = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "ocol0.g");
// Second sample: alpha and intensity.
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" ocol0.r = texSample.a;\n");
code.Write(" ocol0.r = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "ocol0.a");
// Deferred intensity bias, applied to both intensity channels at once.
code.WriteFmt(" ocol0.ga += IntensityConst.aa;\n");
code.Write(" ocol0.ga += IntensityConst.aa;\n");
WriteEncoderEnd(code);
}
@ -389,57 +389,57 @@ static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
// Emits the body of the IA4 (intensity + alpha, 4 bits each) encoder shader.
//
// Four EFB samples (x offsets 0..3) are gathered: their alpha values go into
// color0.{b,g,r,a} and their intensities into color1.{b,g,r,a}. Both vectors are then
// quantized to 4 bits and combined as (color0 * 16 + color1) / 255, i.e. alpha in the
// high nibble and intensity in the low nibble of each output channel.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; each
// WriteFmt() line is the pre-rename form of the Write() line directly below it (for
// multi-line statements only the first line is duplicated; the continuation lines are
// shared).
static void WriteIA4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
// Common prologue that computes the sample coordinates for this format.
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
code.WriteFmt(" float4 texSample;\n"
code.Write(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
// Sample 0 -> .b lanes
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.a;\n");
code.Write(" color0.b = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.b");
// Sample 1 -> .g lanes
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" color0.g = texSample.a;\n");
code.Write(" color0.g = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.g");
// Sample 2 -> .r lanes
WriteSampleColor(code, "rgba", "texSample", 2, api_type, params);
code.WriteFmt(" color0.r = texSample.a;\n");
code.Write(" color0.r = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.r");
// Sample 3 -> .a lanes
WriteSampleColor(code, "rgba", "texSample", 3, api_type, params);
code.WriteFmt(" color0.a = texSample.a;\n");
code.Write(" color0.a = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.a");
// Deferred intensity bias for all four intensities at once.
code.WriteFmt(" color1.rgba += IntensityConst.aaaa;\n");
code.Write(" color1.rgba += IntensityConst.aaaa;\n");
// Quantize both vectors to 4 bits, then pack them into one byte per channel.
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
// Emits the body of the RGB565 encoder shader.
//
// Two EFB samples (x offsets 0 and 1) are processed together as float2 pairs
// (texRs / texGs / texBs). Green is quantized to 6 bits and split into
// gUpper = floor(g / 8) and gLower = g - gUpper * 8. Red (5 bits) is combined with
// gUpper into ocol0.br, blue (5 bits) with gLower * 32 into ocol0.ga, and the result is
// divided by 255 at the end.
//
// NOTE: this listing is a rendered diff of the WriteFmt() -> Write() rename; each
// WriteFmt() line is the pre-rename form of the Write() line directly below it (for
// multi-line statements only the first line is duplicated; the continuation lines are
// shared).
static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
// Common prologue that computes the sample coordinates for this format.
WriteSwizzler(code, params, EFBCopyFormat::RGB565, api_type);
code.WriteFmt(" float3 texSample0;\n"
code.Write(" float3 texSample0;\n"
" float3 texSample1;\n");
WriteSampleColor(code, "rgb", "texSample0", 0, api_type, params);
WriteSampleColor(code, "rgb", "texSample1", 1, api_type, params);
// Regroup the two samples per channel so both pixels are handled with one float2 op.
code.WriteFmt(" float2 texRs = float2(texSample0.r, texSample1.r);\n"
code.Write(" float2 texRs = float2(texSample0.r, texSample1.r);\n"
" float2 texGs = float2(texSample0.g, texSample1.g);\n"
" float2 texBs = float2(texSample0.b, texSample1.b);\n");
// The destination string doubles as the declaration of gInt in the generated shader.
WriteToBitDepth(code, 6, "texGs", "float2 gInt");
code.WriteFmt(" float2 gUpper = floor(gInt / 8.0);\n"
code.Write(" float2 gUpper = floor(gInt / 8.0);\n"
" float2 gLower = gInt - gUpper * 8.0;\n");
// Red (5 bits) shifted up by 3 bits plus the upper part of green.
WriteToBitDepth(code, 5, "texRs", "ocol0.br");
code.WriteFmt(" ocol0.br = ocol0.br * 8.0 + gUpper;\n");
code.Write(" ocol0.br = ocol0.br * 8.0 + gUpper;\n");
// Blue (5 bits) plus the lower part of green shifted up by 5 bits.
WriteToBitDepth(code, 5, "texBs", "ocol0.ga");
code.WriteFmt(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
// Scale the values back down by 255 for output.
code.WriteFmt(" ocol0 = ocol0 / 255.0;\n");
code.Write(" ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(code);
}
@ -447,7 +447,7 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy
{
WriteSwizzler(code, params, EFBCopyFormat::RGB5A3, api_type);
code.WriteFmt(" float4 texSample;\n"
code.Write(" float4 texSample;\n"
" float color0;\n"
" float gUpper;\n"
" float gLower;\n");
@ -455,55 +455,55 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
code.WriteFmt("if(texSample.a > 0.878f) {{\n");
code.Write("if(texSample.a > 0.878f) {{\n");
WriteToBitDepth(code, 5, "texSample.g", "color0");
code.WriteFmt(" gUpper = floor(color0 / 8.0);\n"
code.Write(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(code, 5, "texSample.r", "ocol0.b");
code.WriteFmt(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n");
code.Write(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(code, 5, "texSample.b", "ocol0.g");
code.WriteFmt(" ocol0.g = ocol0.g + gLower * 32.0;\n");
code.Write(" ocol0.g = ocol0.g + gLower * 32.0;\n");
code.WriteFmt("}} else {{\n");
code.Write("}} else {{\n");
WriteToBitDepth(code, 4, "texSample.r", "ocol0.b");
WriteToBitDepth(code, 4, "texSample.b", "ocol0.g");
WriteToBitDepth(code, 3, "texSample.a", "color0");
code.WriteFmt("ocol0.b = ocol0.b + color0 * 16.0;\n");
code.Write("ocol0.b = ocol0.b + color0 * 16.0;\n");
WriteToBitDepth(code, 4, "texSample.g", "color0");
code.WriteFmt("ocol0.g = ocol0.g + color0 * 16.0;\n");
code.Write("ocol0.g = ocol0.g + color0 * 16.0;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt("if(texSample.a > 0.878f) {{\n");
code.Write("if(texSample.a > 0.878f) {{\n");
WriteToBitDepth(code, 5, "texSample.g", "color0");
code.WriteFmt(" gUpper = floor(color0 / 8.0);\n"
code.Write(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(code, 5, "texSample.r", "ocol0.r");
code.WriteFmt(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n");
code.Write(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(code, 5, "texSample.b", "ocol0.a");
code.WriteFmt(" ocol0.a = ocol0.a + gLower * 32.0;\n");
code.Write(" ocol0.a = ocol0.a + gLower * 32.0;\n");
code.WriteFmt("}} else {{\n");
code.Write("}} else {{\n");
WriteToBitDepth(code, 4, "texSample.r", "ocol0.r");
WriteToBitDepth(code, 4, "texSample.b", "ocol0.a");
WriteToBitDepth(code, 3, "texSample.a", "color0");
code.WriteFmt("ocol0.r = ocol0.r + color0 * 16.0;\n");
code.Write("ocol0.r = ocol0.r + color0 * 16.0;\n");
WriteToBitDepth(code, 4, "texSample.g", "color0");
code.WriteFmt("ocol0.a = ocol0.a + color0 * 16.0;\n");
code.Write("ocol0.a = ocol0.a + color0 * 16.0;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
code.WriteFmt(" ocol0 = ocol0 / 255.0;\n");
code.Write(" ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(code);
}
@ -511,23 +511,23 @@ static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyP
{
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
code.WriteFmt(" float4 texSample;\n"
code.Write(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.a;\n"
code.Write(" color0.b = texSample.a;\n"
" color0.g = texSample.r;\n"
" color1.b = texSample.g;\n"
" color1.g = texSample.b;\n");
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" color0.r = texSample.a;\n"
code.Write(" color0.r = texSample.a;\n"
" color0.a = texSample.r;\n"
" color1.r = texSample.g;\n"
" color1.a = texSample.b;\n");
code.WriteFmt(" ocol0 = first ? color0 : color1;\n");
code.Write(" ocol0 = first ? color0 : color1;\n");
WriteEncoderEnd(code);
}
@ -536,7 +536,7 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_
const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
code.WriteFmt(" float4 color0;\n"
code.Write(" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, comp, "color0.b", 0, api_type, params);
@ -551,7 +551,7 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
@ -572,30 +572,30 @@ static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api
const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
code.WriteFmt(" float2 texSample;\n"
code.Write(" float2 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, comp, "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.x;\n"
code.Write(" color0.b = texSample.x;\n"
" color1.b = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 1, api_type, params);
code.WriteFmt(" color0.g = texSample.x;\n"
code.Write(" color0.g = texSample.x;\n"
" color1.g = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 2, api_type, params);
code.WriteFmt(" color0.r = texSample.x;\n"
code.Write(" color0.r = texSample.x;\n"
" color1.r = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 3, api_type, params);
code.WriteFmt(" color0.a = texSample.x;\n"
code.Write(" color0.a = texSample.x;\n"
" color1.a = texSample.y;\n");
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
@ -615,19 +615,19 @@ static void WriteZ8Encoder(ShaderCode& code, std::string_view multiplier, APITyp
{
WriteSwizzler(code, params, EFBCopyFormat::G8, api_type);
code.WriteFmt(" float depth;\n");
code.Write(" float depth;\n");
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt("ocol0.b = frac(depth * {});\n", multiplier);
code.Write("ocol0.b = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt("ocol0.g = frac(depth * {});\n", multiplier);
code.Write("ocol0.g = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 2, api_type, params);
code.WriteFmt("ocol0.r = frac(depth * {});\n", multiplier);
code.Write("ocol0.r = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 3, api_type, params);
code.WriteFmt("ocol0.a = frac(depth * {});\n", multiplier);
code.Write("ocol0.a = frac(depth * {});\n", multiplier);
WriteEncoderEnd(code);
}
@ -636,29 +636,29 @@ static void WriteZ16Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
code.WriteFmt(" float depth;\n"
code.Write(" float depth;\n"
" float3 expanded;\n");
// Byte order is reversed
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.WriteFmt(" ocol0.b = expanded.g / 255.0;\n"
code.Write(" ocol0.b = expanded.g / 255.0;\n"
" ocol0.g = expanded.r / 255.0;\n");
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.WriteFmt(" ocol0.r = expanded.g / 255.0;\n"
code.Write(" ocol0.r = expanded.g / 255.0;\n"
" ocol0.a = expanded.r / 255.0;\n");
WriteEncoderEnd(code);
@ -668,33 +668,33 @@ static void WriteZ16LEncoder(ShaderCode& code, APIType api_type, const EFBCopyPa
{
WriteSwizzler(code, params, EFBCopyFormat::GB8, api_type);
code.WriteFmt(" float depth;\n"
code.Write(" float depth;\n"
" float3 expanded;\n");
// Byte order is reversed
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.WriteFmt(" ocol0.b = expanded.b / 255.0;\n"
code.Write(" ocol0.b = expanded.b / 255.0;\n"
" ocol0.g = expanded.g / 255.0;\n");
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.WriteFmt(" ocol0.r = expanded.b / 255.0;\n"
code.Write(" ocol0.r = expanded.b / 255.0;\n"
" ocol0.a = expanded.g / 255.0;\n");
WriteEncoderEnd(code);
@ -704,7 +704,7 @@ static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
code.WriteFmt(" float depth0;\n"
code.Write(" float depth0;\n"
" float depth1;\n"
" float3 expanded0;\n"
" float3 expanded1;\n");
@ -714,24 +714,24 @@ static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
for (int i = 0; i < 2; i++)
{
code.WriteFmt(" depth{} *= 16777216.0;\n", i);
code.Write(" depth{} *= 16777216.0;\n", i);
code.WriteFmt(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i);
code.WriteFmt(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i);
code.WriteFmt(" expanded{}.g = floor(depth{} / 256.0);\n", i, i);
code.WriteFmt(" depth{} -= expanded{}.g * 256.0;\n", i, i);
code.WriteFmt(" expanded{}.b = depth{};\n", i, i);
code.Write(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i);
code.Write(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i);
code.Write(" expanded{}.g = floor(depth{} / 256.0);\n", i, i);
code.Write(" depth{} -= expanded{}.g * 256.0;\n", i, i);
code.Write(" expanded{}.b = depth{};\n", i, i);
}
code.WriteFmt(" if (!first) {{\n");
code.Write(" if (!first) {{\n");
// Upper 16
code.WriteFmt(" ocol0.b = expanded0.g / 255.0;\n"
code.Write(" ocol0.b = expanded0.g / 255.0;\n"
" ocol0.g = expanded0.b / 255.0;\n"
" ocol0.r = expanded1.g / 255.0;\n"
" ocol0.a = expanded1.b / 255.0;\n"
" }} else {{\n");
// Lower 8
code.WriteFmt(" ocol0.b = 1.0;\n"
code.Write(" ocol0.b = 1.0;\n"
" ocol0.g = expanded0.r / 255.0;\n"
" ocol0.r = 1.0;\n"
" ocol0.a = expanded1.r / 255.0;\n"
@ -744,16 +744,16 @@ static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::XFB, api_type);
code.WriteFmt("float3 color0, color1;\n");
code.Write("float3 color0, color1;\n");
WriteSampleColor(code, "rgb", "color0", 0, api_type, params);
WriteSampleColor(code, "rgb", "color1", 1, api_type, params);
// Gamma is only applied to XFB copies.
code.WriteFmt(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"
code.Write(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"
" color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
// Convert to YUV.
code.WriteFmt(" const float3 y_const = float3(0.257, 0.504, 0.098);\n"
code.Write(" const float3 y_const = float3(0.257, 0.504, 0.098);\n"
" const float3 u_const = float3(-0.148, -0.291, 0.439);\n"
" const float3 v_const = float3(0.439, -0.368, -0.071);\n"
" float3 average = (color0 + color1) * 0.5;\n"

View File

@ -32,7 +32,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
{
if (api_type == APIType::D3D)
{
out.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
out.Write("cbuffer PSBlock : register(b0) {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
@ -42,7 +42,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
@ -59,35 +59,35 @@ ShaderCode GenerateVertexShader(APIType api_type)
if (api_type == APIType::D3D)
{
out.WriteFmt("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
" out float4 opos : SV_Position) {{\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"
out.Write("VARYING_LOCATION(0) out VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.WriteFmt("VARYING_LOCATION(0) out float3 v_tex0;\n");
out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n");
}
out.WriteFmt("#define id gl_VertexID\n"
out.Write("#define id gl_VertexID\n"
"#define opos gl_Position\n"
"void main() {{\n");
}
out.WriteFmt(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
out.WriteFmt(
out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
out.Write(
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
out.WriteFmt(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
// NDC space is flipped in Vulkan
if (api_type == APIType::Vulkan)
out.WriteFmt(" opos.y = -opos.y;\n");
out.Write(" opos.y = -opos.y;\n");
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -101,34 +101,34 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
if (api_type == APIType::D3D)
{
out.WriteFmt("Texture2DArray tex0 : register(t0);\n"
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n\n",
mono_depth ? "0.0" : "uv.z");
out.WriteFmt("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.WriteFmt("float4 SampleEFB(float3 uv, float y_offset) {{\n"
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n",
mono_depth ? "0.0" : "uv.z");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.WriteFmt("VARYING_LOCATION(0) in vec3 v_tex0;\n");
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
}
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
"void main()\n{{\n");
}
@ -136,7 +136,7 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
// The filter is only applied to the RGB channels, the alpha channel is left intact.
if (uid_data->copy_filter)
{
out.WriteFmt(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
@ -146,7 +146,7 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
}
else
{
out.WriteFmt(
out.Write(
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
@ -155,9 +155,9 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
if (uid_data->is_depth_copy)
{
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
out.WriteFmt("texcol.x = 1.0 - texcol.x;\n");
out.Write("texcol.x = 1.0 - texcol.x;\n");
out.WriteFmt(" int depth = int(texcol.x * 16777216.0);\n"
out.Write(" int depth = int(texcol.x * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
@ -173,44 +173,44 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
switch (uid_data->dst_format)
{
case EFBCopyFormat::R4: // Z4
out.WriteFmt(" ocol0 = texcol.aaaa;\n");
out.Write(" ocol0 = texcol.aaaa;\n");
break;
case EFBCopyFormat::R8_0x1: // Z8
case EFBCopyFormat::R8: // Z8H
out.WriteFmt(" ocol0 = texcol.rrrr;\n");
out.Write(" ocol0 = texcol.rrrr;\n");
break;
case EFBCopyFormat::RA8: // Z16
out.WriteFmt(" ocol0 = texcol.gggr;\n");
out.Write(" ocol0 = texcol.gggr;\n");
break;
case EFBCopyFormat::RG8: // Z16 (reverse order)
out.WriteFmt(" ocol0 = texcol.rrrg;\n");
out.Write(" ocol0 = texcol.rrrg;\n");
break;
case EFBCopyFormat::RGBA8: // Z24X8
out.WriteFmt(" ocol0 = float4(texcol.rgb, 1.0);\n");
out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n");
break;
case EFBCopyFormat::G8: // Z8M
out.WriteFmt(" ocol0 = texcol.gggg;\n");
out.Write(" ocol0 = texcol.gggg;\n");
break;
case EFBCopyFormat::B8: // Z8L
out.WriteFmt(" ocol0 = texcol.bbbb;\n");
out.Write(" ocol0 = texcol.bbbb;\n");
break;
case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
// stored as alpha)
// Used e.g. in Zelda: Skyward Sword
out.WriteFmt(" ocol0 = texcol.gggb;\n");
out.Write(" ocol0 = texcol.gggb;\n");
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = float4(texcol.bgr, 0.0);\n");
out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n");
break;
}
}
@ -229,10 +229,10 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
case EFBCopyFormat::RA4: // IA4
case EFBCopyFormat::RA8: // IA8
if (has_four_bits)
out.WriteFmt(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
// TODO - verify these coefficients
out.WriteFmt(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
" float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
" ocol0 = float4(intensity, intensity, intensity, {});\n",
has_alpha ? "texcol.a" : "intensity");
@ -241,58 +241,58 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X",
static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
}
}
else
{
if (!uid_data->efb_has_alpha)
out.WriteFmt(" texcol.a = 1.0;\n");
out.Write(" texcol.a = 1.0;\n");
switch (uid_data->dst_format)
{
case EFBCopyFormat::R4: // R4
out.WriteFmt(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = float4(red, red, red, red);\n");
break;
case EFBCopyFormat::R8_0x1: // R8
case EFBCopyFormat::R8: // R8
out.WriteFmt(" ocol0 = texcol.rrrr;\n");
out.Write(" ocol0 = texcol.rrrr;\n");
break;
case EFBCopyFormat::RA4: // RA4
out.WriteFmt(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = red_alpha.rrrg;\n");
break;
case EFBCopyFormat::RA8: // RA8
out.WriteFmt(" ocol0 = texcol.rrra;\n");
out.Write(" ocol0 = texcol.rrra;\n");
break;
case EFBCopyFormat::A8: // A8
out.WriteFmt(" ocol0 = texcol.aaaa;\n");
out.Write(" ocol0 = texcol.aaaa;\n");
break;
case EFBCopyFormat::G8: // G8
out.WriteFmt(" ocol0 = texcol.gggg;\n");
out.Write(" ocol0 = texcol.gggg;\n");
break;
case EFBCopyFormat::B8: // B8
out.WriteFmt(" ocol0 = texcol.bbbb;\n");
out.Write(" ocol0 = texcol.bbbb;\n");
break;
case EFBCopyFormat::RG8: // RG8
out.WriteFmt(" ocol0 = texcol.rrrg;\n");
out.Write(" ocol0 = texcol.rrrg;\n");
break;
case EFBCopyFormat::GB8: // GB8
out.WriteFmt(" ocol0 = texcol.gggb;\n");
out.Write(" ocol0 = texcol.gggb;\n");
break;
case EFBCopyFormat::RGB565: // RGB565
out.WriteFmt(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
" ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
break;
@ -300,28 +300,28 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
case EFBCopyFormat::RGB5A3: // RGB5A3
// TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
// will need to be implemented once we move away from floats.
out.WriteFmt(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
" ocol0 = float4(color, alpha);\n");
break;
case EFBCopyFormat::RGBA8: // RGBA8
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
case EFBCopyFormat::XFB:
out.WriteFmt(
out.Write(
" ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n");
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
}
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}

View File

@ -18,8 +18,7 @@ void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
// ==============================================
if (!host_config.backend_bitfield)
{
out.WriteFmt(
"uint bitfieldExtract(uint val, int off, int size) {{\n"
out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
@ -34,58 +33,57 @@ void WriteLightingFunction(ShaderCode& out)
// ==============================================
// Lighting channel calculation helper
// ==============================================
out.WriteFmt("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
"float3 normal) {{\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {{\n");
out.WriteFmt(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.WriteFmt(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
out.Write(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.Write(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = normal;\n"
" break;\n\n");
out.WriteFmt(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
out.Write(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
out.WriteFmt(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.WriteFmt(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
out.Write(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
out.WriteFmt(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.WriteFmt(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
out.Write(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS
"[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
out.WriteFmt(" default:\n"
out.Write(" default:\n"
" attn = 1.0;\n"
" ldir = normal;\n"
" break;\n"
" }}\n"
"\n"
" switch (diffusefunc) {{\n");
out.WriteFmt(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.WriteFmt(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.WriteFmt(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.WriteFmt(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
out.Write(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.Write(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.WriteFmt(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.WriteFmt(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
out.Write(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.WriteFmt(" default:\n"
out.Write(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }}\n"
"}}\n\n");
@ -96,104 +94,101 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view wor
std::string_view in_color_1_var, std::string_view out_color_0_var,
std::string_view out_color_1_var)
{
out.WriteFmt("// Lighting\n");
out.WriteFmt("{}for (uint chan = 0u; chan < {}u; chan++) {{\n",
out.Write("// Lighting\n");
out.Write("{}for (uint chan = 0u; chan < {}u; chan++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS);
out.WriteFmt(" uint colorreg = xfmem_color(chan);\n"
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.w = 255;\n"
" }} else {{\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting));
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting));
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" }} else {{\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }}\n"
"\n");
out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n",
out.Write(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3),
BitfieldExtract("colorreg", LitChannel().lightMask4_7));
out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc));
out.WriteFmt(" uint diffusefunc = {};\n",
BitfieldExtract("colorreg", LitChannel().diffusefunc));
out.WriteFmt(
out.Write(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc));
out.Write(" uint diffusefunc = {};\n", BitfieldExtract("colorreg", LitChannel().diffusefunc));
out.Write(
" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n"
" if ((light_mask & (1u << light_index)) != 0u)\n"
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).xyz;\n",
world_pos_var, normal_var);
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting));
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting));
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.w = 255;\n"
" }} else {{\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }}\n"
"\n");
out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n",
out.Write(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3),
BitfieldExtract("alphareg", LitChannel().lightMask4_7));
out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc));
out.WriteFmt(" uint diffusefunc = {};\n",
BitfieldExtract("alphareg", LitChannel().diffusefunc));
out.WriteFmt(
" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n"
out.Write(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc));
out.Write(" uint diffusefunc = {};\n", BitfieldExtract("alphareg", LitChannel().diffusefunc));
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).w;\n",
world_pos_var, normal_var);
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" }}\n"
"\n");
out.WriteFmt(" lacc = clamp(lacc, 0, 255);\n"
out.Write(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {{\n"
" case 0u: {} = lit_color; break;\n",
out_color_0_var);
out.WriteFmt(" case 1u: {} = lit_color; break;\n", out_color_1_var);
out.WriteFmt(" }}\n"
out.Write(" case 1u: {} = lit_color; break;\n", out_color_1_var);
out.Write(" }}\n"
"}}\n"
"\n");
}

View File

@ -62,7 +62,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const u32 numTexgen = uid_data->num_texgens;
ShaderCode out;
out.WriteFmt("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WritePixelShaderCommonHeader(out, ApiType, numTexgen, host_config, bounding_box);
WriteUberShaderCommonHeader(out, ApiType, host_config);
@ -76,12 +76,12 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
{
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n");
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
@ -93,55 +93,55 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// shader
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
}
}
else
{
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
}
if (per_pixel_depth)
out.WriteFmt("#define depth gl_FragDepth\n");
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
if (stereo)
out.WriteFmt(" flat int layer;\n");
out.Write(" flat int layer;\n");
out.WriteFmt("}};\n\n");
out.Write("}};\n\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < numTexgen; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
@ -152,71 +152,71 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
{
if (ApiType != APIType::D3D)
{
out.WriteFmt("float3 selectTexCoord(uint index) {{\n");
out.Write("float3 selectTexCoord(uint index) {{\n");
}
else
{
out.WriteFmt("float3 selectTexCoord(uint index");
out.Write("float3 selectTexCoord(uint index");
for (u32 i = 0; i < numTexgen; i++)
out.WriteFmt(", float3 tex{}", i);
out.WriteFmt(") {{\n");
out.Write(", float3 tex{}", i);
out.Write(") {{\n");
}
if (ApiType == APIType::D3D)
{
out.WriteFmt(" switch (index) {{\n");
out.Write(" switch (index) {{\n");
for (u32 i = 0; i < numTexgen; i++)
{
out.WriteFmt(" case {}u:\n"
out.Write(" case {}u:\n"
" return tex{};\n",
i, i);
}
out.WriteFmt(" default:\n"
out.Write(" default:\n"
" return float3(0.0, 0.0, 0.0);\n"
" }}\n");
}
else
{
if (numTexgen > 4)
out.WriteFmt(" if (index < 4u) {{\n");
out.Write(" if (index < 4u) {{\n");
if (numTexgen > 2)
out.WriteFmt(" if (index < 2u) {{\n");
out.Write(" if (index < 2u) {{\n");
if (numTexgen > 1)
out.WriteFmt(" return (index == 0u) ? tex0 : tex1;\n");
out.Write(" return (index == 0u) ? tex0 : tex1;\n");
else
out.WriteFmt(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n");
if (numTexgen > 2)
{
out.WriteFmt(" }} else {{\n"); // >= 2
out.Write(" }} else {{\n"); // >= 2
if (numTexgen > 3)
out.WriteFmt(" return (index == 2u) ? tex2 : tex3;\n");
out.Write(" return (index == 2u) ? tex2 : tex3;\n");
else
out.WriteFmt(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n");
out.WriteFmt(" }}\n");
out.Write(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n");
out.Write(" }}\n");
}
if (numTexgen > 4)
{
out.WriteFmt(" }} else {{\n"); // >= 4 <= 8
out.Write(" }} else {{\n"); // >= 4 <= 8
if (numTexgen > 6)
out.WriteFmt(" if (index < 6u) {{\n");
out.Write(" if (index < 6u) {{\n");
if (numTexgen > 5)
out.WriteFmt(" return (index == 4u) ? tex4 : tex5;\n");
out.Write(" return (index == 4u) ? tex4 : tex5;\n");
else
out.WriteFmt(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n");
if (numTexgen > 6)
{
out.WriteFmt(" }} else {{\n"); // >= 6 <= 8
out.Write(" }} else {{\n"); // >= 6 <= 8
if (numTexgen > 7)
out.WriteFmt(" return (index == 6u) ? tex6 : tex7;\n");
out.Write(" return (index == 6u) ? tex6 : tex7;\n");
else
out.WriteFmt(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n");
out.WriteFmt(" }}\n");
out.Write(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n");
out.Write(" }}\n");
}
out.WriteFmt(" }}\n");
out.Write(" }}\n");
}
}
out.WriteFmt("}}\n\n");
out.Write("}}\n\n");
}
// =====================
@ -227,17 +227,16 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
{
// Doesn't look like DirectX supports this. Oh well the code path is here just in case it
// supports this in the future.
out.WriteFmt("int4 sampleTexture(uint sampler_num, float3 uv) {{\n");
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt(" return iround(texture(samp[sampler_num], uv) * 255.0);\n");
out.Write(" return iround(texture(samp[sampler_num], uv) * 255.0);\n");
else if (ApiType == APIType::D3D)
out.WriteFmt(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n");
out.WriteFmt("}}\n\n");
out.Write(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n");
out.Write("}}\n\n");
}
else
{
out.WriteFmt(
"int4 sampleTexture(uint sampler_num, float3 uv) {{\n"
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"
" // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support "
"dynamic indexing of the sampler array\n"
" // With any luck the shader compiler will optimise this if the hardware supports "
@ -246,11 +245,11 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
for (int i = 0; i < 8; i++)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i);
out.Write(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i);
else if (ApiType == APIType::D3D)
out.WriteFmt(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i);
out.Write(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i);
}
out.WriteFmt(" }}\n"
out.Write(" }}\n"
"}}\n\n");
}
@ -258,23 +257,23 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Arbitrary Swizzling
// ======================
out.WriteFmt("int4 Swizzle(uint s, int4 color) {{\n"
out.Write("int4 Swizzle(uint s, int4 color) {{\n"
" // AKA: Color Channel Swapping\n"
"\n"
" int4 ret;\n");
out.WriteFmt(" ret.r = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap1));
out.WriteFmt(" ret.g = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap2));
out.WriteFmt(" ret.b = color[{}];\n",
out.Write(" ret.r = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap1));
out.Write(" ret.g = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap2));
out.Write(" ret.b = color[{}];\n",
BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap1));
out.WriteFmt(" ret.a = color[{}];\n",
out.Write(" ret.a = color[{}];\n",
BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap2));
out.WriteFmt(" return ret;\n"
out.Write(" return ret;\n"
"}}\n\n");
// ======================
// Indirect Wrapping
// ======================
out.WriteFmt("int Wrap(int coord, uint mode) {{\n"
out.Write("int Wrap(int coord, uint mode) {{\n"
" if (mode == 0u) // ITW_OFF\n"
" return coord;\n"
" else if (mode < 6u) // ITW_256 to ITW_16\n"
@ -288,7 +287,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// ======================
const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name,
std::string_view in_index_name) {
out.WriteFmt("{{\n"
out.Write("{{\n"
" uint iref = bpmem_iref({});\n"
" if ( iref != 0u)\n"
" {{\n"
@ -307,7 +306,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"[texmap].xy, {})).abg;\n",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name,
stereo ? "float(layer)" : "0.0");
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" else\n"
" {{\n"
" {} = int3(0, 0, 0);\n"
@ -320,7 +319,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// TEV's Special Lerp
// ======================
const auto WriteTevLerp = [&out](std::string_view components) {
out.WriteFmt(
out.Write(
"// TEV's Linear Interpolate, plus bias, add/subtract and scale\n"
"int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, bool alpha, "
"uint shift) {{\n"
@ -363,7 +362,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// TEV's Color Compare
// =======================
out.WriteFmt(
out.Write(
"// Implements operations 0-5 of TEV's compare mode,\n"
"// which are common to both color and alpha channels\n"
"bool tevCompare(uint op, int3 color_A, int3 color_B) {{\n"
@ -393,7 +392,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Input Selects
// =================
out.WriteFmt("struct State {{\n"
out.Write("struct State {{\n"
" int4 Reg[4];\n"
" int4 TexColor;\n"
" int AlphaBump;\n"
@ -414,7 +413,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Transforming the switch into a binary tree of ifs can increase performance by up to 20%.
if (ApiType == APIType::D3D)
{
out.WriteFmt("// Helper function for Alpha Test\n"
out.Write("// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n"
" switch (compare) {{\n"
" case 0u: // NEVER\n"
@ -548,7 +547,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
}
else
{
out.WriteFmt(
out.Write(
"// Helper function for Alpha Test\n"
"bool alphaCompare(int a, int b, uint compare) {{\n"
" if (compare < 4u) {{\n"
@ -673,28 +672,28 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
{
if (ApiType != APIType::D3D)
{
out.WriteFmt("#define getTexCoord(index) selectTexCoord((index))\n\n");
out.Write("#define getTexCoord(index) selectTexCoord((index))\n\n");
}
else
{
out.WriteFmt("#define getTexCoord(index) selectTexCoord((index)");
out.Write("#define getTexCoord(index) selectTexCoord((index)");
for (u32 i = 0; i < numTexgen; i++)
out.WriteFmt(", tex{}", i);
out.WriteFmt(")\n\n");
out.Write(", tex{}", i);
out.Write(")\n\n");
}
}
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
if (early_depth && host_config.backend_early_z)
out.WriteFmt("FORCE_EARLY_Z;\n");
out.Write("FORCE_EARLY_Z;\n");
out.WriteFmt("void main()\n{{\n");
out.WriteFmt(" float4 rawpos = gl_FragCoord;\n");
out.Write("void main()\n{{\n");
out.Write(" float4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.WriteFmt(" float4 initial_ocol0 = FB_FETCH_VALUE;\n"
out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n"
" float4 ocol0;\n"
" float4 ocol1;\n");
}
@ -702,61 +701,61 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
else // D3D
{
if (early_depth && host_config.backend_early_z)
out.WriteFmt("[earlydepthstencil]\n");
out.Write("[earlydepthstencil]\n");
out.WriteFmt("void main(\n");
out.Write("void main(\n");
if (uid_data->uint_output)
{
out.WriteFmt(" out uint4 ocol0 : SV_Target,\n");
out.Write(" out uint4 ocol0 : SV_Target,\n");
}
else
{
out.WriteFmt(" out float4 ocol0 : SV_Target0,\n"
out.Write(" out float4 ocol0 : SV_Target0,\n"
" out float4 ocol1 : SV_Target1,\n");
}
if (per_pixel_depth)
out.WriteFmt(" out float depth : SV_Depth,\n");
out.WriteFmt(" in float4 rawpos : SV_Position,\n");
out.WriteFmt(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa));
out.Write(" out float depth : SV_Depth,\n");
out.Write(" in float4 rawpos : SV_Position,\n");
out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.Write(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa));
// compute window position if needed because binding semantic WPOS is not widely supported
for (u32 i = 0; i < numTexgen; ++i)
{
out.WriteFmt(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("\n,\n in {} float4 clipPos : TEXCOORD{}",
GetInterpolationQualifier(msaa, ssaa), numTexgen);
out.Write("\n,\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen);
}
if (per_pixel_lighting)
{
out.WriteFmt(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen + 1);
out.WriteFmt(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen + 2);
}
out.WriteFmt(",\n in float clipDist0 : SV_ClipDistance0\n"
out.Write(",\n in float clipDist0 : SV_ClipDistance0\n"
",\n in float clipDist1 : SV_ClipDistance1\n");
if (stereo)
out.WriteFmt(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.WriteFmt("\n ) {{\n");
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write("\n ) {{\n");
}
out.WriteFmt(" int3 tevcoord = int3(0, 0, 0);\n"
out.Write(" int3 tevcoord = int3(0, 0, 0);\n"
" State s;\n"
" s.TexColor = int4(0, 0, 0, 0);\n"
" s.AlphaBump = 0;\n"
"\n");
for (int i = 0; i < 4; i++)
out.WriteFmt(" s.Reg[{}] = " I_COLORS "[{}];\n", i, i);
out.Write(" s.Reg[{}] = " I_COLORS "[{}];\n", i, i);
const char* color_input_prefix = "";
if (per_pixel_lighting)
{
out.WriteFmt(" float4 lit_colors_0 = colors_0;\n"
out.Write(" float4 lit_colors_0 = colors_0;\n"
" float4 lit_colors_1 = colors_1;\n"
" float3 lit_normal = normalize(Normal.xyz);\n"
" float3 lit_pos = WorldPos.xyz;\n");
@ -765,18 +764,17 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
color_input_prefix = "lit_";
}
out.WriteFmt(" uint num_stages = {};\n\n",
out.Write(" uint num_stages = {};\n\n",
BitfieldExtract("bpmem_genmode", bpmem.genMode.numtevstages));
out.WriteFmt(" // Main tev loop\n");
out.Write(" // Main tev loop\n");
if (ApiType == APIType::D3D)
{
// Tell DirectX we don't want this loop unrolled (it crashes if it tries to)
out.WriteFmt(" [loop]\n");
out.Write(" [loop]\n");
}
out.WriteFmt(
" for(uint stage = 0u; stage <= num_stages; stage++)\n"
out.Write(" for(uint stage = 0u; stage <= num_stages; stage++)\n"
" {{\n"
" StageState ss;\n"
" ss.stage = stage;\n"
@ -790,56 +788,56 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Disable texturing when there are no texgens (for now)
if (numTexgen != 0)
{
out.WriteFmt(" uint tex_coord = {};\n",
out.Write(" uint tex_coord = {};\n",
BitfieldExtract("ss.order", TwoTevStageOrders().texcoord0));
out.WriteFmt(" float3 uv = getTexCoord(tex_coord);\n"
out.Write(" float3 uv = getTexCoord(tex_coord);\n"
" int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS
"[tex_coord].zw);\n"
"\n"
" bool texture_enabled = (ss.order & {}u) != 0u;\n",
1 << TwoTevStageOrders().enable0.StartBit());
out.WriteFmt("\n"
out.Write("\n"
" // Indirect textures\n"
" uint tevind = bpmem_tevind(stage);\n"
" if (tevind != 0u)\n"
" {{\n"
" uint bs = {};\n",
BitfieldExtract("tevind", TevStageIndirect().bs));
out.WriteFmt(" uint fmt = {};\n", BitfieldExtract("tevind", TevStageIndirect().fmt));
out.WriteFmt(" uint bias = {};\n", BitfieldExtract("tevind", TevStageIndirect().bias));
out.WriteFmt(" uint bt = {};\n", BitfieldExtract("tevind", TevStageIndirect().bt));
out.WriteFmt(" uint mid = {};\n", BitfieldExtract("tevind", TevStageIndirect().mid));
out.WriteFmt("\n");
out.WriteFmt(" int3 indcoord;\n");
out.Write(" uint fmt = {};\n", BitfieldExtract("tevind", TevStageIndirect().fmt));
out.Write(" uint bias = {};\n", BitfieldExtract("tevind", TevStageIndirect().bias));
out.Write(" uint bt = {};\n", BitfieldExtract("tevind", TevStageIndirect().bt));
out.Write(" uint mid = {};\n", BitfieldExtract("tevind", TevStageIndirect().mid));
out.Write("\n");
out.Write(" int3 indcoord;\n");
LookupIndirectTexture("indcoord", "bt");
out.WriteFmt(" if (bs != 0u)\n"
out.Write(" if (bs != 0u)\n"
" s.AlphaBump = indcoord[bs - 1u];\n"
" switch(fmt)\n"
" {{\n"
" case {}u:\n",
ITF_8);
out.WriteFmt(" indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);\n"
out.Write(" indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);\n"
" indcoord.y = indcoord.y + ((bias & 2u) != 0u ? -128 : 0);\n"
" indcoord.z = indcoord.z + ((bias & 4u) != 0u ? -128 : 0);\n"
" s.AlphaBump = s.AlphaBump & 0xf8;\n"
" break;\n"
" case {}u:\n",
ITF_5);
out.WriteFmt(" indcoord.x = (indcoord.x & 0x1f) + ((bias & 1u) != 0u ? 1 : 0);\n"
out.Write(" indcoord.x = (indcoord.x & 0x1f) + ((bias & 1u) != 0u ? 1 : 0);\n"
" indcoord.y = (indcoord.y & 0x1f) + ((bias & 2u) != 0u ? 1 : 0);\n"
" indcoord.z = (indcoord.z & 0x1f) + ((bias & 4u) != 0u ? 1 : 0);\n"
" s.AlphaBump = s.AlphaBump & 0xe0;\n"
" break;\n"
" case {}u:\n",
ITF_4);
out.WriteFmt(" indcoord.x = (indcoord.x & 0x0f) + ((bias & 1u) != 0u ? 1 : 0);\n"
out.Write(" indcoord.x = (indcoord.x & 0x0f) + ((bias & 1u) != 0u ? 1 : 0);\n"
" indcoord.y = (indcoord.y & 0x0f) + ((bias & 2u) != 0u ? 1 : 0);\n"
" indcoord.z = (indcoord.z & 0x0f) + ((bias & 4u) != 0u ? 1 : 0);\n"
" s.AlphaBump = s.AlphaBump & 0xf0;\n"
" break;\n"
" case {}u:\n",
ITF_3);
out.WriteFmt(" indcoord.x = (indcoord.x & 0x07) + ((bias & 1u) != 0u ? 1 : 0);\n"
out.Write(" indcoord.x = (indcoord.x & 0x07) + ((bias & 1u) != 0u ? 1 : 0);\n"
" indcoord.y = (indcoord.y & 0x07) + ((bias & 2u) != 0u ? 1 : 0);\n"
" indcoord.z = (indcoord.z & 0x07) + ((bias & 4u) != 0u ? 1 : 0);\n"
" s.AlphaBump = s.AlphaBump & 0xf8;\n"
@ -857,8 +855,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" {{\n"
" case 0u: // 3x2 S0.10 matrix\n"
" indtevtrans = int2(idot(" I_INDTEXMTX
"[mtxidx].xyz, indcoord), idot(" I_INDTEXMTX
"[mtxidx + 1u].xyz, indcoord)) >> 3;\n"
"[mtxidx].xyz, indcoord), idot(" I_INDTEXMTX "[mtxidx + 1u].xyz, indcoord)) >> 3;\n"
" break;\n"
" case 1u: // S matrix, S17.7 format\n"
" indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;\n"
@ -877,13 +874,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" // Wrapping\n"
" uint sw = {};\n",
BitfieldExtract("tevind", TevStageIndirect().sw));
out.WriteFmt(" uint tw = {}; \n", BitfieldExtract("tevind", TevStageIndirect().tw));
out.WriteFmt(
out.Write(" uint tw = {}; \n", BitfieldExtract("tevind", TevStageIndirect().tw));
out.Write(
" int2 wrapped_coord = int2(Wrap(fixedPoint_uv.x, sw), Wrap(fixedPoint_uv.y, tw));\n"
"\n"
" if ((tevind & {}u) != 0u) // add previous tevcoord\n",
1 << TevStageIndirect().fb_addprev.StartBit());
out.WriteFmt(" tevcoord.xy += wrapped_coord + indtevtrans;\n"
out.Write(" tevcoord.xy += wrapped_coord + indtevtrans;\n"
" else\n"
" tevcoord.xy = wrapped_coord + indtevtrans;\n"
"\n"
@ -899,40 +896,39 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" if (texture_enabled) {{\n"
" uint sampler_num = {};\n",
BitfieldExtract("ss.order", TwoTevStageOrders().texmap0));
out.WriteFmt("\n"
out.Write("\n"
" float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n");
out.WriteFmt(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n",
out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n",
stereo ? "float(layer)" : "0.0");
out.WriteFmt(" uint swap = {};\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.tswap));
out.WriteFmt(" s.TexColor = Swizzle(swap, color);\n");
out.WriteFmt(" }} else {{\n"
out.Write(" uint swap = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.tswap));
out.Write(" s.TexColor = Swizzle(swap, color);\n");
out.Write(" }} else {{\n"
" // Texture is disabled\n"
" s.TexColor = int4(255, 255, 255, 255);\n"
" }}\n"
"\n");
}
out.WriteFmt(" // This is the Meat of TEV\n"
out.Write(" // This is the Meat of TEV\n"
" {{\n"
" // Color Combiner\n");
out.WriteFmt(" uint color_a = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.a));
out.WriteFmt(" uint color_b = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.b));
out.WriteFmt(" uint color_c = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.c));
out.WriteFmt(" uint color_d = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.d));
out.Write(" uint color_a = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.a));
out.Write(" uint color_b = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.b));
out.Write(" uint color_c = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.c));
out.Write(" uint color_d = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.d));
out.WriteFmt(" uint color_bias = {};\n",
out.Write(" uint color_bias = {};\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.bias));
out.WriteFmt(" bool color_op = bool({});\n",
out.Write(" bool color_op = bool({});\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.op));
out.WriteFmt(" bool color_clamp = bool({});\n",
out.Write(" bool color_clamp = bool({});\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.clamp));
out.WriteFmt(" uint color_shift = {};\n",
out.Write(" uint color_shift = {};\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.shift));
out.WriteFmt(" uint color_dest = {};\n",
out.Write(" uint color_dest = {};\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.dest));
out.WriteFmt(
out.Write(
" uint color_compare_op = color_shift << 1 | uint(color_op);\n"
"\n"
" int3 color_A = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_a) & "
@ -945,7 +941,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"bits + sign\n"
"\n", // TODO: do we need to sign extend?
color_input_prefix);
out.WriteFmt(
out.Write(
" int3 color;\n"
" if (color_bias != 3u) {{ // Normal mode\n"
" color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, "
@ -981,24 +977,24 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n");
// Alpha combiner
out.WriteFmt(" // Alpha Combiner\n");
out.WriteFmt(" uint alpha_a = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.a));
out.WriteFmt(" uint alpha_b = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.b));
out.WriteFmt(" uint alpha_c = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.c));
out.WriteFmt(" uint alpha_d = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.d));
out.Write(" // Alpha Combiner\n");
out.Write(" uint alpha_a = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.a));
out.Write(" uint alpha_b = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.b));
out.Write(" uint alpha_c = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.c));
out.Write(" uint alpha_d = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.d));
out.WriteFmt(" uint alpha_bias = {};\n",
out.Write(" uint alpha_bias = {};\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.bias));
out.WriteFmt(" bool alpha_op = bool({});\n",
out.Write(" bool alpha_op = bool({});\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.op));
out.WriteFmt(" bool alpha_clamp = bool({});\n",
out.Write(" bool alpha_clamp = bool({});\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.clamp));
out.WriteFmt(" uint alpha_shift = {};\n",
out.Write(" uint alpha_shift = {};\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.shift));
out.WriteFmt(" uint alpha_dest = {};\n",
out.Write(" uint alpha_dest = {};\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.dest));
out.WriteFmt(
out.Write(
" uint alpha_compare_op = alpha_shift << 1 | uint(alpha_op);\n"
"\n"
" int alpha_A;\n"
@ -1013,7 +1009,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"+ sign\n"
"\n", // TODO: do we need to sign extend?
color_input_prefix);
out.WriteFmt("\n"
out.Write("\n"
" int alpha;\n"
" if (alpha_bias != 3u) {{ // Normal mode\n"
" alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, "
@ -1042,30 +1038,30 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" setRegAlpha(s, alpha_dest, alpha);\n"
" }}\n");
out.WriteFmt(" }} // Main TEV loop\n"
out.Write(" }} // Main TEV loop\n"
"\n");
// Select the output color and alpha registers from the last stage.
out.WriteFmt(" int4 TevResult;\n");
out.WriteFmt(" TevResult.xyz = getTevReg(s, {}).xyz;\n",
out.Write(" int4 TevResult;\n");
out.Write(" TevResult.xyz = getTevReg(s, {}).xyz;\n",
BitfieldExtract("bpmem_combiners(num_stages).x", TevStageCombiner().colorC.dest));
out.WriteFmt(" TevResult.w = getTevReg(s, {}).w;\n",
out.Write(" TevResult.w = getTevReg(s, {}).w;\n",
BitfieldExtract("bpmem_combiners(num_stages).y", TevStageCombiner().alphaC.dest));
out.WriteFmt(" TevResult &= 255;\n\n");
out.Write(" TevResult &= 255;\n\n");
if (host_config.fast_depth_calc)
{
if (!host_config.backend_reversed_depth_range)
out.WriteFmt(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n");
else
out.WriteFmt(" int zCoord = int(rawpos.z * 16777216.0);\n");
out.WriteFmt(" zCoord = clamp(zCoord, 0, 0xFFFFFF);\n"
out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n");
out.Write(" zCoord = clamp(zCoord, 0, 0xFFFFFF);\n"
"\n");
}
else
{
out.WriteFmt("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS
"[1].y));\n");
}
@ -1076,16 +1072,16 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
if (per_pixel_depth)
{
// Zfreeze forces early depth off
out.WriteFmt(" // ZFreeze\n"
out.Write(" // ZFreeze\n"
" if ((bpmem_genmode & {}u) != 0u) {{\n",
1 << GenMode().zfreeze.StartBit());
out.WriteFmt(" float2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
out.Write(" float2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n");
if (ApiType == APIType::OpenGL)
{
out.WriteFmt(" // OpenGL has reversed vertical screenspace coordinates\n"
out.Write(" // OpenGL has reversed vertical screenspace coordinates\n"
" screenpos.y = 528.0 - screenpos.y;\n");
}
out.WriteFmt(" zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
out.Write(" zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE
".y * screenpos.y);\n"
" }}\n"
"\n");
@ -1095,7 +1091,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Depth Texture
// =================
out.WriteFmt(" // Depth Texture\n"
out.Write(" // Depth Texture\n"
" int early_zCoord = zCoord;\n"
" if (bpmem_ztex_op != 0u) {{\n"
" int ztex = int(" I_ZBIAS "[1].w); // fixed bias\n"
@ -1109,27 +1105,27 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
if (per_pixel_depth)
{
out.WriteFmt(" // If early depth is enabled, write to zbuffer before depth textures\n"
out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n"
" // If early depth isn't enabled, we write to the zbuffer here\n"
" int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n");
if (!host_config.backend_reversed_depth_range)
out.WriteFmt(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n");
out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n");
else
out.WriteFmt(" depth = float(zbuffer_zCoord) / 16777216.0;\n");
out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n");
}
out.WriteFmt(" // Alpha Test\n"
out.Write(" // Alpha Test\n"
" if (bpmem_alphaTest != 0u) {{\n"
" bool comp0 = alphaCompare(TevResult.a, " I_ALPHA ".r, {});\n",
BitfieldExtract("bpmem_alphaTest", AlphaTest().comp0));
out.WriteFmt(" bool comp1 = alphaCompare(TevResult.a, " I_ALPHA ".g, {});\n",
out.Write(" bool comp1 = alphaCompare(TevResult.a, " I_ALPHA ".g, {});\n",
BitfieldExtract("bpmem_alphaTest", AlphaTest().comp1));
out.WriteFmt("\n"
out.Write("\n"
" // These if statements are written weirdly to work around intel and Qualcomm "
"bugs with handling booleans.\n"
" switch ({}) {{\n",
BitfieldExtract("bpmem_alphaTest", AlphaTest().logic));
out.WriteFmt(" case 0u: // AND\n"
out.Write(" case 0u: // AND\n"
" if (comp0 && comp1) break; else discard; break;\n"
" case 1u: // OR\n"
" if (comp0 || comp1) break; else discard; break;\n"
@ -1144,7 +1140,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// =========
// Dithering
// =========
out.WriteFmt(" if (bpmem_dither) {{\n"
out.Write(" if (bpmem_dither) {{\n"
" // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering\n"
" // Here the matrix is encoded into the two factor constants\n"
" int2 dither = int2(rawpos.xy) & 1;\n"
@ -1158,15 +1154,15 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// FIXME: Fog is implemented the same as ShaderGen, but ShaderGen's fog is all hacks.
// Should be fixed point, and should not make guesses about Range-Based adjustments.
out.WriteFmt(" // Fog\n"
out.Write(" // Fog\n"
" uint fog_function = {};\n",
BitfieldExtract("bpmem_fogParam3", FogParam3().fsel));
out.WriteFmt(" if (fog_function != 0u) {{\n"
out.Write(" if (fog_function != 0u) {{\n"
" // TODO: This all needs to be converted from float to fixed point\n"
" float ze;\n"
" if ({} == 0u) {{\n",
BitfieldExtract("bpmem_fogParam3", FogParam3().proj));
out.WriteFmt(" // perspective\n"
out.Write(" // perspective\n"
" // ze = A/(B - (Zs >> B_SHF)\n"
" ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
".w));\n"
@ -1178,7 +1174,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n"
" if (bool({})) {{\n",
BitfieldExtract("bpmem_fogRangeBase", FogRangeParams::RangeBase().Enabled));
out.WriteFmt(" // x_adjust = sqrt((x-center)^2 + k^2)/k\n"
out.Write(" // x_adjust = sqrt((x-center)^2 + k^2)/k\n"
" // ze *= x_adjust\n"
" float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
" float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
@ -1212,15 +1208,14 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" }}\n"
"\n"
" int ifog = iround(fog * 256.0);\n"
" TevResult.rgb = (TevResult.rgb * (256 - ifog) + " I_FOGCOLOR
".rgb * ifog) >> 8;\n"
" TevResult.rgb = (TevResult.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"
" }}\n"
"\n");
// D3D requires that the shader outputs be uint when writing to a uint render target for logic op.
if (ApiType == APIType::D3D && uid_data->uint_output)
{
out.WriteFmt(" if (bpmem_rgba6_format)\n"
out.Write(" if (bpmem_rgba6_format)\n"
" ocol0 = uint4(TevResult & 0xFC);\n"
" else\n"
" ocol0 = uint4(TevResult);\n"
@ -1228,21 +1223,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
}
else
{
out.WriteFmt(" if (bpmem_rgba6_format)\n"
out.Write(" if (bpmem_rgba6_format)\n"
" ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n"
" else\n"
" ocol0.rgb = float3(TevResult.rgb) / 255.0;\n"
"\n"
" if (bpmem_dstalpha != 0u)\n");
out.WriteFmt(" ocol0.a = float({} >> 2) / 63.0;\n",
out.Write(" ocol0.a = float({} >> 2) / 63.0;\n",
BitfieldExtract("bpmem_dstalpha", ConstantAlpha().alpha));
out.WriteFmt(" else\n"
out.Write(" else\n"
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
" \n");
if (use_dual_source || use_shader_blend)
{
out.WriteFmt(" // Dest alpha override (dual source blending)\n"
out.Write(" // Dest alpha override (dual source blending)\n"
" // Colors will be blended against the alpha from ocol1 and\n"
" // the alpha from ocol0 will be written to the framebuffer.\n"
" ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);\n");
@ -1251,7 +1246,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
if (bounding_box)
{
out.WriteFmt(" if (bpmem_bounding_box) {{\n"
out.Write(" if (bpmem_bounding_box) {{\n"
" UpdateBoundingBox(rawpos.xy);\n"
" }}\n");
}
@ -1299,36 +1294,36 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"1.0 - initial_ocol0.a;", // INVDSTALPHA
}};
out.WriteFmt(" if (blend_enable) {{\n"
out.Write(" if (blend_enable) {{\n"
" float4 blend_src;\n"
" switch (blend_src_factor) {{\n");
for (size_t i = 0; i < blendSrcFactor.size(); i++)
{
out.WriteFmt(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]);
out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]);
}
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" switch (blend_src_factor_alpha) {{\n");
for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++)
{
out.WriteFmt(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]);
out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]);
}
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" float4 blend_dst;\n"
" switch (blend_dst_factor) {{\n");
for (size_t i = 0; i < blendDstFactor.size(); i++)
{
out.WriteFmt(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]);
out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]);
}
out.WriteFmt(" }}\n"
out.Write(" }}\n"
" switch (blend_dst_factor_alpha) {{\n");
for (size_t i = 0; i < blendDstFactorAlpha.size(); i++)
{
out.WriteFmt(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]);
out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]);
}
out.WriteFmt(
out.Write(
" }}\n"
" float4 blend_result;\n"
" if (blend_subtract)\n"
@ -1337,30 +1332,30 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * "
"blend_src.rgb;\n");
out.WriteFmt(" if (blend_subtract_alpha)\n"
out.Write(" if (blend_subtract_alpha)\n"
" blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"
" else\n"
" blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
out.WriteFmt(" real_ocol0 = blend_result;\n");
out.Write(" real_ocol0 = blend_result;\n");
out.WriteFmt(" }} else {{\n"
out.Write(" }} else {{\n"
" real_ocol0 = ocol0;\n"
" }}\n");
}
out.WriteFmt("}}\n"
out.Write("}}\n"
"\n"
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {{\n"
" // Select Ras for stage\n"
" uint ras = {};\n",
BitfieldExtract("ss.order", TwoTevStageOrders().colorchan0));
out.WriteFmt(" if (ras < 2u) {{ // Lighting Channel 0 or 1\n"
out.Write(" if (ras < 2u) {{ // Lighting Channel 0 or 1\n"
" int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n"
" uint swap = {};\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.rswap));
out.WriteFmt(" return Swizzle(swap, color);\n");
out.WriteFmt(" }} else if (ras == 5u) {{ // Alpha Bumb\n"
out.Write(" return Swizzle(swap, color);\n");
out.Write(" }} else if (ras == 5u) {{ // Alpha Bumb\n"
" return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);\n"
" }} else if (ras == 6u) {{ // Normalzied Alpha Bump\n"
" int normalized = s.AlphaBump | s.AlphaBump >> 5;\n"
@ -1379,11 +1374,11 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n",
BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel0),
BitfieldExtract("tevksel", bpmem.tevksel[0].kasel0));
out.WriteFmt(" else\n"
out.Write(" else\n"
" return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n",
BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel1),
BitfieldExtract("tevksel", bpmem.tevksel[0].kasel1));
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}

View File

@ -35,93 +35,93 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
const u32 num_texgen = uid_data->num_texgens;
ShaderCode out;
out.WriteFmt("// Vertex UberShader\n\n");
out.WriteFmt("{}", s_lighting_struct);
out.Write("// Vertex UberShader\n\n");
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.WriteFmt("cbuffer VSBlock {{\n");
out.WriteFmt("{}", s_shader_uniforms);
out.WriteFmt("}};\n");
out.Write("cbuffer VSBlock {{\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
out.WriteFmt("}};\n\n");
out.Write("}};\n\n");
WriteUberShaderCommonHeader(out, api_type, host_config);
WriteLightingFunction(out);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.WriteFmt("}} vs;\n");
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < num_texgen; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("VS_OUTPUT main(\n");
out.Write("VS_OUTPUT main(\n");
// inputs
out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n"
out.Write(" float3 rawnorm0 : NORMAL0,\n"
" float3 rawnorm1 : NORMAL1,\n"
" float3 rawnorm2 : NORMAL2,\n"
" float4 rawcolor0 : COLOR0,\n"
" float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
out.WriteFmt(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.WriteFmt(" uint posmtx : BLENDINDICES,\n");
out.WriteFmt(" float4 rawpos : POSITION) {{\n");
out.Write(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.Write(" uint posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
}
out.WriteFmt("VS_OUTPUT o;\n"
out.Write("VS_OUTPUT o;\n"
"\n");
// Transforms
out.WriteFmt("// Position matrix\n"
out.Write("// Position matrix\n"
"float4 P0;\n"
"float4 P1;\n"
"float4 P2;\n"
@ -133,7 +133,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"\n"
"if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
out.WriteFmt(" // Vertex format has a per-vertex matrix\n"
out.Write(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
@ -161,18 +161,18 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM0\n",
VB_HAS_NRM0);
out.WriteFmt(
out.Write(
" _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
"\n"
"float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM1\n",
VB_HAS_NRM1);
out.WriteFmt(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
"\n"
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM2\n",
VB_HAS_NRM2);
out.WriteFmt(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
"\n");
// Hardware Lighting
@ -183,14 +183,14 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
if (num_texgen > 0)
GenVertexShaderTexGens(api_type, num_texgen, out);
out.WriteFmt("if (xfmem_numColorChans == 0u) {{\n"
out.Write("if (xfmem_numColorChans == 0u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor0;\n"
" else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL0);
out.WriteFmt("if (xfmem_numColorChans < 2u) {{\n"
out.Write("if (xfmem_numColorChans < 2u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor1;\n"
" else\n"
@ -201,17 +201,17 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
if (!host_config.fast_depth_calc)
{
// clipPos/w needs to be done in pixel shader, not here
out.WriteFmt("o.clipPos = o.pos;\n");
out.Write("o.clipPos = o.pos;\n");
}
if (per_pixel_lighting)
{
out.WriteFmt("o.Normal = _norm0;\n"
out.Write("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL0\n"
out.Write("if ((components & {}u) != 0u) // VB_HAS_COL0\n"
" o.colors_0 = rawcolor0;\n",
VB_HAS_COL0);
out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL1\n"
out.Write("if ((components & {}u) != 0u) // VB_HAS_COL1\n"
" o.colors_1 = rawcolor1;\n",
VB_HAS_COL1);
}
@ -225,12 +225,12 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("o.clipDist0 = clipDist0;\n"
out.Write("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
}
}
@ -246,7 +246,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer.
out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control)
@ -254,12 +254,12 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error.
out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
}
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend.
out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
@ -267,7 +267,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding)
{
@ -275,16 +275,16 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// cause an additional pixel offset. Due to a higher pixel density we need to correct this
// by converting our clip-space position into the Wii's screen-space.
// Acquire the right pixel and then convert it back.
out.WriteFmt("if (o.pos.w == 1.0f)\n"
out.Write("if (o.pos.w == 1.0f)\n"
"{{\n");
out.WriteFmt("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
"\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
out.WriteFmt("\tss_pixel_x = round(ss_pixel_x);\n"
out.Write("\tss_pixel_x = round(ss_pixel_x);\n"
"\tss_pixel_y = round(ss_pixel_y);\n");
out.WriteFmt("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
"\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
"}}\n");
}
@ -300,35 +300,35 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < num_texgen; ++i)
out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i);
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.WriteFmt("clipPos = o.clipPos;\n");
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("Normal = o.Normal;\n"
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.WriteFmt("colors_0 = o.colors_0;\n"
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n"
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.WriteFmt("gl_Position = o.pos;\n");
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.WriteFmt("return o;\n");
out.Write("return o;\n");
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -338,104 +338,103 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying
// to dynamically index them.
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
out.Write("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
out.WriteFmt("// Texture coordinate generation\n");
out.Write("// Texture coordinate generation\n");
if (num_texgen == 1)
{
out.WriteFmt("{{ const uint texgen = 0u;\n");
out.Write("{{ const uint texgen = 0u;\n");
}
else
{
out.WriteFmt("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
out.Write("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", num_texgen);
}
out.WriteFmt(" // Texcoord transforms\n");
out.WriteFmt(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
out.Write(" // Texcoord transforms\n");
out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.WriteFmt(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.WriteFmt(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.WriteFmt(" coord.xyz = rawpos.xyz;\n");
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.WriteFmt(
out.Write(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.Write(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.Write(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
VB_HAS_NRM0);
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.WriteFmt(
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
VB_HAS_NRM1);
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.WriteFmt(
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
VB_HAS_NRM2);
out.WriteFmt(" break;\n\n");
out.Write(" break;\n\n");
for (u32 i = 0; i < 8; i++)
{
out.WriteFmt(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.WriteFmt(
out.Write(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.Write(
" coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, "
"1.0, 1.0) : coord;\n",
VB_HAS_UV0 << i, i, i, i);
out.WriteFmt(" break;\n\n");
out.Write(" break;\n\n");
}
out.WriteFmt(" }}\n"
out.Write(" }}\n"
"\n");
out.WriteFmt(" // Input form of AB11 sets z element to 1.0\n");
out.WriteFmt(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
out.Write(" // Input form of AB11 sets z element to 1.0\n");
out.Write(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11);
out.WriteFmt(" coord.z = 1.0f;\n"
out.Write(" coord.z = 1.0f;\n"
"\n");
out.WriteFmt(" // first transformation\n");
out.WriteFmt(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype));
out.WriteFmt(" float3 output_tex;\n"
out.Write(" // first transformation\n");
out.Write(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype));
out.Write(" float3 output_tex;\n"
" switch (texgentype)\n"
" {{\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.WriteFmt(" {{\n");
out.WriteFmt(" uint light = {};\n",
out.Write(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.Write(" {{\n");
out.Write(" uint light = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift));
out.WriteFmt(" uint source = {};\n",
out.Write(" uint source = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift));
out.WriteFmt(" switch (source) {{\n");
out.Write(" switch (source) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.WriteFmt(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
" }}\n");
out.WriteFmt(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n",
out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n",
VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
out.WriteFmt(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
" }}\n"
" }}\n"
" break;\n\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.WriteFmt(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
" break;\n\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.WriteFmt(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
" break;\n\n");
out.WriteFmt(" default: // Also XF_TEXGEN_REGULAR\n"
out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
" {{\n");
out.WriteFmt(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
out.WriteFmt(
" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.WriteFmt(" }}\n"
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }}\n"
"\n");
out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.WriteFmt(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
" }} else {{\n"
@ -444,10 +443,9 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
" 1.0);\n"
" }}\n"
" }} else {{\n");
out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.WriteFmt(
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
" }} else {{\n"
@ -461,15 +459,15 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
" }}\n"
"\n");
out.WriteFmt(" if (xfmem_dualTexInfo != 0u) {{\n");
out.WriteFmt(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.WriteFmt(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index));
out.WriteFmt(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
out.Write(" if (xfmem_dualTexInfo != 0u) {{\n");
out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.Write(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index));
out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
" float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
" float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
"\n");
out.WriteFmt(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize));
out.WriteFmt(" output_tex.xyz = normalize(output_tex.xyz);\n"
out.Write(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize));
out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
"\n"
" // multiply by postmatrix\n"
" output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
@ -480,17 +478,17 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
// When q is 0, the GameCube appears to have a special case
// This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
out.WriteFmt(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
out.Write(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR);
out.WriteFmt(
out.Write(
" output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
"\n");
out.WriteFmt(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.WriteFmt(" switch (texgen) {{\n");
out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.Write(" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: o.tex{} = output_tex; break;\n", i, i);
out.WriteFmt(" }}\n"
out.Write(" case {}u: o.tex{} = output_tex; break;\n", i, i);
out.Write(" }}\n"
"}}\n");
}

View File

@ -83,37 +83,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
const bool ssaa = host_config.ssaa;
const bool vertex_rounding = host_config.vertex_rounding;
out.WriteFmt("{}", s_lighting_struct);
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.WriteFmt("cbuffer VSBlock {{\n");
out.Write("cbuffer VSBlock {{\n");
out.WriteFmt("{}", s_shader_uniforms);
out.WriteFmt("}};\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
{
@ -121,138 +121,138 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
}
}
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.WriteFmt("}} vs;\n");
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("VS_OUTPUT main(\n");
out.Write("VS_OUTPUT main(\n");
// inputs
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n");
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.WriteFmt(" float3 rawnorm1 : NORMAL1,\n");
out.Write(" float3 rawnorm1 : NORMAL1,\n");
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.WriteFmt(" float3 rawnorm2 : NORMAL2,\n");
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt(" float4 rawcolor0 : COLOR0,\n");
out.Write(" float4 rawcolor0 : COLOR0,\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt(" float4 rawcolor1 : COLOR1,\n");
out.Write(" float4 rawcolor1 : COLOR1,\n");
for (u32 i = 0; i < 8; ++i)
{
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
out.WriteFmt(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i);
out.Write(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i);
}
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.WriteFmt(" uint4 posmtx : BLENDINDICES,\n");
out.WriteFmt(" float4 rawpos : POSITION) {{\n");
out.Write(" uint4 posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
}
out.WriteFmt("VS_OUTPUT o;\n");
out.Write("VS_OUTPUT o;\n");
// transforms
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
{
out.WriteFmt("int posidx = int(posmtx.r);\n"
out.Write("int posidx = int(posmtx.r);\n"
"float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posidx], rawpos), dot(" I_TRANSFORMMATRICES
"[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n");
if ((uid_data->components & VB_HAS_NRMALL) != 0)
{
out.WriteFmt("int normidx = posidx & 31;\n"
out.Write("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
}
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt(
out.Write(
"float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt(
out.Write(
"float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
}
}
else
{
out.WriteFmt("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
}
}
if ((uid_data->components & VB_HAS_NRM0) == 0)
out.WriteFmt("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.WriteFmt("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
out.WriteFmt("int4 lacc;\n"
out.Write("int4 lacc;\n"
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
@ -260,22 +260,22 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
"o.colors_");
// transform texcoords
out.WriteFmt("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
auto& texinfo = uid_data->texMtxInfo[i];
out.WriteFmt("{{\n");
out.WriteFmt("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
out.Write("{{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
out.WriteFmt("coord.xyz = rawpos.xyz;\n");
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case XF_SRCNORMAL_INROW:
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("coord.xyz = rawnorm0.xyz;\n");
out.Write("coord.xyz = rawnorm0.xyz;\n");
}
break;
case XF_SRCCOLORS_INROW:
@ -285,20 +285,20 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
case XF_SRCBINORMAL_T_INROW:
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt("coord.xyz = rawnorm1.xyz;\n");
out.Write("coord.xyz = rawnorm1.xyz;\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt("coord.xyz = rawnorm2.xyz;\n");
out.Write("coord.xyz = rawnorm2.xyz;\n");
}
break;
default:
ASSERT(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if ((uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) != 0)
{
out.WriteFmt("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n",
out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
}
break;
@ -306,7 +306,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// Input form of AB11 sets z element to 1.0
if (texinfo.inputform == XF_TEXINPUT_AB11)
out.WriteFmt("coord.z = 1.0;\n");
out.Write("coord.z = 1.0;\n");
// first transformation
switch (texinfo.texgentype)
@ -316,9 +316,9 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
{
// transform the light dir into tangent space
out.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.WriteFmt(
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
texinfo.embosssourceshift);
}
@ -327,31 +327,31 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
// ASSERT(0); // should have normals
out.WriteFmt("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
out.WriteFmt("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
out.WriteFmt("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
default:
if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
{
out.WriteFmt("int tmp = int(rawtex{}.z);\n", i);
out.Write("int tmp = int(rawtex{}.z);\n", i);
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
i);
}
else
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
i);
}
@ -360,14 +360,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
"[{}]));\n",
i, 3 * i, 3 * i + 1, 3 * i + 2);
}
else
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n",
i, 3 * i, 3 * i + 1);
}
@ -380,16 +380,16 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
auto& postInfo = uid_data->postMtxInfo[i];
out.WriteFmt("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
if (postInfo.normalize)
out.WriteFmt("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
// multiply by postmatrix
out.WriteFmt(
out.Write(
"o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + "
"P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n",
i);
@ -401,44 +401,44 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// TODO: check if this only affects XF_TEXGEN_REGULAR
if (texinfo.texgentype == XF_TEXGEN_REGULAR)
{
out.WriteFmt(
out.Write(
"if(o.tex{0}.z == 0.0f)\n"
"\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n",
i);
}
out.WriteFmt("}}\n");
out.Write("}}\n");
}
if (uid_data->numColorChans == 0)
{
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("o.colors_0 = rawcolor0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
else
out.WriteFmt("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
if (uid_data->numColorChans < 2)
{
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("o.colors_1 = rawcolor1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
else
out.WriteFmt("o.colors_1 = o.colors_0;\n");
out.Write("o.colors_1 = o.colors_0;\n");
}
// clipPos/w needs to be done in pixel shader, not here
if (!host_config.fast_depth_calc)
out.WriteFmt("o.clipPos = o.pos;\n");
out.Write("o.clipPos = o.pos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("o.Normal = _norm0;\n"
out.Write("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("o.colors_0 = rawcolor0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("o.colors_1 = rawcolor1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
@ -450,13 +450,13 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("o.clipDist0 = clipDist0;\n"
out.Write("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
}
}
@ -472,7 +472,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer.
out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control)
@ -480,12 +480,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error.
out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
}
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend.
out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
@ -493,7 +493,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding)
{
@ -504,7 +504,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// we need to correct this by converting our
// clip-space position into the Wii's screen-space,
// acquire the right pixel and then convert it back.
out.WriteFmt("if (o.pos.w == 1.0f)\n"
out.Write("if (o.pos.w == 1.0f)\n"
"{{\n"
"\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
@ -529,35 +529,35 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < uid_data->numTexGens; ++i)
out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i);
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.WriteFmt("clipPos = o.clipPos;\n");
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("Normal = o.Normal;\n"
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.WriteFmt("colors_0 = o.colors_0;\n"
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n"
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.WriteFmt("gl_Position = o.pos;\n");
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.WriteFmt("return o;\n");
out.Write("return o;\n");
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}