diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 0b4022239e..3e4f27a1e0 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -27,9 +27,9 @@ APIType GetAPIType() void EmitUniformBufferDeclaration(ShaderCode& code) { if (GetAPIType() == APIType::D3D) - code.WriteFmt("cbuffer PSBlock : register(b0)\n"); + code.Write("cbuffer PSBlock : register(b0)\n"); else - code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock\n"); + code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n"); } void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, @@ -43,8 +43,8 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, for (u32 i = start; i < end; i++) { - code.WriteFmt("{} tex{} : register(t{});\n", array_type, i, i); - code.WriteFmt("SamplerState samp{} : register(s{});\n", i, i); + code.Write("{} tex{} : register(t{});\n", array_type, i, i); + code.Write("SamplerState samp{} : register(s{});\n", i, i); } } break; @@ -56,7 +56,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, for (u32 i = start; i < end; i++) { - code.WriteFmt("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i); + code.Write("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i); } } break; @@ -70,12 +70,12 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: - code.WriteFmt("tex{}.Sample(samp{}, {})", n, n, coords); + code.Write("tex{}.Sample(samp{}, {})", n, n, coords); break; case APIType::OpenGL: case APIType::Vulkan: - code.WriteFmt("texture(samp{}, {})", n, coords); + code.Write("texture(samp{}, {})", n, coords); break; default: @@ -90,12 +90,12 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: - code.WriteFmt("tex{}.Load({})", n, coords); + code.Write("tex{}.Load({})", n, coords); break; case APIType::OpenGL: case APIType::Vulkan: - code.WriteFmt("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords); + code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords); break; default: @@ -111,19 +111,19 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col { case APIType::D3D: { - code.WriteFmt("void main("); + code.Write("void main("); for (u32 i = 0; i < num_tex_inputs; i++) - code.WriteFmt("in float3 rawtex{} : TEXCOORD{}, ", i, i); + code.Write("in float3 rawtex{} : TEXCOORD{}, ", i, i); for (u32 i = 0; i < num_color_inputs; i++) - code.WriteFmt("in float4 rawcolor{} : COLOR{}, ", i, i); + code.Write("in float4 rawcolor{} : COLOR{}, ", i, i); if (position_input) - code.WriteFmt("in float4 rawpos : POSITION, "); - code.WriteFmt("{}", extra_inputs); + code.Write("in float4 rawpos : POSITION, "); + code.Write("{}", extra_inputs); for (u32 i = 0; i < num_tex_outputs; i++) - code.WriteFmt("out float3 v_tex{} : TEXCOORD{}, ", i, i); + code.Write("out float3 v_tex{} : TEXCOORD{}, ", i, i); for (u32 i = 0; i < num_color_outputs; i++) - code.WriteFmt("out float4 v_col{} : COLOR{}, ", i, i); - code.WriteFmt("out float4 opos : SV_Position)\n"); + code.Write("out float4 v_col{} : COLOR{}, ", i, i); + code.Write("out float4 opos : SV_Position)\n"); } break; @@ -133,35 +133,35 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col for (u32 i = 0; i < num_tex_inputs; i++) { const auto attribute = SHADER_TEXTURE0_ATTRIB + i; - code.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i); + code.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i); } for (u32 i = 0; i < num_color_inputs; i++) { const auto attribute = SHADER_COLOR0_ATTRIB + i; - code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i); + code.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i); } if (position_input) - code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + code.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + code.Write("VARYING_LOCATION(0) out VertexData {{\n"); for (u32 i = 0; i < num_tex_outputs; i++) - code.WriteFmt(" float3 v_tex{};\n", i); + code.Write(" float3 v_tex{};\n", i); for (u32 i = 0; i < num_color_outputs; i++) - code.WriteFmt(" float4 v_col{};\n", i); - code.WriteFmt("}};\n"); + code.Write(" float4 v_col{};\n", i); + code.Write("}};\n"); } else { for (u32 i = 0; i < num_tex_outputs; i++) - code.WriteFmt("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i); + code.Write("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i); for (u32 i = 0; i < num_color_outputs; i++) - code.WriteFmt("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i); + code.Write("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i); } - code.WriteFmt("#define opos gl_Position\n"); - code.WriteFmt("{}\n", extra_inputs); - code.WriteFmt("void main()\n"); + code.Write("#define opos gl_Position\n"); + code.Write("{}\n", extra_inputs); + code.Write("void main()\n"); } break; default: @@ -177,14 +177,14 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo { case APIType::D3D: { - code.WriteFmt("void main("); + code.Write("void main("); for (u32 i = 0; i < num_tex_inputs; i++) - code.WriteFmt("in float3 v_tex{} : TEXCOORD{}, ", i, i); + code.Write("in float3 v_tex{} : TEXCOORD{}, ", i, i); for (u32 i = 0; i < num_color_inputs; i++) - code.WriteFmt("in float4 v_col{} : COLOR{}, ", i, i); + code.Write("in float4 v_col{} : COLOR{}, ", i, i); if (emit_frag_coord) - code.WriteFmt("in float4 frag_coord : SV_Position, "); - code.WriteFmt("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type); + code.Write("in float4 frag_coord : SV_Position, "); + code.Write("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type); } break; @@ -193,26 +193,26 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"); + code.Write("VARYING_LOCATION(0) in VertexData {{\n"); for (u32 i = 0; i < num_tex_inputs; i++) - code.WriteFmt(" in float3 v_tex{};\n", i); + code.Write(" in float3 v_tex{};\n", i); for (u32 i = 0; i < num_color_inputs; i++) - code.WriteFmt(" in float4 v_col{};\n", i); - code.WriteFmt("}};\n"); + code.Write(" in float4 v_col{};\n", i); + code.Write("}};\n"); } else { for (u32 i = 0; i < num_tex_inputs; i++) - code.WriteFmt("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i); + code.Write("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i); for (u32 i = 0; i < num_color_inputs; i++) - code.WriteFmt("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i); + code.Write("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i); } - code.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type); - code.WriteFmt("{}\n", extra_vars); + code.Write("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type); + code.Write("{}\n", extra_vars); if (emit_frag_coord) - code.WriteFmt("#define frag_coord gl_FragCoord\n"); - code.WriteFmt("void main()\n"); + code.Write("#define frag_coord gl_FragCoord\n"); + code.Write("void main()\n"); } break; @@ -228,16 +228,16 @@ std::string GenerateScreenQuadVertexShader() EmitVertexMainDeclaration(code, 0, 0, false, 1, 0, GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : "#define id gl_VertexID\n"); - code.WriteFmt( + code.Write( "{{\n" " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n" " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"); // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) - code.WriteFmt(" opos.y = -opos.y;\n"); + code.Write(" opos.y = -opos.y;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -247,88 +247,88 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors) ShaderCode code; if (GetAPIType() == APIType::D3D) { - code.WriteFmt("struct VS_OUTPUT\n" - "{{\n"); + code.Write("struct VS_OUTPUT\n" + "{{\n"); for (u32 i = 0; i < num_tex; i++) - code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i); + code.Write(" float3 tex{} : TEXCOORD{};\n", i, i); for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" float4 color{} : COLOR{};\n", i, i); - code.WriteFmt(" float4 position : SV_Position;\n" - "}};\n"); + code.Write(" float4 color{} : COLOR{};\n", i, i); + code.Write(" float4 position : SV_Position;\n" + "}};\n"); - code.WriteFmt("struct GS_OUTPUT\n" - "{{"); + code.Write("struct GS_OUTPUT\n" + "{{"); for (u32 i = 0; i < num_tex; i++) - code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i); + code.Write(" float3 tex{} : TEXCOORD{};\n", i, i); for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" float4 color{} : COLOR{};\n", i, i); - code.WriteFmt(" float4 position : SV_Position;\n" - " uint slice : SV_RenderTargetArrayIndex;\n" - "}};\n\n"); + code.Write(" float4 color{} : COLOR{};\n", i, i); + code.Write(" float4 position : SV_Position;\n" + " uint slice : SV_RenderTargetArrayIndex;\n" + "}};\n\n"); - code.WriteFmt("[maxvertexcount(6)]\n" - "void main(triangle VS_OUTPUT vso[3], inout TriangleStream output)\n" - "{{\n" - " for (uint slice = 0; slice < 2u; slice++)\n" - " {{\n" - " for (int i = 0; i < 3; i++)\n" - " {{\n" - " GS_OUTPUT gso;\n" - " gso.position = vso[i].position;\n"); + code.Write("[maxvertexcount(6)]\n" + "void main(triangle VS_OUTPUT vso[3], inout TriangleStream output)\n" + "{{\n" + " for (uint slice = 0; slice < 2u; slice++)\n" + " {{\n" + " for (int i = 0; i < 3; i++)\n" + " {{\n" + " GS_OUTPUT gso;\n" + " gso.position = vso[i].position;\n"); for (u32 i = 0; i < num_tex; i++) - code.WriteFmt(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i); + code.Write(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i); for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" gso.color{} = vso[i].color{};\n", i, i); - code.WriteFmt(" gso.slice = slice;\n" - " output.Append(gso);\n" - " }}\n" - " output.RestartStrip();\n" - " }}\n" - "}}\n"); + code.Write(" gso.color{} = vso[i].color{};\n", i, i); + code.Write(" gso.slice = slice;\n" + " output.Append(gso);\n" + " }}\n" + " output.RestartStrip();\n" + " }}\n" + "}}\n"); } else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan) { - code.WriteFmt("layout(triangles) in;\n" - "layout(triangle_strip, max_vertices = 6) out;\n"); + code.Write("layout(triangles) in;\n" + "layout(triangle_strip, max_vertices = 6) out;\n"); if (num_tex > 0 || num_colors > 0) { - code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"); + code.Write("VARYING_LOCATION(0) in VertexData {{\n"); for (u32 i = 0; i < num_tex; i++) - code.WriteFmt(" float3 v_tex{};\n", i); + code.Write(" float3 v_tex{};\n", i); for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" float4 v_col{};\n", i); - code.WriteFmt("}} v_in[];\n"); + code.Write(" float4 v_col{};\n", i); + code.Write("}} v_in[];\n"); - code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + code.Write("VARYING_LOCATION(0) out VertexData {{\n"); for (u32 i = 0; i < num_tex; i++) - code.WriteFmt(" float3 v_tex{};\n", i); + code.Write(" float3 v_tex{};\n", i); for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" float4 v_col{};\n", i); - code.WriteFmt("}} v_out;\n"); + code.Write(" float4 v_col{};\n", i); + code.Write("}} v_out;\n"); } - code.WriteFmt("\n" - "void main()\n" - "{{\n" - " for (int j = 0; j < 2; j++)\n" - " {{\n" - " gl_Layer = j;\n"); + code.Write("\n" + "void main()\n" + "{{\n" + " for (int j = 0; j < 2; j++)\n" + " {{\n" + " gl_Layer = j;\n"); // We have to explicitly unroll this loop otherwise the GL compiler gets cranky. for (u32 v = 0; v < 3; v++) { - code.WriteFmt(" gl_Position = gl_in[{}].gl_Position;\n", v); + code.Write(" gl_Position = gl_in[{}].gl_Position;\n", v); for (u32 i = 0; i < num_tex; i++) { - code.WriteFmt(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i); + code.Write(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i); } for (u32 i = 0; i < num_colors; i++) - code.WriteFmt(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i); - code.WriteFmt(" EmitVertex();\n\n"); + code.Write(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i); + code.Write(" EmitVertex();\n\n"); } - code.WriteFmt(" EndPrimitive();\n" - " }}\n" - "}}\n"); + code.Write(" EndPrimitive();\n" + " }}\n" + "}}\n"); } return code.GetBuffer(); @@ -338,25 +338,24 @@ std::string GenerateTextureCopyVertexShader() { ShaderCode code; EmitUniformBufferDeclaration(code); - code.WriteFmt("{{" - " float2 src_offset;\n" - " float2 src_size;\n" - "}};\n\n"); + code.Write("{{" + " float2 src_offset;\n" + " float2 src_size;\n" + "}};\n\n"); EmitVertexMainDeclaration(code, 0, 0, false, 1, 0, GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : "#define id gl_VertexID"); - code.WriteFmt( - "{{\n" - " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n" - " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n" - " v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); + code.Write("{{\n" + " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n" + " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n" + " v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) - code.WriteFmt(" opos.y = -opos.y;\n"); + code.Write(" opos.y = -opos.y;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -366,11 +365,11 @@ std::string GenerateTextureCopyPixelShader() ShaderCode code; EmitSamplerDeclarations(code, 0, 1, false); EmitPixelMainDeclaration(code, 1, 0); - code.WriteFmt("{{\n" - " ocol0 = "); + code.Write("{{\n" + " ocol0 = "); EmitSampleTexture(code, 0, "v_tex0"); - code.WriteFmt(";\n" - "}}\n"); + code.Write(";\n" + "}}\n"); return code.GetBuffer(); } @@ -378,9 +377,9 @@ std::string GenerateColorPixelShader() { ShaderCode code; EmitPixelMainDeclaration(code, 0, 1); - code.WriteFmt("{{\n" - " ocol0 = v_col0;\n" - "}}\n"); + code.Write("{{\n" + " ocol0 = v_col0;\n" + "}}\n"); return code.GetBuffer(); } @@ -390,25 +389,25 @@ std::string GenerateResolveDepthPixelShader(u32 samples) EmitSamplerDeclarations(code, 0, 1, true); EmitPixelMainDeclaration(code, 1, 0, "float", GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : ""); - code.WriteFmt("{{\n" - " int layer = int(v_tex0.z);\n"); + code.Write("{{\n" + " int layer = int(v_tex0.z);\n"); if (GetAPIType() == APIType::D3D) - code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n"); else - code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); // Take the minimum of all depth samples. if (GetAPIType() == APIType::D3D) - code.WriteFmt(" ocol0 = tex0.Load(coords, 0).r;\n"); + code.Write(" ocol0 = tex0.Load(coords, 0).r;\n"); else - code.WriteFmt(" ocol0 = texelFetch(samp0, coords, 0).r;\n"); - code.WriteFmt(" for (int i = 1; i < {}; i++)\n", samples); + code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n"); + code.Write(" for (int i = 1; i < {}; i++)\n", samples); if (GetAPIType() == APIType::D3D) - code.WriteFmt(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n"); + code.Write(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n"); else - code.WriteFmt(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"); + code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -416,15 +415,15 @@ std::string GenerateClearVertexShader() { ShaderCode code; EmitUniformBufferDeclaration(code); - code.WriteFmt("{{\n" - " float4 clear_color;\n" - " float clear_depth;\n" - "}};\n"); + code.Write("{{\n" + " float4 clear_color;\n" + " float clear_depth;\n" + "}};\n"); EmitVertexMainDeclaration(code, 0, 0, false, 0, 1, GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : "#define id gl_VertexID\n"); - code.WriteFmt( + code.Write( "{{\n" " float2 coord = float2(float((id << 1) & 2), float(id & 2));\n" " opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n" @@ -432,9 +431,9 @@ std::string GenerateClearVertexShader() // NDC space is flipped in Vulkan if (GetAPIType() == APIType::Vulkan) - code.WriteFmt(" opos.y = -opos.y;\n"); + code.Write(" opos.y = -opos.y;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -443,17 +442,17 @@ std::string GenerateEFBPokeVertexShader() { ShaderCode code; EmitVertexMainDeclaration(code, 0, 1, true, 0, 1); - code.WriteFmt("{{\n" - " v_col0 = rawcolor0;\n" - " opos = float4(rawpos.xyz, 1.0f);\n"); + code.Write("{{\n" + " v_col0 = rawcolor0;\n" + " opos = float4(rawpos.xyz, 1.0f);\n"); if (g_ActiveConfig.backend_info.bSupportsLargePoints) - code.WriteFmt(" gl_PointSize = rawpos.w;\n"); + code.Write(" gl_PointSize = rawpos.w;\n"); // NDC space is flipped in Vulkan. if (GetAPIType() == APIType::Vulkan) - code.WriteFmt(" opos.y = -opos.y;\n"); + code.Write(" opos.y = -opos.y;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -468,82 +467,82 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp "in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " : "in float4 ipos : SV_Position, ") : ""); - code.WriteFmt("{{\n" - " int layer = int(v_tex0.z);\n"); + code.Write("{{\n" + " int layer = int(v_tex0.z);\n"); if (GetAPIType() == APIType::D3D) - code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n"); else - code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); + code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"); if (samples == 1) { // No MSAA at all. if (GetAPIType() == APIType::D3D) - code.WriteFmt(" float4 val = tex0.Load(int4(coords, 0));\n"); + code.Write(" float4 val = tex0.Load(int4(coords, 0));\n"); else - code.WriteFmt(" float4 val = texelFetch(samp0, coords, 0);\n"); + code.Write(" float4 val = texelFetch(samp0, coords, 0);\n"); } else if (g_ActiveConfig.bSSAA) { // Sample shading, shader runs once per sample if (GetAPIType() == APIType::D3D) - code.WriteFmt(" float4 val = tex0.Load(coords, isample);"); + code.Write(" float4 val = tex0.Load(coords, isample);"); else - code.WriteFmt(" float4 val = texelFetch(samp0, coords, gl_SampleID);"); + code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);"); } else { // MSAA without sample shading, average out all samples. - code.WriteFmt(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); - code.WriteFmt(" for (int i = 0; i < {}; i++)\n", samples); + code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"); + code.Write(" for (int i = 0; i < {}; i++)\n", samples); if (GetAPIType() == APIType::D3D) - code.WriteFmt(" val += tex0.Load(coords, i);\n"); + code.Write(" val += tex0.Load(coords, i);\n"); else - code.WriteFmt(" val += texelFetch(samp0, coords, i);\n"); - code.WriteFmt(" val /= float({});\n", samples); + code.Write(" val += texelFetch(samp0, coords, i);\n"); + code.Write(" val /= float({});\n", samples); } switch (convtype) { case EFBReinterpretType::RGB8ToRGBA6: - code.WriteFmt(" int4 src8 = int4(round(val * 255.f));\n" - " int4 dst6;\n" - " dst6.r = src8.r >> 2;\n" - " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" - " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" - " dst6.a = src8.b & 0x3F;\n" - " ocol0 = float4(dst6) / 63.f;\n"); + code.Write(" int4 src8 = int4(round(val * 255.f));\n" + " int4 dst6;\n" + " dst6.r = src8.r >> 2;\n" + " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" + " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" + " dst6.a = src8.b & 0x3F;\n" + " ocol0 = float4(dst6) / 63.f;\n"); break; case EFBReinterpretType::RGB8ToRGB565: - code.WriteFmt(" ocol0 = val;\n"); + code.Write(" ocol0 = val;\n"); break; case EFBReinterpretType::RGBA6ToRGB8: - code.WriteFmt(" int4 src6 = int4(round(val * 63.f));\n" - " int4 dst8;\n" - " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" - " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" - " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" - " dst8.a = 255;\n" - " ocol0 = float4(dst8) / 255.f;\n"); + code.Write(" int4 src6 = int4(round(val * 63.f));\n" + " int4 dst8;\n" + " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" + " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" + " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" + " dst8.a = 255;\n" + " ocol0 = float4(dst8) / 255.f;\n"); break; case EFBReinterpretType::RGBA6ToRGB565: - code.WriteFmt(" ocol0 = val;\n"); + code.Write(" ocol0 = val;\n"); break; case EFBReinterpretType::RGB565ToRGB8: - code.WriteFmt(" ocol0 = val;\n"); + code.Write(" ocol0 = val;\n"); break; case EFBReinterpretType::RGB565ToRGBA6: // - code.WriteFmt(" ocol0 = val;\n"); + code.Write(" ocol0 = val;\n"); break; } - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -552,71 +551,70 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF ShaderCode code; EmitSamplerDeclarations(code, 0, 1, false); EmitPixelMainDeclaration(code, 1, 0, "float4", "", true); - code.WriteFmt("{{\n" - " int layer = int(v_tex0.z);\n" - " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n"); + code.Write("{{\n" + " int layer = int(v_tex0.z);\n" + " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n"); // Convert to a 32-bit value encompassing all channels, filling the most significant bits with // zeroes. - code.WriteFmt(" uint raw_value;\n"); + code.Write(" uint raw_value;\n"); switch (from_format) { case TextureFormat::I8: case TextureFormat::C8: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt(";\n" - " raw_value = uint(temp_value.r * 255.0);\n"); + code.Write(";\n" + " raw_value = uint(temp_value.r * 255.0);\n"); } break; case TextureFormat::IA8: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt( - ";\n" - " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n"); + code.Write(";\n" + " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n"); } break; case TextureFormat::I4: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt(";\n" - " raw_value = uint(temp_value.r * 15.0);\n"); + code.Write(";\n" + " raw_value = uint(temp_value.r * 15.0);\n"); } break; case TextureFormat::IA4: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt(";\n" - " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n"); + code.Write(";\n" + " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n"); } break; case TextureFormat::RGB565: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt(";\n" - " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n" - " (uint(temp_value.r * 31.0) << 11);\n"); + code.Write(";\n" + " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n" + " (uint(temp_value.r * 31.0) << 11);\n"); } break; case TextureFormat::RGB5A3: { - code.WriteFmt(" float4 temp_value = "); + code.Write(" float4 temp_value = "); EmitTextureLoad(code, 0, "coords"); - code.WriteFmt(";\n"); + code.Write(";\n"); // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits - code.WriteFmt( + code.Write( " if (temp_value.a > 0.878f) {{\n" " raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n" " (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n" @@ -638,45 +636,45 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF case TextureFormat::I8: case TextureFormat::C8: { - code.WriteFmt(" float orgba = float(raw_value & 0xFFu) / 255.0;\n" - " ocol0 = float4(orgba, orgba, orgba, orgba);\n"); + code.Write(" float orgba = float(raw_value & 0xFFu) / 255.0;\n" + " ocol0 = float4(orgba, orgba, orgba, orgba);\n"); } break; case TextureFormat::IA8: { - code.WriteFmt(" float orgb = float(raw_value & 0xFFu) / 255.0;\n" - " ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n"); + code.Write(" float orgb = float(raw_value & 0xFFu) / 255.0;\n" + " ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n"); } break; case TextureFormat::IA4: { - code.WriteFmt(" float orgb = float(raw_value & 0xFu) / 15.0;\n" - " ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n"); + code.Write(" float orgb = float(raw_value & 0xFu) / 15.0;\n" + " ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n"); } break; case TextureFormat::RGB565: { - code.WriteFmt(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n" - " float((raw_value >> 5) & 0x1Fu) / 31.0,\n" - " float(raw_value & 0x1Fu) / 31.0, 1.0);\n"); + code.Write(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n" + " float((raw_value >> 5) & 0x1Fu) / 31.0,\n" + " float(raw_value & 0x1Fu) / 31.0, 1.0);\n"); } break; case TextureFormat::RGB5A3: { - code.WriteFmt(" if ((raw_value & 0x8000u) != 0u) {{\n" - " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n" - " float((raw_value >> 5) & 0x1Fu) / 31.0,\n" - " float(raw_value & 0x1Fu) / 31.0, 1.0);\n" - " }} else {{\n" - " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n" - " float((raw_value >> 4) & 0x0Fu) / 15.0,\n" - " float(raw_value & 0x0Fu) / 15.0,\n" - " float((raw_value >> 12) & 0x07u) / 7.0);\n" - " }}\n"); + code.Write(" if ((raw_value & 0x8000u) != 0u) {{\n" + " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n" + " float((raw_value >> 5) & 0x1Fu) / 31.0,\n" + " float(raw_value & 0x1Fu) / 31.0, 1.0);\n" + " }} else {{\n" + " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n" + " float((raw_value >> 4) & 0x0Fu) / 15.0,\n" + " float(raw_value & 0x0Fu) / 15.0,\n" + " float((raw_value >> 12) & 0x07u) / 7.0);\n" + " }}\n"); } break; default: @@ -684,7 +682,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF return "{}\n"; } - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -694,14 +692,14 @@ std::string GenerateEFBRestorePixelShader() EmitSamplerDeclarations(code, 0, 2, false); EmitPixelMainDeclaration(code, 1, 0, "float4", GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : ""); - code.WriteFmt("{{\n" - " ocol0 = "); + code.Write("{{\n" + " ocol0 = "); EmitSampleTexture(code, 0, "v_tex0"); - code.WriteFmt(";\n"); - code.WriteFmt(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth"); + code.Write(";\n"); + code.Write(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth"); EmitSampleTexture(code, 1, "v_tex0"); - code.WriteFmt(".r;\n" - "}}\n"); + code.Write(".r;\n" + "}}\n"); return code.GetBuffer(); } @@ -711,22 +709,22 @@ std::string GenerateImGuiVertexShader() // Uniform buffer contains the viewport size, and we transform in the vertex shader. EmitUniformBufferDeclaration(code); - code.WriteFmt("{{\n" - "float2 u_rcp_viewport_size_mul2;\n" - "}};\n\n"); + code.Write("{{\n" + "float2 u_rcp_viewport_size_mul2;\n" + "}};\n\n"); EmitVertexMainDeclaration(code, 1, 1, true, 1, 1); - code.WriteFmt("{{\n" - " v_tex0 = float3(rawtex0.xy, 0.0);\n" - " v_col0 = rawcolor0;\n" - " opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0," - " 1.0 - rawpos.y * u_rcp_viewport_size_mul2.y, 0.0, 1.0);\n"); + code.Write("{{\n" + " v_tex0 = float3(rawtex0.xy, 0.0);\n" + " v_col0 = rawcolor0;\n" + " opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0," + " 1.0 - rawpos.y * u_rcp_viewport_size_mul2.y, 0.0, 1.0);\n"); // NDC space is flipped in Vulkan. if (GetAPIType() == APIType::Vulkan) - code.WriteFmt(" opos.y = -opos.y;\n"); + code.Write(" opos.y = -opos.y;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); return code.GetBuffer(); } @@ -735,11 +733,11 @@ std::string GenerateImGuiPixelShader() ShaderCode code; EmitSamplerDeclarations(code, 0, 1, false); EmitPixelMainDeclaration(code, 1, 1); - code.WriteFmt("{{\n" - " ocol0 = "); + code.Write("{{\n" + " ocol0 = "); EmitSampleTexture(code, 0, "float3(v_tex0.xy, 0.0)"); - code.WriteFmt(" * v_col0;\n" - "}}\n"); + code.Write(" * v_col0;\n" + "}}\n"); return code.GetBuffer(); } diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index dd8dc6df38..718bc6d4cb 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -73,133 +73,131 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h // Insert layout parameters if (host_config.backend_gs_instancing) { - out.WriteFmt("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index], - stereo ? 2 : 1); - out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", - vertex_out); + out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index], + stereo ? 2 : 1); + out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", + vertex_out); } else { - out.WriteFmt("layout({}) in;\n", primitives_ogl[primitive_type_index]); - out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", - stereo ? vertex_out * 2 : vertex_out); + out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]); + out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle", + stereo ? vertex_out * 2 : vertex_out); } } - out.WriteFmt("{}", s_lighting_struct); + out.Write("{}", s_lighting_struct); // uniforms if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); else - out.WriteFmt("cbuffer GSBlock {{\n"); + out.Write("cbuffer GSBlock {{\n"); - out.WriteFmt("\tfloat4 " I_STEREOPARAMS ";\n" - "\tfloat4 " I_LINEPTPARAMS ";\n" - "\tint4 " I_TEXOFFSET ";\n" - "}};\n"); + out.Write("\tfloat4 " I_STEREOPARAMS ";\n" + "\tfloat4 " I_LINEPTPARAMS ";\n" + "\tint4 " I_TEXOFFSET ";\n" + "}};\n"); - out.WriteFmt("struct VS_OUTPUT {{\n"); + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config, ""); - out.WriteFmt("}};\n"); + out.Write("}};\n"); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { if (host_config.backend_gs_instancing) - out.WriteFmt("#define InstanceID gl_InvocationID\n"); + out.Write("#define InstanceID gl_InvocationID\n"); - out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"); + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config, GetInterpolationQualifier(msaa, ssaa, true, true)); - out.WriteFmt("}} vs[{}];\n", vertex_in); + out.Write("}} vs[{}];\n", vertex_in); - out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n"); GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config, GetInterpolationQualifier(msaa, ssaa, true, false)); if (stereo) - out.WriteFmt("\tflat int layer;\n"); + out.Write("\tflat int layer;\n"); - out.WriteFmt("}} ps;\n"); + out.Write("}} ps;\n"); - out.WriteFmt("void main()\n{{\n"); + out.Write("void main()\n{{\n"); } else // D3D { - out.WriteFmt("struct VertexData {{\n"); - out.WriteFmt("\tVS_OUTPUT o;\n"); + out.Write("struct VertexData {{\n"); + out.Write("\tVS_OUTPUT o;\n"); if (stereo) - out.WriteFmt("\tuint layer : SV_RenderTargetArrayIndex;\n"); + out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n"); - out.WriteFmt("}};\n"); + out.Write("}};\n"); if (host_config.backend_gs_instancing) { - out.WriteFmt("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1); - out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream output, in uint " - "InstanceID : SV_GSInstanceID)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, - wireframe ? "Line" : "Triangle"); + out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1); + out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output, in uint " + "InstanceID : SV_GSInstanceID)\n{{\n", + primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); } else { - out.WriteFmt("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out); - out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream output)\n{{\n", - primitives_d3d[primitive_type_index], vertex_in, - wireframe ? "Line" : "Triangle"); + out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out); + out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream output)\n{{\n", + primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle"); } - out.WriteFmt("\tVertexData ps;\n"); + out.Write("\tVertexData ps;\n"); } if (primitive_type == PrimitiveType::Lines) { if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - out.WriteFmt("\tVS_OUTPUT start, end;\n"); + out.Write("\tVS_OUTPUT start, end;\n"); AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, host_config); AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, host_config); } else { - out.WriteFmt("\tVS_OUTPUT start = o[0];\n" - "\tVS_OUTPUT end = o[1];\n"); + out.Write("\tVS_OUTPUT start = o[0];\n" + "\tVS_OUTPUT end = o[1];\n"); } // GameCube/Wii's line drawing algorithm is a little quirky. It does not // use the correct line caps. Instead, the line caps are vertical or // horizontal depending the slope of the line. - out.WriteFmt("\tfloat2 offset;\n" - "\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n" - // FIXME: What does real hardware do when line is at a 45-degree angle? - // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. - "\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" - // Line is more tall. Extend geometry left and right. - // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] - "\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" - "\t}} else {{\n" - // Line is more wide. Extend geometry up and down. - // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] - "\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" - "\t}}\n"); + out.Write("\tfloat2 offset;\n" + "\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. + "\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + "\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + "\t}} else {{\n" + // Line is more wide. Extend geometry up and down. + // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + "\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + "\t}}\n"); } else if (primitive_type == PrimitiveType::Points) { if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - out.WriteFmt("\tVS_OUTPUT center;\n"); + out.Write("\tVS_OUTPUT center;\n"); AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, host_config); } else { - out.WriteFmt("\tVS_OUTPUT center = o[0];\n"); + out.Write("\tVS_OUTPUT center = o[0];\n"); } // Offset from center to upper right vertex // Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1] - out.WriteFmt("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS - ".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n"); + out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS + ".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n"); } if (stereo) @@ -207,19 +205,19 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h // If the GPU supports invocation we don't need a for loop and can simply use the // invocation identifier to determine which layer we're rendering. if (host_config.backend_gs_instancing) - out.WriteFmt("\tint eye = InstanceID;\n"); + out.Write("\tint eye = InstanceID;\n"); else - out.WriteFmt("\tfor (int eye = 0; eye < 2; ++eye) {{\n"); + out.Write("\tfor (int eye = 0; eye < 2; ++eye) {{\n"); } if (wireframe) - out.WriteFmt("\tVS_OUTPUT first;\n"); + out.Write("\tVS_OUTPUT first;\n"); - out.WriteFmt("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in); + out.Write("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - out.WriteFmt("\tVS_OUTPUT f;\n"); + out.Write("\tVS_OUTPUT f;\n"); AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, host_config); if (host_config.backend_depth_clamp && @@ -227,21 +225,21 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h { // On certain GPUs we have to consume the clip distance from the vertex shader // or else the other vertex shader outputs will get corrupted. - out.WriteFmt("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n" - "\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n"); + out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n" + "\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n"); } } else { - out.WriteFmt("\tVS_OUTPUT f = o[i];\n"); + out.Write("\tVS_OUTPUT f = o[i];\n"); } if (stereo) { // Select the output layer - out.WriteFmt("\tps.layer = eye;\n"); + out.Write("\tps.layer = eye;\n"); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt("\tgl_Layer = eye;\n"); + out.Write("\tgl_Layer = eye;\n"); // For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional // to the depth of the vertex. We retrieve the depth value from the w-component of the projected @@ -250,56 +248,56 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h // the depth value. This results in objects at a distance smaller than the convergence // distance to seemingly appear in front of the screen. // This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide" - out.WriteFmt("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n"); - out.WriteFmt("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n"); + out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n"); + out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n"); } if (primitive_type == PrimitiveType::Lines) { - out.WriteFmt("\tVS_OUTPUT l = f;\n" - "\tVS_OUTPUT r = f;\n"); + out.Write("\tVS_OUTPUT l = f;\n" + "\tVS_OUTPUT r = f;\n"); - out.WriteFmt("\tl.pos.xy -= offset * l.pos.w;\n" - "\tr.pos.xy += offset * r.pos.w;\n"); + out.Write("\tl.pos.xy -= offset * l.pos.w;\n" + "\tr.pos.xy += offset * r.pos.w;\n"); - out.WriteFmt("\tif (" I_TEXOFFSET "[2] != 0) {{\n"); - out.WriteFmt("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); + out.Write("\tif (" I_TEXOFFSET "[2] != 0) {{\n"); + out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); for (u32 i = 0; i < uid_data->numTexGens; ++i) { - out.WriteFmt("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); - out.WriteFmt("\t\tr.tex{}.x += texOffset;\n", i); + out.Write("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); + out.Write("\t\tr.tex{}.x += texOffset;\n", i); } - out.WriteFmt("\t}}\n"); + out.Write("\t}}\n"); EmitVertex(out, host_config, uid_data, "l", ApiType, wireframe, true); EmitVertex(out, host_config, uid_data, "r", ApiType, wireframe); } else if (primitive_type == PrimitiveType::Points) { - out.WriteFmt("\tVS_OUTPUT ll = f;\n" - "\tVS_OUTPUT lr = f;\n" - "\tVS_OUTPUT ul = f;\n" - "\tVS_OUTPUT ur = f;\n"); + out.Write("\tVS_OUTPUT ll = f;\n" + "\tVS_OUTPUT lr = f;\n" + "\tVS_OUTPUT ul = f;\n" + "\tVS_OUTPUT ur = f;\n"); - out.WriteFmt("\tll.pos.xy += float2(-1,-1) * offset;\n" - "\tlr.pos.xy += float2(1,-1) * offset;\n" - "\tul.pos.xy += float2(-1,1) * offset;\n" - "\tur.pos.xy += offset;\n"); + out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n" + "\tlr.pos.xy += float2(1,-1) * offset;\n" + "\tul.pos.xy += float2(-1,1) * offset;\n" + "\tur.pos.xy += offset;\n"); - out.WriteFmt("\tif (" I_TEXOFFSET "[3] != 0) {{\n"); - out.WriteFmt("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET - "[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n"); + out.Write("\tif (" I_TEXOFFSET "[3] != 0) {{\n"); + out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET + "[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n"); for (u32 i = 0; i < uid_data->numTexGens; ++i) { - out.WriteFmt("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i); - out.WriteFmt("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i); - out.WriteFmt("\t\tur.tex{}.xy += texOffset;\n", i); - out.WriteFmt("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i); - out.WriteFmt("\t}}\n"); + out.Write("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i); + out.Write("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i); + out.Write("\t\tur.tex{}.xy += texOffset;\n", i); + out.Write("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i); + out.Write("\t}}\n"); } - out.WriteFmt("\t}}\n"); + out.Write("\t}}\n"); EmitVertex(out, host_config, uid_data, "ll", ApiType, wireframe, true); EmitVertex(out, host_config, uid_data, "lr", ApiType, wireframe); @@ -311,14 +309,14 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h EmitVertex(out, host_config, uid_data, "f", ApiType, wireframe, true); } - out.WriteFmt("\t}}\n"); + out.Write("\t}}\n"); EndPrimitive(out, host_config, uid_data, ApiType, wireframe); if (stereo && !host_config.backend_gs_instancing) - out.WriteFmt("\t}}\n"); + out.Write("\t}}\n"); - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; } @@ -328,34 +326,34 @@ static void EmitVertex(ShaderCode& out, const ShaderHostConfig& host_config, APIType ApiType, bool wireframe, bool first_vertex) { if (wireframe && first_vertex) - out.WriteFmt("\tif (i == 0) first = {};\n", vertex); + out.Write("\tif (i == 0) first = {};\n", vertex); if (ApiType == APIType::OpenGL) { - out.WriteFmt("\tgl_Position = {}.pos;\n", vertex); + out.Write("\tgl_Position = {}.pos;\n", vertex); if (host_config.backend_depth_clamp) { - out.WriteFmt("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex); - out.WriteFmt("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex); + out.Write("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex); + out.Write("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex); } AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config); } else if (ApiType == APIType::Vulkan) { // Vulkan NDC space has Y pointing down (right-handed NDC space). - out.WriteFmt("\tgl_Position = {}.pos;\n", vertex); - out.WriteFmt("\tgl_Position.y = -gl_Position.y;\n"); + out.Write("\tgl_Position = {}.pos;\n", vertex); + out.Write("\tgl_Position.y = -gl_Position.y;\n"); AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config); } else { - out.WriteFmt("\tps.o = {};\n", vertex); + out.Write("\tps.o = {};\n", vertex); } if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt("\tEmitVertex();\n"); + out.Write("\tEmitVertex();\n"); else - out.WriteFmt("\toutput.Append(ps);\n"); + out.Write("\toutput.Append(ps);\n"); } static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config, @@ -365,9 +363,9 @@ static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config, EmitVertex(out, host_config, uid_data, "first", ApiType, wireframe); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt("\tEndPrimitive();\n"); + out.Write("\tEndPrimitive();\n"); else - out.WriteFmt("\toutput.RestartStrip();\n"); + out.Write("\toutput.RestartStrip();\n"); } void EnumerateGeometryShaderUids(const std::function& callback) diff --git a/Source/Core/VideoCommon/LightingShaderGen.cpp b/Source/Core/VideoCommon/LightingShaderGen.cpp index 0cde653d25..e4b8f16716 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/LightingShaderGen.cpp @@ -24,54 +24,53 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d { case LIGHTATTN_NONE: case LIGHTATTN_DIR: - object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); - object.WriteFmt("attn = 1.0;\n"); - object.WriteFmt("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n"); + object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); + object.Write("attn = 1.0;\n"); + object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n"); break; case LIGHTATTN_SPEC: - object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); - object.WriteFmt("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR - ".xyz)) : 0.0;\n", - LIGHT_DIR_PARAMS(index)); - object.WriteFmt("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index)); - object.WriteFmt("distAttn = {}(" LIGHT_DISTATT ".xyz);\n", - (diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index)); - object.WriteFmt("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " - "float3(1.0, attn, attn*attn));\n"); + object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); + object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR + ".xyz)) : 0.0;\n", + LIGHT_DIR_PARAMS(index)); + object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index)); + object.Write("distAttn = {}(" LIGHT_DISTATT ".xyz);\n", + (diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index)); + object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " + "float3(1.0, attn, attn*attn));\n"); break; case LIGHTATTN_SPOT: - object.WriteFmt("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index)); - object.WriteFmt("dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n", - LIGHT_DIR_PARAMS(index)); + object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index)); + object.Write("dist2 = dot(ldir, ldir);\n" + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n", + LIGHT_DIR_PARAMS(index)); // attn*attn may overflow - object.WriteFmt("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT - ".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n", - LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), - LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index)); + object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT + ".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n", + LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), + LIGHT_DISTATT_PARAMS(index)); break; } switch (diffusefunc) { case LIGHTDIF_NONE: - object.WriteFmt("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle, - swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); + object.Write("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle, + swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - object.WriteFmt("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL - ")));\n", - swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(", - swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); + object.Write("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL ")));\n", + swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(", + swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); break; default: ASSERT(0); } - object.WriteFmt("\n"); + object.Write("\n"); } // vertex shader @@ -84,21 +83,21 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d { for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++) { - object.WriteFmt("{{\n"); + object.Write("{{\n"); const bool colormatsource = !!(uid_data.matsource & (1 << j)); if (colormatsource) // from vertex { if ((components & (VB_HAS_COL0 << j)) != 0) - object.WriteFmt("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j); + object.Write("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j); else if ((components & VB_HAS_COL0) != 0) - object.WriteFmt("int4 mat = int4(round({}0 * 255.0));\n", in_color_name); + object.Write("int4 mat = int4(round({}0 * 255.0));\n", in_color_name); else - object.WriteFmt("int4 mat = int4(255, 255, 255, 255);\n"); + object.Write("int4 mat = int4(255, 255, 255, 255);\n"); } else // from color { - object.WriteFmt("int4 mat = {}[{}];\n", I_MATERIALS, j + 2); + object.Write("int4 mat = {}[{}];\n", I_MATERIALS, j + 2); } if ((uid_data.enablelighting & (1 << j)) != 0) @@ -107,28 +106,28 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d { if ((components & (VB_HAS_COL0 << j)) != 0) { - object.WriteFmt("lacc = int4(round({}{} * 255.0));\n", in_color_name, j); + object.Write("lacc = int4(round({}{} * 255.0));\n", in_color_name, j); } else if ((components & VB_HAS_COL0) != 0) { - object.WriteFmt("lacc = int4(round({}0 * 255.0));\n", in_color_name); + object.Write("lacc = int4(round({}0 * 255.0));\n", in_color_name); } else { // TODO: this isn't verified. Here we want to read the ambient from the vertex, // but the vertex itself has no color. So we don't know which value to read. // Returning 1.0 is the same as disabled lightning, so this could be fine - object.WriteFmt("lacc = int4(255, 255, 255, 255);\n"); + object.Write("lacc = int4(255, 255, 255, 255);\n"); } } else // from color { - object.WriteFmt("lacc = {}[{}];\n", I_MATERIALS, j); + object.Write("lacc = {}[{}];\n", I_MATERIALS, j); } } else { - object.WriteFmt("lacc = int4(255, 255, 255, 255);\n"); + object.Write("lacc = int4(255, 255, 255, 255);\n"); } // check if alpha is different @@ -138,15 +137,15 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d if (alphamatsource) // from vertex { if ((components & (VB_HAS_COL0 << j)) != 0) - object.WriteFmt("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j); + object.Write("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j); else if ((components & VB_HAS_COL0) != 0) - object.WriteFmt("mat.w = int(round({}0.w * 255.0));\n", in_color_name); + object.Write("mat.w = int(round({}0.w * 255.0));\n", in_color_name); else - object.WriteFmt("mat.w = 255;\n"); + object.Write("mat.w = 255;\n"); } else // from color { - object.WriteFmt("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2); + object.Write("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2); } } @@ -156,26 +155,26 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d { if ((components & (VB_HAS_COL0 << j)) != 0) { - object.WriteFmt("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j); + object.Write("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j); } else if ((components & VB_HAS_COL0) != 0) { - object.WriteFmt("lacc.w = int(round({}0.w * 255.0));\n", in_color_name); + object.Write("lacc.w = int(round({}0.w * 255.0));\n", in_color_name); } else { // TODO: The same for alpha: We want to read from vertex, but the vertex has no color - object.WriteFmt("lacc.w = 255;\n"); + object.Write("lacc.w = 255;\n"); } } else // from color { - object.WriteFmt("lacc.w = {}[{}].w;\n", I_MATERIALS, j); + object.Write("lacc.w = {}[{}].w;\n", I_MATERIALS, j); } } else { - object.WriteFmt("lacc.w = 255;\n"); + object.Write("lacc.w = 255;\n"); } if ((uid_data.enablelighting & (1 << j)) != 0) // Color lights @@ -194,9 +193,9 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d GenerateLightShader(object, uid_data, i, j + 2, true); } } - object.WriteFmt("lacc = clamp(lacc, 0, 255);\n"); - object.WriteFmt("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); - object.WriteFmt("}}\n"); + object.Write("lacc = clamp(lacc, 0, 255);\n"); + object.Write("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); + object.Write("}}\n"); } } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 8d4abb704f..288761af8e 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -365,98 +365,98 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_tex const ShaderHostConfig& host_config, bool bounding_box) { // dot product for integer vectors - out.WriteFmt("int idot(int3 x, int3 y)\n" - "{{\n" - "\tint3 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z;\n" - "}}\n"); + out.Write("int idot(int3 x, int3 y)\n" + "{{\n" + "\tint3 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z;\n" + "}}\n"); - out.WriteFmt("int idot(int4 x, int4 y)\n" - "{{\n" - "\tint4 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" - "}}\n\n"); + out.Write("int idot(int4 x, int4 y)\n" + "{{\n" + "\tint4 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" + "}}\n\n"); // rounding + casting to integer at once in a single function - out.WriteFmt("int iround(float x) {{ return int (round(x)); }}\n" - "int2 iround(float2 x) {{ return int2(round(x)); }}\n" - "int3 iround(float3 x) {{ return int3(round(x)); }}\n" - "int4 iround(float4 x) {{ return int4(round(x)); }}\n\n"); + out.Write("int iround(float x) {{ return int (round(x)); }}\n" + "int2 iround(float2 x) {{ return int2(round(x)); }}\n" + "int3 iround(float3 x) {{ return int3(round(x)); }}\n" + "int4 iround(float4 x) {{ return int4(round(x)); }}\n\n"); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); } else // D3D { // Declare samplers - out.WriteFmt("SamplerState samp[8] : register(s0);\n" - "\n" - "Texture2DArray Tex[8] : register(t0);\n"); + out.Write("SamplerState samp[8] : register(s0);\n" + "\n" + "Texture2DArray Tex[8] : register(t0);\n"); } - out.WriteFmt("\n"); + out.Write("\n"); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"); + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"); else - out.WriteFmt("cbuffer PSBlock : register(b0) {{\n"); + out.Write("cbuffer PSBlock : register(b0) {{\n"); - out.WriteFmt("\tint4 " I_COLORS "[4];\n" - "\tint4 " I_KCOLORS "[4];\n" - "\tint4 " I_ALPHA ";\n" - "\tfloat4 " I_TEXDIMS "[8];\n" - "\tint4 " I_ZBIAS "[2];\n" - "\tint4 " I_INDTEXSCALE "[2];\n" - "\tint4 " I_INDTEXMTX "[6];\n" - "\tint4 " I_FOGCOLOR ";\n" - "\tint4 " I_FOGI ";\n" - "\tfloat4 " I_FOGF ";\n" - "\tfloat4 " I_FOGRANGE "[3];\n" - "\tfloat4 " I_ZSLOPE ";\n" - "\tfloat2 " I_EFBSCALE ";\n" - "\tuint bpmem_genmode;\n" - "\tuint bpmem_alphaTest;\n" - "\tuint bpmem_fogParam3;\n" - "\tuint bpmem_fogRangeBase;\n" - "\tuint bpmem_dstalpha;\n" - "\tuint bpmem_ztex_op;\n" - "\tbool bpmem_late_ztest;\n" - "\tbool bpmem_rgba6_format;\n" - "\tbool bpmem_dither;\n" - "\tbool bpmem_bounding_box;\n" - "\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind - "\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel - "\tint4 konstLookup[32];\n" - "\tbool blend_enable;\n" - "\tuint blend_src_factor;\n" - "\tuint blend_src_factor_alpha;\n" - "\tuint blend_dst_factor;\n" - "\tuint blend_dst_factor_alpha;\n" - "\tbool blend_subtract;\n" - "\tbool blend_subtract_alpha;\n" - "}};\n\n"); - out.WriteFmt("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n" - "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n" - "#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n" - "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n" - "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n"); + out.Write("\tint4 " I_COLORS "[4];\n" + "\tint4 " I_KCOLORS "[4];\n" + "\tint4 " I_ALPHA ";\n" + "\tfloat4 " I_TEXDIMS "[8];\n" + "\tint4 " I_ZBIAS "[2];\n" + "\tint4 " I_INDTEXSCALE "[2];\n" + "\tint4 " I_INDTEXMTX "[6];\n" + "\tint4 " I_FOGCOLOR ";\n" + "\tint4 " I_FOGI ";\n" + "\tfloat4 " I_FOGF ";\n" + "\tfloat4 " I_FOGRANGE "[3];\n" + "\tfloat4 " I_ZSLOPE ";\n" + "\tfloat2 " I_EFBSCALE ";\n" + "\tuint bpmem_genmode;\n" + "\tuint bpmem_alphaTest;\n" + "\tuint bpmem_fogParam3;\n" + "\tuint bpmem_fogRangeBase;\n" + "\tuint bpmem_dstalpha;\n" + "\tuint bpmem_ztex_op;\n" + "\tbool bpmem_late_ztest;\n" + "\tbool bpmem_rgba6_format;\n" + "\tbool bpmem_dither;\n" + "\tbool bpmem_bounding_box;\n" + "\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind + "\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel + "\tint4 konstLookup[32];\n" + "\tbool blend_enable;\n" + "\tuint blend_src_factor;\n" + "\tuint blend_src_factor_alpha;\n" + "\tuint blend_dst_factor;\n" + "\tuint blend_dst_factor_alpha;\n" + "\tbool blend_subtract;\n" + "\tbool blend_subtract_alpha;\n" + "}};\n\n"); + out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n" + "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n" + "#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n" + "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n" + "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n"); if (host_config.per_pixel_lighting) { - out.WriteFmt("{}", s_lighting_struct); + out.Write("{}", s_lighting_struct); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); else - out.WriteFmt("cbuffer VSBlock : register(b1) {{\n"); + out.Write("cbuffer VSBlock : register(b1) {{\n"); - out.WriteFmt("{}", s_shader_uniforms); - out.WriteFmt("}};\n"); + out.Write("{}", s_shader_uniforms); + out.Write("}};\n"); } if (bounding_box) { - out.WriteFmt(R"( + out.Write(R"( #ifdef API_D3D globallycoherent RWBuffer bbox_data : register(u2); #define atomicMin InterlockedMin @@ -541,9 +541,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos const bool stereo = host_config.stereo; const u32 numStages = uid_data->genMode_numtevstages + 1; - out.WriteFmt("// Pixel Shader for TEV stages\n"); - out.WriteFmt("// {} TEV stages, {} texgens, {} IND stages\n", numStages, - uid_data->genMode_numtexgens, uid_data->genMode_numindstages); + out.Write("// Pixel Shader for TEV stages\n"); + out.Write("// {} TEV stages, {} texgens, {} IND stages\n", numStages, + uid_data->genMode_numtexgens, uid_data->genMode_numindstages); // Stuff that is shared between ubershaders and pixelgen. WritePixelShaderCommonHeader(out, api_type, uid_data->genMode_numtexgens, host_config, @@ -588,11 +588,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { // This is a #define which signals whatever early-z method the driver supports. - out.WriteFmt("FORCE_EARLY_Z; \n"); + out.Write("FORCE_EARLY_Z; \n"); } else { - out.WriteFmt("[earlydepthstencil]\n"); + out.Write("[earlydepthstencil]\n"); } } @@ -610,13 +610,13 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos { if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); } } else if (use_shader_blend) @@ -627,142 +627,141 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // shader if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); } } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); } if (uid_data->per_pixel_depth) - out.WriteFmt("#define depth gl_FragDepth\n"); + out.Write("#define depth gl_FragDepth\n"); if (host_config.backend_geometry_shaders) { - out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"); + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config, GetInterpolationQualifier(msaa, ssaa, true, true)); if (stereo) - out.WriteFmt("\tflat int layer;\n"); + out.Write("\tflat int layer;\n"); - out.WriteFmt("}};\n"); + out.Write("}};\n"); } else { // Let's set up attributes u32 counter = 0; - out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) { - out.WriteFmt("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); + out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.WriteFmt("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.WriteFmt("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } } - out.WriteFmt("void main()\n{{\n"); - out.WriteFmt("\tfloat4 rawpos = gl_FragCoord;\n"); + out.Write("void main()\n{{\n"); + out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); if (use_shader_blend) { // Store off a copy of the initial fb value for blending - out.WriteFmt("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" - "\tfloat4 ocol0;\n" - "\tfloat4 ocol1;\n"); + out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n" + "\tfloat4 ocol0;\n" + "\tfloat4 ocol1;\n"); } } else // D3D { - out.WriteFmt("void main(\n"); + out.Write("void main(\n"); if (uid_data->uint_output) { - out.WriteFmt(" out uint4 ocol0 : SV_Target,\n"); + out.Write(" out uint4 ocol0 : SV_Target,\n"); } else { - out.WriteFmt(" out float4 ocol0 : SV_Target0,\n" - " out float4 ocol1 : SV_Target1,\n"); + out.Write(" out float4 ocol0 : SV_Target0,\n" + " out float4 ocol1 : SV_Target1,\n"); } - out.WriteFmt("{}" - " in float4 rawpos : SV_Position,\n", - uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : ""); + out.Write("{}" + " in float4 rawpos : SV_Position,\n", + uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : ""); - out.WriteFmt(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa)); + out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); + out.Write(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa)); // compute window position if needed because binding semantic WPOS is not widely supported for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) { - out.WriteFmt(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, - i); + out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, + i); } if (!host_config.fast_depth_calc) { - out.WriteFmt(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens); + out.Write(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + uid_data->genMode_numtexgens); } if (per_pixel_lighting) { - out.WriteFmt(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens + 1); - out.WriteFmt(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - uid_data->genMode_numtexgens + 2); + out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + uid_data->genMode_numtexgens + 1); + out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + uid_data->genMode_numtexgens + 2); } if (host_config.backend_geometry_shaders) { - out.WriteFmt(",\n in float clipDist0 : SV_ClipDistance0\n" - ",\n in float clipDist1 : SV_ClipDistance1\n"); + out.Write(",\n in float clipDist0 : SV_ClipDistance0\n" + ",\n in float clipDist1 : SV_ClipDistance1\n"); } if (stereo) - out.WriteFmt(",\n in uint layer : SV_RenderTargetArrayIndex\n"); - out.WriteFmt(" ) {{\n"); + out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); + out.Write(" ) {{\n"); } - out.WriteFmt( - "\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS "[3], prev = " I_COLORS - "[0];\n" - "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, " - "0, 0);\n" - "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" - "\tint alphabump=0;\n" - "\tint3 tevcoord=int3(0, 0, 0);\n" - "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n" - "\tint4 " - "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0," - "0);\n\n"); // tev combiner inputs + out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS + "[3], prev = " I_COLORS "[0];\n" + "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, " + "0, 0);\n" + "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" + "\tint alphabump=0;\n" + "\tint3 tevcoord=int3(0, 0, 0);\n" + "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n" + "\tint4 " + "tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0," + "0);\n\n"); // tev combiner inputs // On GLSL, input variables must not be assigned to. // This is why we declare these variables locally instead. - out.WriteFmt("\tfloat4 col0 = colors_0;\n" - "\tfloat4 col1 = colors_1;\n"); + out.Write("\tfloat4 col0 = colors_0;\n" + "\tfloat4 col1 = colors_1;\n"); if (per_pixel_lighting) { - out.WriteFmt("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n" - "\tfloat3 pos = WorldPos;\n"); + out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n" + "\tfloat3 pos = WorldPos;\n"); - out.WriteFmt("\tint4 lacc;\n" - "\tfloat3 ldir, h, cosAttn, distAttn;\n" - "\tfloat dist, dist2, attn;\n"); + out.Write("\tint4 lacc;\n" + "\tfloat3 ldir, h, cosAttn, distAttn;\n" + "\tfloat dist, dist2, attn;\n"); // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. @@ -776,16 +775,16 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // HACK to handle cases where the tex gen is not enabled if (uid_data->genMode_numtexgens == 0) { - out.WriteFmt("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); + out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); } else { out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS + uid_data->genMode_numtexgens - 1); for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i) { - out.WriteFmt("\tint2 fixpoint_uv{} = int2(", i); - out.WriteFmt("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); - out.WriteFmt(" * " I_TEXDIMS "[{}].zw);\n", i); + out.Write("\tint2 fixpoint_uv{} = int2(", i); + out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); + out.Write(" * " I_TEXDIMS "[{}].zw);\n", i); // TODO: S24 overflows here? } } @@ -800,15 +799,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (texcoord < uid_data->genMode_numtexgens) { out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); - out.WriteFmt("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, - (i & 1) != 0 ? "zw" : "xy"); + out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, + (i & 1) != 0 ? "zw" : "xy"); } else { - out.WriteFmt("\ttempcoord = int2(0, 0);\n"); + out.Write("\ttempcoord = int2(0, 0);\n"); } - out.WriteFmt("\tint3 iindtex{} = ", i); + out.Write("\tint3 iindtex{} = ", i); SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type); } } @@ -828,14 +827,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos last_ac.hex = uid_data->stagehash[uid_data->genMode_numtevstages].ac; if (last_cc.dest != 0) { - out.WriteFmt("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); + out.Write("\tprev.rgb = {};\n", tev_c_output_table[last_cc.dest]); } if (last_ac.dest != 0) { - out.WriteFmt("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); + out.Write("\tprev.a = {};\n", tev_a_output_table[last_ac.dest]); } } - out.WriteFmt("\tprev = prev & 255;\n"); + out.Write("\tprev = prev & 255;\n"); // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha @@ -852,14 +851,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE); out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE); - out.WriteFmt("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); + out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); // Opengl has reversed vertical screenspace coordinates if (api_type == APIType::OpenGL) - out.WriteFmt("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT); + out.Write("\tscreenpos.y = {}.0 - screenpos.y;\n", EFB_HEIGHT); - out.WriteFmt("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE - ".y * screenpos.y);\n"); + out.Write("\tint zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE + ".y * screenpos.y);\n"); } else if (!host_config.fast_depth_calc) { @@ -870,17 +869,17 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // the host GPU driver from performing any early depth test optimizations. out.SetConstantsUsed(C_ZBIAS + 1, C_ZBIAS + 1); // the screen space depth value = far z + (clip z / clip w) * z range - out.WriteFmt("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS - "[1].y));\n"); + out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS + "[1].y));\n"); } else { if (!host_config.backend_reversed_depth_range) - out.WriteFmt("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); + out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else - out.WriteFmt("\tint zCoord = int(rawpos.z * 16777216.0);\n"); + out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); } - out.WriteFmt("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); + out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); // depth texture can safely be ignored if the result won't be written to the depth buffer // (early_ztest) and isn't used for fog either @@ -890,9 +889,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (uid_data->per_pixel_depth && uid_data->early_ztest) { if (!host_config.backend_reversed_depth_range) - out.WriteFmt("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); + out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else - out.WriteFmt("\tdepth = float(zCoord) / 16777216.0;\n"); + out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); } // Note: depth texture output is only written to depth buffer if late depth test is used @@ -903,17 +902,17 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos // use the texture input of the last texture stage (textemp), hopefully this has been read and // is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS + 1); - out.WriteFmt("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n", - (uid_data->ztex_op == ZTEXTURE_ADD) ? "+ zCoord" : ""); - out.WriteFmt("\tzCoord = zCoord & 0xFFFFFF;\n"); + out.Write("\tzCoord = idot(" I_ZBIAS "[0].xyzw, textemp.xyzw) + " I_ZBIAS "[1].w {};\n", + (uid_data->ztex_op == ZTEXTURE_ADD) ? "+ zCoord" : ""); + out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); } if (uid_data->per_pixel_depth && uid_data->late_ztest) { if (!host_config.backend_reversed_depth_range) - out.WriteFmt("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); + out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else - out.WriteFmt("\tdepth = float(zCoord) / 16777216.0;\n"); + out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); } // No dithering for RGB8 mode @@ -921,8 +920,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos { // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering // Here the matrix is encoded into the two factor constants - out.WriteFmt("\tint2 dither = int2(rawpos.xy) & 1;\n"); - out.WriteFmt("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n"); + out.Write("\tint2 dither = int2(rawpos.xy) & 1;\n"); + out.Write("\tprev.rgb = (prev.rgb - (prev.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);\n"); } WriteFog(out, uid_data); @@ -935,9 +934,9 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos WriteBlend(out, uid_data); if (uid_data->bounding_box) - out.WriteFmt("\tUpdateBoundingBox(rawpos.xy);\n"); + out.Write("\tUpdateBoundingBox(rawpos.xy);\n"); - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; } @@ -946,7 +945,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i APIType api_type, bool stereo) { const auto& stage = uid_data->stagehash[n]; - out.WriteFmt("\n\t// TEV stage {}\n", n); + out.Write("\n\t// TEV stage {}\n", n); // HACK to handle cases where the tex gen is not enabled u32 texcoord = stage.tevorders_texcoord; @@ -959,7 +958,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i TevStageIndirect tevind; tevind.hex = stage.tevind; - out.WriteFmt("\t// indirect op\n"); + out.Write("\t// indirect op\n"); // Perform the indirect op on the incoming regular coordinates // using iindtex{} as the offset coords @@ -980,8 +979,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i "248", }; - out.WriteFmt("alphabump = iindtex{}.{} & {};\n", tevind.bt.Value(), - tev_ind_alpha_sel[tevind.bs], tev_ind_alpha_mask[tevind.fmt]); + out.Write("alphabump = iindtex{}.{} & {};\n", tevind.bt.Value(), tev_ind_alpha_sel[tevind.bs], + tev_ind_alpha_mask[tevind.fmt]); } else { @@ -997,8 +996,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i "15", "7", }; - out.WriteFmt("\tint3 iindtevcrd{} = iindtex{} & {};\n", n, tevind.bt.Value(), - tev_ind_fmt_mask[tevind.fmt]); + out.Write("\tint3 iindtevcrd{} = iindtex{} & {};\n", n, tevind.bt.Value(), + tev_ind_fmt_mask[tevind.fmt]); // bias - TODO: Check if this needs to be this complicated... // indexed by bias @@ -1016,19 +1015,19 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (tevind.bias == ITB_S || tevind.bias == ITB_T || tevind.bias == ITB_U) { - out.WriteFmt("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias], - tev_ind_bias_add[tevind.fmt]); + out.Write("\tiindtevcrd{}.{} += int({});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == ITB_ST || tevind.bias == ITB_SU || tevind.bias == ITB_TU) { - out.WriteFmt("\tiindtevcrd{}.{} += int2({}, {});\n", n, tev_ind_bias_field[tevind.bias], - tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt]); + out.Write("\tiindtevcrd{}.{} += int2({}, {});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt]); } else if (tevind.bias == ITB_STU) { - out.WriteFmt("\tiindtevcrd{}.{} += int3({}, {}, {});\n", n, tev_ind_bias_field[tevind.bias], - tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt], - tev_ind_bias_add[tevind.fmt]); + out.Write("\tiindtevcrd{}.{} += int3({}, {}, {});\n", n, tev_ind_bias_field[tevind.bias], + tev_ind_bias_add[tevind.fmt], tev_ind_bias_add[tevind.fmt], + tev_ind_bias_add[tevind.fmt]); } // multiply by offset matrix and scale - calculations are likely to overflow badly, @@ -1038,26 +1037,23 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i const u32 mtxidx = 2 * (tevind.mid - 1); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); - out.WriteFmt("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX - "[{}].xyz, iindtevcrd{}), idot(" I_INDTEXMTX - "[{}].xyz, iindtevcrd{})) >> 3;\n", - n, mtxidx, n, mtxidx + 1, n); + out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX + "[{}].xyz, iindtevcrd{}), idot(" I_INDTEXMTX "[{}].xyz, iindtevcrd{})) >> 3;\n", + n, mtxidx, n, mtxidx + 1, n); // TODO: should use a shader uid branch for this for better performance if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION)) { - out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= indtexmtx_w_inverse_{};\n", n, n); + out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= indtexmtx_w_inverse_{};\n", n, n); } else { - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); } } else if (tevind.mid <= 7 && has_tex_coord) @@ -1066,22 +1062,20 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i const u32 mtxidx = 2 * (tevind.mid - 5); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); - out.WriteFmt("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n, - texcoord, n); + out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n, + texcoord, n); if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION)) { - out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n); + out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n); } else { - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); } } else if (tevind.mid <= 11 && has_tex_coord) @@ -1090,33 +1084,31 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i const u32 mtxidx = 2 * (tevind.mid - 9); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); - out.WriteFmt("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n, - texcoord, n); + out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n, + texcoord, n); if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BITWISE_OP_NEGATION)) { - out.WriteFmt("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n); + out.Write("\tint indtexmtx_w_inverse_{} = -" I_INDTEXMTX "[{}].w;\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= (indtexmtx_w_inverse_{});\n", n, n); } else { - out.WriteFmt("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX - "[{}].w;\n", - mtxidx, n, mtxidx); - out.WriteFmt("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); + out.Write("\tif (" I_INDTEXMTX "[{}].w >= 0) indtevtrans{} >>= " I_INDTEXMTX "[{}].w;\n", + mtxidx, n, mtxidx); + out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); } } else { - out.WriteFmt("\tint2 indtevtrans{} = int2(0, 0);\n", n); + out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n); } } else { - out.WriteFmt("\tint2 indtevtrans{} = int2(0, 0);\n", n); + out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n); } // --------- @@ -1131,40 +1123,40 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // wrap S if (tevind.sw == ITW_OFF) { - out.WriteFmt("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord); + out.Write("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord); } else if (tevind.sw == ITW_0) { - out.WriteFmt("\twrappedcoord.x = 0;\n"); + out.Write("\twrappedcoord.x = 0;\n"); } else { - out.WriteFmt("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord, - tev_ind_wrap_start[tevind.sw]); + out.Write("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord, + tev_ind_wrap_start[tevind.sw]); } // wrap T if (tevind.tw == ITW_OFF) { - out.WriteFmt("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord); + out.Write("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord); } else if (tevind.tw == ITW_0) { - out.WriteFmt("\twrappedcoord.y = 0;\n"); + out.Write("\twrappedcoord.y = 0;\n"); } else { - out.WriteFmt("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord, - tev_ind_wrap_start[tevind.tw]); + out.Write("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord, + tev_ind_wrap_start[tevind.tw]); } if (tevind.fb_addprev) // add previous tevcoord - out.WriteFmt("\ttevcoord.xy += wrappedcoord + indtevtrans{};\n", n); + out.Write("\ttevcoord.xy += wrappedcoord + indtevtrans{};\n", n); else - out.WriteFmt("\ttevcoord.xy = wrappedcoord + indtevtrans{};\n", n); + out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans{};\n", n); // Emulate s24 overflows - out.WriteFmt("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); + out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } TevStageCombiner::ColorCombiner cc; @@ -1186,7 +1178,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i '\0', }; - out.WriteFmt("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap); + out.Write("\trastemp = {}.{};\n", tev_ras_table[stage.tevorders_colorchan], rasswap); } if (stage.tevorders_enable) @@ -1204,24 +1196,24 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i { // calc tevcord if (has_tex_coord) - out.WriteFmt("\ttevcoord.xy = fixpoint_uv{};\n", texcoord); + out.Write("\ttevcoord.xy = fixpoint_uv{};\n", texcoord); else - out.WriteFmt("\ttevcoord.xy = int2(0, 0);\n"); + out.Write("\ttevcoord.xy = int2(0, 0);\n"); } - out.WriteFmt("\ttextemp = "); + out.Write("\ttextemp = "); SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type); } else { - out.WriteFmt("\ttextemp = int4(255, 255, 255, 255);\n"); + out.Write("\ttextemp = int4(255, 255, 255, 255);\n"); } if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) { - out.WriteFmt("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc], - tev_ksel_table_a[stage.tevksel_ka]); + out.Write("\tkonsttemp = int4({}, {});\n", tev_ksel_table_c[stage.tevksel_kc], + tev_ksel_table_a[stage.tevksel_ka]); if (stage.tevksel_kc > 7) { @@ -1250,16 +1242,16 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i if (ac.dest >= GX_TEVREG0) out.SetConstantsUsed(C_COLORS + ac.dest, C_COLORS + ac.dest); - out.WriteFmt("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a], - tev_a_input_table[ac.a]); - out.WriteFmt("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b], - tev_a_input_table[ac.b]); - out.WriteFmt("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c], - tev_a_input_table[ac.c]); - out.WriteFmt("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]); + out.Write("\ttevin_a = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.a], + tev_a_input_table[ac.a]); + out.Write("\ttevin_b = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.b], + tev_a_input_table[ac.b]); + out.Write("\ttevin_c = int4({}, {})&int4(255, 255, 255, 255);\n", tev_c_input_table[cc.c], + tev_a_input_table[ac.c]); + out.Write("\ttevin_d = int4({}, {});\n", tev_c_input_table[cc.d], tev_a_input_table[ac.d]); - out.WriteFmt("\t// color combine\n"); - out.WriteFmt("\t{} = clamp(", tev_c_output_table[cc.dest]); + out.Write("\t// color combine\n"); + out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]); if (cc.bias != TEVBIAS_COMPARE) { WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.shift, false); @@ -1282,17 +1274,17 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i }; const u32 mode = (cc.shift << 1) | cc.op; - out.WriteFmt(" tevin_d.rgb + "); - out.WriteFmt("{}", function_table[mode]); + out.Write(" tevin_d.rgb + "); + out.Write("{}", function_table[mode]); } if (cc.clamp) - out.WriteFmt(", int3(0,0,0), int3(255,255,255))"); + out.Write(", int3(0,0,0), int3(255,255,255))"); else - out.WriteFmt(", int3(-1024,-1024,-1024), int3(1023,1023,1023))"); - out.WriteFmt(";\n"); + out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))"); + out.Write(";\n"); - out.WriteFmt("\t// alpha combine\n"); - out.WriteFmt("\t{} = clamp(", tev_a_output_table[ac.dest]); + out.Write("\t// alpha combine\n"); + out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]); if (ac.bias != TEVBIAS_COMPARE) { WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.shift, true); @@ -1311,15 +1303,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i }; const u32 mode = (ac.shift << 1) | ac.op; - out.WriteFmt(" tevin_d.a + "); - out.WriteFmt("{}", function_table[mode]); + out.Write(" tevin_d.a + "); + out.Write("{}", function_table[mode]); } if (ac.clamp) - out.WriteFmt(", 0, 255)"); + out.Write(", 0, 255)"); else - out.WriteFmt(", -1024, 1023)"); + out.Write(", -1024, 1023)"); - out.WriteFmt(";\n"); + out.Write(";\n"); } static void WriteTevRegular(ShaderCode& out, std::string_view components, int bias, int op, @@ -1364,14 +1356,12 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, int bi // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - a rounding bias is added before dividing by 256 - out.WriteFmt("(((tevin_d.{}{}){})", components, tev_bias_table[bias], - tev_scale_table_left[shift]); - out.WriteFmt(" {} ", tev_op_table[op]); - out.WriteFmt( - "(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)", - components, components, components, components, components, tev_scale_table_left[shift], - tev_lerp_bias[2 * op + ((shift == 3) == alpha)]); - out.WriteFmt("){}", tev_scale_table_right[shift]); + out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[shift]); + out.Write(" {} ", tev_op_table[op]); + out.Write("(((((tevin_a.{}<<8) + (tevin_b.{}-tevin_a.{})*(tevin_c.{}+(tevin_c.{}>>7))){}){})>>8)", + components, components, components, components, components, tev_scale_table_left[shift], + tev_lerp_bias[2 * op + ((shift == 3) == alpha)]); + out.Write("){}", tev_scale_table_right[shift]); } static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap, @@ -1381,15 +1371,14 @@ static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::stri if (api_type == APIType::D3D) { - out.WriteFmt("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS - "[{}].xy, {}))).{};\n", - texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); + out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS + "[{}].xy, {}))).{};\n", + texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); } else { - out.WriteFmt("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS - "[{}].xy, {}))).{};\n", - texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); + out.Write("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS "[{}].xy, {}))).{};\n", + texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); } } @@ -1422,52 +1411,52 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat const auto write_alpha_func = [&out](int index, std::string_view ref) { const bool has_no_arguments = index == 0 || index == tev_alpha_funcs_table.size() - 1; if (has_no_arguments) - out.WriteFmt("{}", tev_alpha_funcs_table[index]); + out.Write("{}", tev_alpha_funcs_table[index]); else - out.WriteFmt(tev_alpha_funcs_table[index], ref); + out.Write(tev_alpha_funcs_table[index], ref); }; out.SetConstantsUsed(C_ALPHA, C_ALPHA); if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_NEGATED_BOOLEAN)) - out.WriteFmt("\tif(( "); + out.Write("\tif(( "); else - out.WriteFmt("\tif(!( "); + out.Write("\tif(!( "); // Lookup the first component from the alpha function table const int comp0_index = uid_data->alpha_test_comp0; write_alpha_func(comp0_index, alpha_ref[0]); // Lookup the logic op - out.WriteFmt("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]); + out.Write("{}", tev_alpha_funclogic_table[uid_data->alpha_test_logic]); // Lookup the second component from the alpha function table const int comp1_index = uid_data->alpha_test_comp1; write_alpha_func(comp1_index, alpha_ref[1]); if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_NEGATED_BOOLEAN)) - out.WriteFmt(") == false) {{\n"); + out.Write(") == false) {{\n"); else - out.WriteFmt(")) {{\n"); + out.Write(")) {{\n"); - out.WriteFmt("\t\tocol0 = float4(0.0, 0.0, 0.0, 0.0);\n"); + out.Write("\t\tocol0 = float4(0.0, 0.0, 0.0, 0.0);\n"); if (use_dual_source && !(api_type == APIType::D3D && uid_data->uint_output)) - out.WriteFmt("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n"); + out.Write("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n"); if (per_pixel_depth) { - out.WriteFmt("\t\tdepth = {};\n", - !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0"); + out.Write("\t\tdepth = {};\n", + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0"); } // ZCOMPLOC HACK: if (!uid_data->alpha_test_use_zcomploc_hack) { - out.WriteFmt("\t\tdiscard;\n"); + out.Write("\t\tdiscard;\n"); if (api_type == APIType::D3D) - out.WriteFmt("\t\treturn;\n"); + out.Write("\t\treturn;\n"); } - out.WriteFmt("\t}}\n"); + out.Write("\t}}\n"); } constexpr std::array tev_fog_funcs_table{ @@ -1497,14 +1486,14 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) // renderer) // Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead? // That's equivalent, but keeps the lower bits of Zs. - out.WriteFmt("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI - ".y - (zCoord >> " I_FOGI ".w));\n"); + out.Write("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI + ".w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - out.WriteFmt("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n"); + out.Write("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k @@ -1512,22 +1501,22 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) if (uid_data->fog_RangeBaseEnabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); - out.WriteFmt("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" - "\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n" - "\tuint indexlower = uint(floatindex);\n" - "\tuint indexupper = indexlower + 1u;\n" - "\tfloat klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n" - "\tfloat kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n" - "\tfloat k = lerp(klower, kupper, frac(floatindex));\n" - "\tfloat x_adjust = sqrt(offset * offset + k * k) / k;\n" - "\tze *= x_adjust;\n"); + out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" + "\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n" + "\tuint indexlower = uint(floatindex);\n" + "\tuint indexupper = indexlower + 1u;\n" + "\tfloat klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n" + "\tfloat kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n" + "\tfloat k = lerp(klower, kupper, frac(floatindex));\n" + "\tfloat x_adjust = sqrt(offset * offset + k * k) / k;\n" + "\tze *= x_adjust;\n"); } - out.WriteFmt("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n"); + out.Write("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n"); if (uid_data->fog_fsel > 3) { - out.WriteFmt("{}", tev_fog_funcs_table[uid_data->fog_fsel]); + out.Write("{}", tev_fog_funcs_table[uid_data->fog_fsel]); } else { @@ -1535,8 +1524,8 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", uid_data->fog_fsel); } - out.WriteFmt("\tint ifog = iround(fog * 256.0);\n"); - out.WriteFmt("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); + out.Write("\tint ifog = iround(fog * 256.0);\n"); + out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); } static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data, @@ -1546,33 +1535,33 @@ static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid if (api_type == APIType::D3D && uid_data->uint_output) { if (uid_data->rgba6_format) - out.WriteFmt("\tocol0 = uint4(prev & 0xFC);\n"); + out.Write("\tocol0 = uint4(prev & 0xFC);\n"); else - out.WriteFmt("\tocol0 = uint4(prev);\n"); + out.Write("\tocol0 = uint4(prev);\n"); return; } if (uid_data->rgba6_format) - out.WriteFmt("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n"); + out.Write("\tocol0.rgb = float3(prev.rgb >> 2) / 63.0;\n"); else - out.WriteFmt("\tocol0.rgb = float3(prev.rgb) / 255.0;\n"); + out.Write("\tocol0.rgb = float3(prev.rgb) / 255.0;\n"); // Colors will be blended against the 8-bit alpha from ocol1 and // the 6-bit alpha from ocol0 will be written to the framebuffer if (uid_data->useDstAlpha) { out.SetConstantsUsed(C_ALPHA, C_ALPHA); - out.WriteFmt("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n"); + out.Write("\tocol0.a = float(" I_ALPHA ".a >> 2) / 63.0;\n"); // Use dual-source color blending to perform dst alpha in a single pass if (use_dual_source) - out.WriteFmt("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); } else { - out.WriteFmt("\tocol0.a = float(prev.a >> 2) / 63.0;\n"); + out.Write("\tocol0.a = float(prev.a >> 2) / 63.0;\n"); if (use_dual_source) - out.WriteFmt("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); + out.Write("\tocol1 = float4(0.0, 0.0, 0.0, float(prev.a) / 255.0);\n"); } } @@ -1620,34 +1609,34 @@ static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data) "initial_ocol0.a;", // DSTALPHA "1.0 - initial_ocol0.a;", // INVDSTALPHA }; - out.WriteFmt("\tfloat4 blend_src;\n"); - out.WriteFmt("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]); - out.WriteFmt("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]); - out.WriteFmt("\tfloat4 blend_dst;\n"); - out.WriteFmt("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]); - out.WriteFmt("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]); + out.Write("\tfloat4 blend_src;\n"); + out.Write("\tblend_src.rgb = {}\n", blend_src_factor[uid_data->blend_src_factor]); + out.Write("\tblend_src.a = {}\n", blend_src_factor_alpha[uid_data->blend_src_factor_alpha]); + out.Write("\tfloat4 blend_dst;\n"); + out.Write("\tblend_dst.rgb = {}\n", blend_dst_factor[uid_data->blend_dst_factor]); + out.Write("\tblend_dst.a = {}\n", blend_dst_factor_alpha[uid_data->blend_dst_factor_alpha]); - out.WriteFmt("\tfloat4 blend_result;\n"); + out.Write("\tfloat4 blend_result;\n"); if (uid_data->blend_subtract) { - out.WriteFmt("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * " - "blend_src.rgb;\n"); + out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * " + "blend_src.rgb;\n"); } else { - out.WriteFmt( + out.Write( "\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;\n"); } if (uid_data->blend_subtract_alpha) - out.WriteFmt("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"); + out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"); else - out.WriteFmt("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); } else { - out.WriteFmt("\tfloat4 blend_result = ocol0;\n"); + out.Write("\tfloat4 blend_result = ocol0;\n"); } - out.WriteFmt("\treal_ocol0 = blend_result;\n"); + out.Write("\treal_ocol0 = blend_result;\n"); } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 75d77db687..f69dbabe99 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -91,20 +91,20 @@ static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string std::string_view type, std::string_view name, int var_index, std::string_view semantic = {}, int semantic_index = -1) { - object.WriteFmt("\t{} {} {}", qualifier, type, name); + object.Write("\t{} {} {}", qualifier, type, name); if (var_index != -1) - object.WriteFmt("{}", var_index); + object.Write("{}", var_index); if (api_type == APIType::D3D && !semantic.empty()) { if (semantic_index != -1) - object.WriteFmt(" : {}{}", semantic, semantic_index); + object.Write(" : {}{}", semantic, semantic_index); else - object.WriteFmt(" : {}", semantic); + object.Write(" : {}", semantic); } - object.WriteFmt(";\n"); + object.Write(";\n"); } void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, @@ -138,26 +138,26 @@ void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens, const ShaderHostConfig& host_config) { - object.WriteFmt("\t{}.pos = {}.pos;\n", a, b); - object.WriteFmt("\t{}.colors_0 = {}.colors_0;\n", a, b); - object.WriteFmt("\t{}.colors_1 = {}.colors_1;\n", a, b); + object.Write("\t{}.pos = {}.pos;\n", a, b); + object.Write("\t{}.colors_0 = {}.colors_0;\n", a, b); + object.Write("\t{}.colors_1 = {}.colors_1;\n", a, b); for (unsigned int i = 0; i < texgens; ++i) - object.WriteFmt("\t{}.tex{} = {}.tex{};\n", a, i, b, i); + object.Write("\t{}.tex{} = {}.tex{};\n", a, i, b, i); if (!host_config.fast_depth_calc) - object.WriteFmt("\t{}.clipPos = {}.clipPos;\n", a, b); + object.Write("\t{}.clipPos = {}.clipPos;\n", a, b); if (host_config.per_pixel_lighting) { - object.WriteFmt("\t{}.Normal = {}.Normal;\n", a, b); - object.WriteFmt("\t{}.WorldPos = {}.WorldPos;\n", a, b); + object.Write("\t{}.Normal = {}.Normal;\n", a, b); + object.Write("\t{}.WorldPos = {}.WorldPos;\n", a, b); } if (host_config.backend_geometry_shaders) { - object.WriteFmt("\t{}.clipDist0 = {}.clipDist0;\n", a, b); - object.WriteFmt("\t{}.clipDist1 = {}.clipDist1;\n", a, b); + object.Write("\t{}.clipDist0 = {}.clipDist0;\n", a, b); + object.Write("\t{}.clipDist1 = {}.clipDist1;\n", a, b); } } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index d816d46fae..98d01a3ff4 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -104,21 +103,9 @@ public: ShaderCode() { m_buffer.reserve(16384); } const std::string& GetBuffer() const { return m_buffer; } - // Deprecated: Writes format strings using traditional printf format strings. - void Write(const char* fmt, ...) -#ifdef __GNUC__ - __attribute__((format(printf, 2, 3))) -#endif - { - va_list arglist; - va_start(arglist, fmt); - m_buffer += StringFromFormatV(fmt, arglist); - va_end(arglist); - } - // Writes format strings using fmtlib format strings. template - void WriteFmt(std::string_view format, Args&&... args) + void Write(std::string_view format, Args&&... args) { fmt::format_to(std::back_inserter(m_buffer), format, std::forward(args)...); } diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index fefcce0d1d..46870c4b41 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -60,63 +60,63 @@ static void WriteHeader(ShaderCode& code, APIType api_type) { // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) - code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n" - " int4 position;\n" - " float y_scale;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" - "}};\n"); + code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" + " int4 position;\n" + " float y_scale;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float3 filter_coefficients;\n" + "}};\n"); if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n" - " float3 v_tex0;\n" - "}};\n"); + code.Write("VARYING_LOCATION(0) in VertexData {{\n" + " float3 v_tex0;\n" + "}};\n"); } else { - code.WriteFmt("VARYING_LOCATION(0) in float3 v_tex0;\n"); + code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n"); } - code.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" - "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); + code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" + "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); } else // D3D { - code.WriteFmt("cbuffer PSBlock : register(b0) {{\n" - " int4 position;\n" - " float y_scale;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" - "}};\n" - "sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n"); + code.Write("cbuffer PSBlock : register(b0) {{\n" + " int4 position;\n" + " float y_scale;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float3 filter_coefficients;\n" + "}};\n" + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n"); } // D3D does not have roundEven(), only round(), which is specified "to the nearest integer". // This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL // we need to use roundEven(). if (api_type == APIType::D3D) - code.WriteFmt("#define roundEven(x) round(x)\n"); + code.Write("#define roundEven(x) round(x)\n"); // Alpha channel in the copy is set to 1 the EFB format does not have an alpha channel. - code.WriteFmt("float4 RGBA8ToRGB8(float4 src)\n" - "{{\n" - " return float4(src.xyz, 1.0);\n" - "}}\n" + code.Write("float4 RGBA8ToRGB8(float4 src)\n" + "{{\n" + " return float4(src.xyz, 1.0);\n" + "}}\n" - "float4 RGBA8ToRGBA6(float4 src)\n" - "{{\n" - " int4 val = int4(roundEven(src * 255.0)) >> 2;\n" - " return float4(val) / 63.0;\n" - "}}\n" + "float4 RGBA8ToRGBA6(float4 src)\n" + "{{\n" + " int4 val = int4(roundEven(src * 255.0)) >> 2;\n" + " return float4(val) / 63.0;\n" + "}}\n" - "float4 RGBA8ToRGB565(float4 src)\n" - "{{\n" - " int4 val = int4(roundEven(src * 255.0));\n" - " val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n" - " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n" - "}}\n"); + "float4 RGBA8ToRGB565(float4 src)\n" + "{{\n" + " int4 val = int4(roundEven(src * 255.0));\n" + " val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n" + " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n" + "}}\n"); } static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, APIType api_type) @@ -127,16 +127,16 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A switch (params.efb_format) { case PEControl::RGB8_Z24: - code.WriteFmt("RGBA8ToRGB8("); + code.Write("RGBA8ToRGB8("); break; case PEControl::RGBA6_Z24: - code.WriteFmt("RGBA8ToRGBA6("); + code.Write("RGBA8ToRGBA6("); break; case PEControl::RGB565_Z16: - code.WriteFmt("RGBA8ToRGB565("); + code.Write("RGBA8ToRGB565("); break; default: - code.WriteFmt("("); + code.Write("("); break; } } @@ -144,63 +144,63 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A { // Handle D3D depth inversion. if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - code.WriteFmt("1.0 - ("); + code.Write("1.0 - ("); else - code.WriteFmt("("); + code.Write("("); } if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - code.WriteFmt("texture(samp0, float3("); + code.Write("texture(samp0, float3("); else - code.WriteFmt("Tex0.Sample(samp0, float3("); + code.Write("Tex0.Sample(samp0, float3("); - code.WriteFmt("uv.x + float(xoffset) * pixel_size.x, "); + code.Write("uv.x + float(xoffset) * pixel_size.x, "); // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row. if (yoffset != 0) { if (api_type == APIType::OpenGL) - code.WriteFmt("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + code.Write("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); else - code.WriteFmt("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + code.Write("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); } else { - code.WriteFmt("uv.y"); + code.Write("uv.y"); } - code.WriteFmt(", 0.0)))"); + code.Write(", 0.0)))"); }; // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. - code.WriteFmt("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n" - "{{\n"); + code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n" + "{{\n"); if (params.copy_filter) { - code.WriteFmt(" float4 prev_row = "); + code.Write(" float4 prev_row = "); WriteSampleOp(-1); - code.WriteFmt(";\n" - " float4 current_row = "); + code.Write(";\n" + " float4 current_row = "); WriteSampleOp(0); - code.WriteFmt(";\n" - " float4 next_row = "); + code.Write(";\n" + " float4 next_row = "); WriteSampleOp(1); - code.WriteFmt(";\n" - " return float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" - " float3(1, 1, 1)), current_row.a);\n"); + code.Write(";\n" + " return float4(min(prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2], \n" + " float3(1, 1, 1)), current_row.a);\n"); } else { - code.WriteFmt(" float4 current_row = "); + code.Write(" float4 current_row = "); WriteSampleOp(0); - code.WriteFmt(";\n" - "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" - " current_row.a);\n"); + code.Write(";\n" + "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" + " current_row.a);\n"); } - code.WriteFmt("}}\n"); + code.Write("}}\n"); } // Block dimensions : widthStride, heightStride @@ -213,101 +213,101 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - code.WriteFmt("void main()\n" - "{{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n"); + code.Write("void main()\n" + "{{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n"); } else // D3D { - code.WriteFmt("void main(\n" - " in float3 v_tex0 : TEXCOORD0,\n" - " in float4 rawpos : SV_Position,\n" - " out float4 ocol0 : SV_Target)\n" - "{{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(rawpos.xy);\n"); + code.Write("void main(\n" + " in float3 v_tex0 : TEXCOORD0,\n" + " in float4 rawpos : SV_Position,\n" + " out float4 ocol0 : SV_Target)\n" + "{{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(rawpos.xy);\n"); } const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); int samples = GetEncodedSampleCount(format); - code.WriteFmt(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples), - IntLog2(blkW)); - code.WriteFmt(" int y_block_position = uv1.y << {};\n", IntLog2(blkH)); + code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples), + IntLog2(blkW)); + code.Write(" int y_block_position = uv1.y << {};\n", IntLog2(blkH)); if (samples == 1) { // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8. - code.WriteFmt(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2); + code.Write(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2); samples = 2; } - code.WriteFmt(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1); - code.WriteFmt(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples)); - code.WriteFmt(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1, - IntLog2(samples)); + code.Write(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1); + code.Write(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples)); + code.Write(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1, + IntLog2(samples)); - code.WriteFmt(" sampleUv.x = x_block_position + x_offset_in_block;\n" - " sampleUv.y = y_block_position + y_offset_in_block;\n"); + code.Write(" sampleUv.x = x_block_position + x_offset_in_block;\n" + " sampleUv.y = y_block_position + y_offset_in_block;\n"); // sampleUv is the sample position in (int)gx_coords - code.WriteFmt(" float2 uv0 = float2(sampleUv);\n"); + code.Write(" float2 uv0 = float2(sampleUv);\n"); // Move to center of pixel - code.WriteFmt(" uv0 += float2(0.5, 0.5);\n"); + code.Write(" uv0 += float2(0.5, 0.5);\n"); // Scale by two if needed (also move to pixel borders // so that linear filtering will average adjacent // pixel) - code.WriteFmt(" uv0 *= float(position.w);\n"); + code.Write(" uv0 *= float(position.w);\n"); // Move to copied rect - code.WriteFmt(" uv0 += float2(position.xy);\n"); + code.Write(" uv0 += float2(position.xy);\n"); // Normalize to [0:1] - code.WriteFmt(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT); + code.Write(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT); // Apply the y scaling - code.WriteFmt(" uv0 /= float2(1, y_scale);\n"); + code.Write(" uv0 /= float2(1, y_scale);\n"); // OGL has to flip up and down if (api_type == APIType::OpenGL) { - code.WriteFmt(" uv0.y = 1.0-uv0.y;\n"); + code.Write(" uv0.y = 1.0-uv0.y;\n"); } - code.WriteFmt(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n", - EFB_WIDTH, EFB_HEIGHT); + code.Write(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n", EFB_WIDTH, + EFB_HEIGHT); } static void WriteSampleColor(ShaderCode& code, std::string_view color_comp, std::string_view dest, int x_offset, APIType api_type, const EFBCopyParams& params) { - code.WriteFmt(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp); + code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp); } static void WriteColorToIntensity(ShaderCode& code, std::string_view src, std::string_view dest) { if (!IntensityConstantAdded) { - code.WriteFmt(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n"); + code.Write(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n"); IntensityConstantAdded = true; } - code.WriteFmt(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src); + code.Write(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src); // don't add IntensityConst.a yet, because doing it later is faster and uses less instructions, // due to vectorization } static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest) { - code.WriteFmt(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth); + code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth); } static void WriteEncoderEnd(ShaderCode& code) { - code.WriteFmt("}}\n"); + code.Write("}}\n"); IntensityConstantAdded = false; } static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::R8, api_type); - code.WriteFmt(" float3 texSample;\n"); + code.Write(" float3 texSample;\n"); WriteSampleColor(code, "rgb", "texSample", 0, api_type, params); WriteColorToIntensity(code, "texSample", "ocol0.b"); @@ -322,7 +322,7 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara WriteColorToIntensity(code, "texSample", "ocol0.a"); // See WriteColorToIntensity - code.WriteFmt(" ocol0.rgba += IntensityConst.aaaa;\n"); + code.Write(" ocol0.rgba += IntensityConst.aaaa;\n"); WriteEncoderEnd(code); } @@ -330,9 +330,9 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::R4, api_type); - code.WriteFmt(" float3 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); + code.Write(" float3 texSample;\n" + " float4 color0;\n" + " float4 color1;\n"); WriteSampleColor(code, "rgb", "texSample", 0, api_type, params); WriteColorToIntensity(code, "texSample", "color0.b"); @@ -358,30 +358,30 @@ static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara WriteSampleColor(code, "rgb", "texSample", 7, api_type, params); WriteColorToIntensity(code, "texSample", "color1.a"); - code.WriteFmt(" color0.rgba += IntensityConst.aaaa;\n" - " color1.rgba += IntensityConst.aaaa;\n"); + code.Write(" color0.rgba += IntensityConst.aaaa;\n" + " color1.rgba += IntensityConst.aaaa;\n"); WriteToBitDepth(code, 4, "color0", "color0"); WriteToBitDepth(code, 4, "color1", "color1"); - code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); + code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); WriteEncoderEnd(code); } static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type); - code.WriteFmt(" float4 texSample;\n"); + code.Write(" float4 texSample;\n"); WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); - code.WriteFmt(" ocol0.b = texSample.a;\n"); + code.Write(" ocol0.b = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "ocol0.g"); WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.WriteFmt(" ocol0.r = texSample.a;\n"); + code.Write(" ocol0.r = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "ocol0.a"); - code.WriteFmt(" ocol0.ga += IntensityConst.aa;\n"); + code.Write(" ocol0.ga += IntensityConst.aa;\n"); WriteEncoderEnd(code); } @@ -389,57 +389,57 @@ static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar static void WriteIA4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type); - code.WriteFmt(" float4 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); + code.Write(" float4 texSample;\n" + " float4 color0;\n" + " float4 color1;\n"); WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); - code.WriteFmt(" color0.b = texSample.a;\n"); + code.Write(" color0.b = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "color1.b"); WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.WriteFmt(" color0.g = texSample.a;\n"); + code.Write(" color0.g = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "color1.g"); WriteSampleColor(code, "rgba", "texSample", 2, api_type, params); - code.WriteFmt(" color0.r = texSample.a;\n"); + code.Write(" color0.r = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "color1.r"); WriteSampleColor(code, "rgba", "texSample", 3, api_type, params); - code.WriteFmt(" color0.a = texSample.a;\n"); + code.Write(" color0.a = texSample.a;\n"); WriteColorToIntensity(code, "texSample", "color1.a"); - code.WriteFmt(" color1.rgba += IntensityConst.aaaa;\n"); + code.Write(" color1.rgba += IntensityConst.aaaa;\n"); WriteToBitDepth(code, 4, "color0", "color0"); WriteToBitDepth(code, 4, "color1", "color1"); - code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); + code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); WriteEncoderEnd(code); } static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::RGB565, api_type); - code.WriteFmt(" float3 texSample0;\n" - " float3 texSample1;\n"); + code.Write(" float3 texSample0;\n" + " float3 texSample1;\n"); WriteSampleColor(code, "rgb", "texSample0", 0, api_type, params); WriteSampleColor(code, "rgb", "texSample1", 1, api_type, params); - code.WriteFmt(" float2 texRs = float2(texSample0.r, texSample1.r);\n" - " float2 texGs = float2(texSample0.g, texSample1.g);\n" - " float2 texBs = float2(texSample0.b, texSample1.b);\n"); + code.Write(" float2 texRs = float2(texSample0.r, texSample1.r);\n" + " float2 texGs = float2(texSample0.g, texSample1.g);\n" + " float2 texBs = float2(texSample0.b, texSample1.b);\n"); WriteToBitDepth(code, 6, "texGs", "float2 gInt"); - code.WriteFmt(" float2 gUpper = floor(gInt / 8.0);\n" - " float2 gLower = gInt - gUpper * 8.0;\n"); + code.Write(" float2 gUpper = floor(gInt / 8.0);\n" + " float2 gLower = gInt - gUpper * 8.0;\n"); WriteToBitDepth(code, 5, "texRs", "ocol0.br"); - code.WriteFmt(" ocol0.br = ocol0.br * 8.0 + gUpper;\n"); + code.Write(" ocol0.br = ocol0.br * 8.0 + gUpper;\n"); WriteToBitDepth(code, 5, "texBs", "ocol0.ga"); - code.WriteFmt(" ocol0.ga = ocol0.ga + gLower * 32.0;\n"); + code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n"); - code.WriteFmt(" ocol0 = ocol0 / 255.0;\n"); + code.Write(" ocol0 = ocol0 / 255.0;\n"); WriteEncoderEnd(code); } @@ -447,63 +447,63 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy { WriteSwizzler(code, params, EFBCopyFormat::RGB5A3, api_type); - code.WriteFmt(" float4 texSample;\n" - " float color0;\n" - " float gUpper;\n" - " float gLower;\n"); + code.Write(" float4 texSample;\n" + " float color0;\n" + " float gUpper;\n" + " float gLower;\n"); WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits - code.WriteFmt("if(texSample.a > 0.878f) {{\n"); + code.Write("if(texSample.a > 0.878f) {{\n"); WriteToBitDepth(code, 5, "texSample.g", "color0"); - code.WriteFmt(" gUpper = floor(color0 / 8.0);\n" - " gLower = color0 - gUpper * 8.0;\n"); + code.Write(" gUpper = floor(color0 / 8.0);\n" + " gLower = color0 - gUpper * 8.0;\n"); WriteToBitDepth(code, 5, "texSample.r", "ocol0.b"); - code.WriteFmt(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n"); + code.Write(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n"); WriteToBitDepth(code, 5, "texSample.b", "ocol0.g"); - code.WriteFmt(" ocol0.g = ocol0.g + gLower * 32.0;\n"); + code.Write(" ocol0.g = ocol0.g + gLower * 32.0;\n"); - code.WriteFmt("}} else {{\n"); + code.Write("}} else {{\n"); WriteToBitDepth(code, 4, "texSample.r", "ocol0.b"); WriteToBitDepth(code, 4, "texSample.b", "ocol0.g"); WriteToBitDepth(code, 3, "texSample.a", "color0"); - code.WriteFmt("ocol0.b = ocol0.b + color0 * 16.0;\n"); + code.Write("ocol0.b = ocol0.b + color0 * 16.0;\n"); WriteToBitDepth(code, 4, "texSample.g", "color0"); - code.WriteFmt("ocol0.g = ocol0.g + color0 * 16.0;\n"); + code.Write("ocol0.g = ocol0.g + color0 * 16.0;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.WriteFmt("if(texSample.a > 0.878f) {{\n"); + code.Write("if(texSample.a > 0.878f) {{\n"); WriteToBitDepth(code, 5, "texSample.g", "color0"); - code.WriteFmt(" gUpper = floor(color0 / 8.0);\n" - " gLower = color0 - gUpper * 8.0;\n"); + code.Write(" gUpper = floor(color0 / 8.0);\n" + " gLower = color0 - gUpper * 8.0;\n"); WriteToBitDepth(code, 5, "texSample.r", "ocol0.r"); - code.WriteFmt(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n"); + code.Write(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n"); WriteToBitDepth(code, 5, "texSample.b", "ocol0.a"); - code.WriteFmt(" ocol0.a = ocol0.a + gLower * 32.0;\n"); + code.Write(" ocol0.a = ocol0.a + gLower * 32.0;\n"); - code.WriteFmt("}} else {{\n"); + code.Write("}} else {{\n"); WriteToBitDepth(code, 4, "texSample.r", "ocol0.r"); WriteToBitDepth(code, 4, "texSample.b", "ocol0.a"); WriteToBitDepth(code, 3, "texSample.a", "color0"); - code.WriteFmt("ocol0.r = ocol0.r + color0 * 16.0;\n"); + code.Write("ocol0.r = ocol0.r + color0 * 16.0;\n"); WriteToBitDepth(code, 4, "texSample.g", "color0"); - code.WriteFmt("ocol0.a = ocol0.a + color0 * 16.0;\n"); + code.Write("ocol0.a = ocol0.a + color0 * 16.0;\n"); - code.WriteFmt("}}\n"); + code.Write("}}\n"); - code.WriteFmt(" ocol0 = ocol0 / 255.0;\n"); + code.Write(" ocol0 = ocol0 / 255.0;\n"); WriteEncoderEnd(code); } @@ -511,23 +511,23 @@ static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyP { WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type); - code.WriteFmt(" float4 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); + code.Write(" float4 texSample;\n" + " float4 color0;\n" + " float4 color1;\n"); WriteSampleColor(code, "rgba", "texSample", 0, api_type, params); - code.WriteFmt(" color0.b = texSample.a;\n" - " color0.g = texSample.r;\n" - " color1.b = texSample.g;\n" - " color1.g = texSample.b;\n"); + code.Write(" color0.b = texSample.a;\n" + " color0.g = texSample.r;\n" + " color1.b = texSample.g;\n" + " color1.g = texSample.b;\n"); WriteSampleColor(code, "rgba", "texSample", 1, api_type, params); - code.WriteFmt(" color0.r = texSample.a;\n" - " color0.a = texSample.r;\n" - " color1.r = texSample.g;\n" - " color1.a = texSample.b;\n"); + code.Write(" color0.r = texSample.a;\n" + " color0.a = texSample.r;\n" + " color1.r = texSample.g;\n" + " color1.a = texSample.b;\n"); - code.WriteFmt(" ocol0 = first ? color0 : color1;\n"); + code.Write(" ocol0 = first ? color0 : color1;\n"); WriteEncoderEnd(code); } @@ -536,8 +536,8 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_ const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::R4, api_type); - code.WriteFmt(" float4 color0;\n" - " float4 color1;\n"); + code.Write(" float4 color0;\n" + " float4 color1;\n"); WriteSampleColor(code, comp, "color0.b", 0, api_type, params); WriteSampleColor(code, comp, "color1.b", 1, api_type, params); @@ -551,7 +551,7 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_ WriteToBitDepth(code, 4, "color0", "color0"); WriteToBitDepth(code, 4, "color1", "color1"); - code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); + code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); WriteEncoderEnd(code); } @@ -572,30 +572,30 @@ static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api const EFBCopyParams& params) { WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type); - code.WriteFmt(" float2 texSample;\n" - " float4 color0;\n" - " float4 color1;\n"); + code.Write(" float2 texSample;\n" + " float4 color0;\n" + " float4 color1;\n"); WriteSampleColor(code, comp, "texSample", 0, api_type, params); - code.WriteFmt(" color0.b = texSample.x;\n" - " color1.b = texSample.y;\n"); + code.Write(" color0.b = texSample.x;\n" + " color1.b = texSample.y;\n"); WriteSampleColor(code, comp, "texSample", 1, api_type, params); - code.WriteFmt(" color0.g = texSample.x;\n" - " color1.g = texSample.y;\n"); + code.Write(" color0.g = texSample.x;\n" + " color1.g = texSample.y;\n"); WriteSampleColor(code, comp, "texSample", 2, api_type, params); - code.WriteFmt(" color0.r = texSample.x;\n" - " color1.r = texSample.y;\n"); + code.Write(" color0.r = texSample.x;\n" + " color1.r = texSample.y;\n"); WriteSampleColor(code, comp, "texSample", 3, api_type, params); - code.WriteFmt(" color0.a = texSample.x;\n" - " color1.a = texSample.y;\n"); + code.Write(" color0.a = texSample.x;\n" + " color1.a = texSample.y;\n"); WriteToBitDepth(code, 4, "color0", "color0"); WriteToBitDepth(code, 4, "color1", "color1"); - code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); + code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n"); WriteEncoderEnd(code); } @@ -615,19 +615,19 @@ static void WriteZ8Encoder(ShaderCode& code, std::string_view multiplier, APITyp { WriteSwizzler(code, params, EFBCopyFormat::G8, api_type); - code.WriteFmt(" float depth;\n"); + code.Write(" float depth;\n"); WriteSampleColor(code, "r", "depth", 0, api_type, params); - code.WriteFmt("ocol0.b = frac(depth * {});\n", multiplier); + code.Write("ocol0.b = frac(depth * {});\n", multiplier); WriteSampleColor(code, "r", "depth", 1, api_type, params); - code.WriteFmt("ocol0.g = frac(depth * {});\n", multiplier); + code.Write("ocol0.g = frac(depth * {});\n", multiplier); WriteSampleColor(code, "r", "depth", 2, api_type, params); - code.WriteFmt("ocol0.r = frac(depth * {});\n", multiplier); + code.Write("ocol0.r = frac(depth * {});\n", multiplier); WriteSampleColor(code, "r", "depth", 3, api_type, params); - code.WriteFmt("ocol0.a = frac(depth * {});\n", multiplier); + code.Write("ocol0.a = frac(depth * {});\n", multiplier); WriteEncoderEnd(code); } @@ -636,30 +636,30 @@ static void WriteZ16Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar { WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type); - code.WriteFmt(" float depth;\n" - " float3 expanded;\n"); + code.Write(" float depth;\n" + " float3 expanded;\n"); // Byte order is reversed WriteSampleColor(code, "r", "depth", 0, api_type, params); - code.WriteFmt(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n"); + code.Write(" depth *= 16777216.0;\n" + " expanded.r = floor(depth / (256.0 * 256.0));\n" + " depth -= expanded.r * 256.0 * 256.0;\n" + " expanded.g = floor(depth / 256.0);\n"); - code.WriteFmt(" ocol0.b = expanded.g / 255.0;\n" - " ocol0.g = expanded.r / 255.0;\n"); + code.Write(" ocol0.b = expanded.g / 255.0;\n" + " ocol0.g = expanded.r / 255.0;\n"); WriteSampleColor(code, "r", "depth", 1, api_type, params); - code.WriteFmt(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n"); + code.Write(" depth *= 16777216.0;\n" + " expanded.r = floor(depth / (256.0 * 256.0));\n" + " depth -= expanded.r * 256.0 * 256.0;\n" + " expanded.g = floor(depth / 256.0);\n"); - code.WriteFmt(" ocol0.r = expanded.g / 255.0;\n" - " ocol0.a = expanded.r / 255.0;\n"); + code.Write(" ocol0.r = expanded.g / 255.0;\n" + " ocol0.a = expanded.r / 255.0;\n"); WriteEncoderEnd(code); } @@ -668,34 +668,34 @@ static void WriteZ16LEncoder(ShaderCode& code, APIType api_type, const EFBCopyPa { WriteSwizzler(code, params, EFBCopyFormat::GB8, api_type); - code.WriteFmt(" float depth;\n" - " float3 expanded;\n"); + code.Write(" float depth;\n" + " float3 expanded;\n"); // Byte order is reversed WriteSampleColor(code, "r", "depth", 0, api_type, params); - code.WriteFmt(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n" - " depth -= expanded.g * 256.0;\n" - " expanded.b = depth;\n"); + code.Write(" depth *= 16777216.0;\n" + " expanded.r = floor(depth / (256.0 * 256.0));\n" + " depth -= expanded.r * 256.0 * 256.0;\n" + " expanded.g = floor(depth / 256.0);\n" + " depth -= expanded.g * 256.0;\n" + " expanded.b = depth;\n"); - code.WriteFmt(" ocol0.b = expanded.b / 255.0;\n" - " ocol0.g = expanded.g / 255.0;\n"); + code.Write(" ocol0.b = expanded.b / 255.0;\n" + " ocol0.g = expanded.g / 255.0;\n"); WriteSampleColor(code, "r", "depth", 1, api_type, params); - code.WriteFmt(" depth *= 16777216.0;\n" - " expanded.r = floor(depth / (256.0 * 256.0));\n" - " depth -= expanded.r * 256.0 * 256.0;\n" - " expanded.g = floor(depth / 256.0);\n" - " depth -= expanded.g * 256.0;\n" - " expanded.b = depth;\n"); + code.Write(" depth *= 16777216.0;\n" + " expanded.r = floor(depth / (256.0 * 256.0));\n" + " depth -= expanded.r * 256.0 * 256.0;\n" + " expanded.g = floor(depth / 256.0);\n" + " depth -= expanded.g * 256.0;\n" + " expanded.b = depth;\n"); - code.WriteFmt(" ocol0.r = expanded.b / 255.0;\n" - " ocol0.a = expanded.g / 255.0;\n"); + code.Write(" ocol0.r = expanded.b / 255.0;\n" + " ocol0.a = expanded.g / 255.0;\n"); WriteEncoderEnd(code); } @@ -704,38 +704,38 @@ static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar { WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type); - code.WriteFmt(" float depth0;\n" - " float depth1;\n" - " float3 expanded0;\n" - " float3 expanded1;\n"); + code.Write(" float depth0;\n" + " float depth1;\n" + " float3 expanded0;\n" + " float3 expanded1;\n"); WriteSampleColor(code, "r", "depth0", 0, api_type, params); WriteSampleColor(code, "r", "depth1", 1, api_type, params); for (int i = 0; i < 2; i++) { - code.WriteFmt(" depth{} *= 16777216.0;\n", i); + code.Write(" depth{} *= 16777216.0;\n", i); - code.WriteFmt(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i); - code.WriteFmt(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i); - code.WriteFmt(" expanded{}.g = floor(depth{} / 256.0);\n", i, i); - code.WriteFmt(" depth{} -= expanded{}.g * 256.0;\n", i, i); - code.WriteFmt(" expanded{}.b = depth{};\n", i, i); + code.Write(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i); + code.Write(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i); + code.Write(" expanded{}.g = floor(depth{} / 256.0);\n", i, i); + code.Write(" depth{} -= expanded{}.g * 256.0;\n", i, i); + code.Write(" expanded{}.b = depth{};\n", i, i); } - code.WriteFmt(" if (!first) {{\n"); + code.Write(" if (!first) {{\n"); // Upper 16 - code.WriteFmt(" ocol0.b = expanded0.g / 255.0;\n" - " ocol0.g = expanded0.b / 255.0;\n" - " ocol0.r = expanded1.g / 255.0;\n" - " ocol0.a = expanded1.b / 255.0;\n" - " }} else {{\n"); + code.Write(" ocol0.b = expanded0.g / 255.0;\n" + " ocol0.g = expanded0.b / 255.0;\n" + " ocol0.r = expanded1.g / 255.0;\n" + " ocol0.a = expanded1.b / 255.0;\n" + " }} else {{\n"); // Lower 8 - code.WriteFmt(" ocol0.b = 1.0;\n" - " ocol0.g = expanded0.r / 255.0;\n" - " ocol0.r = 1.0;\n" - " ocol0.a = expanded1.r / 255.0;\n" - " }}\n"); + code.Write(" ocol0.b = 1.0;\n" + " ocol0.g = expanded0.r / 255.0;\n" + " ocol0.r = 1.0;\n" + " ocol0.a = expanded1.r / 255.0;\n" + " }}\n"); WriteEncoderEnd(code); } @@ -744,23 +744,23 @@ static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyPar { WriteSwizzler(code, params, EFBCopyFormat::XFB, api_type); - code.WriteFmt("float3 color0, color1;\n"); + code.Write("float3 color0, color1;\n"); WriteSampleColor(code, "rgb", "color0", 0, api_type, params); WriteSampleColor(code, "rgb", "color1", 1, api_type, params); // Gamma is only applied to XFB copies. - code.WriteFmt(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n" - " color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"); + code.Write(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n" + " color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"); // Convert to YUV. - code.WriteFmt(" const float3 y_const = float3(0.257, 0.504, 0.098);\n" - " const float3 u_const = float3(-0.148, -0.291, 0.439);\n" - " const float3 v_const = float3(0.439, -0.368, -0.071);\n" - " float3 average = (color0 + color1) * 0.5;\n" - " ocol0.b = dot(color0, y_const) + 0.0625;\n" - " ocol0.g = dot(average, u_const) + 0.5;\n" - " ocol0.r = dot(color1, y_const) + 0.0625;\n" - " ocol0.a = dot(average, v_const) + 0.5;\n"); + code.Write(" const float3 y_const = float3(0.257, 0.504, 0.098);\n" + " const float3 u_const = float3(-0.148, -0.291, 0.439);\n" + " const float3 v_const = float3(0.439, -0.368, -0.071);\n" + " float3 average = (color0 + color1) * 0.5;\n" + " ocol0.b = dot(color0, y_const) + 0.0625;\n" + " ocol0.g = dot(average, u_const) + 0.5;\n" + " ocol0.r = dot(color1, y_const) + 0.0625;\n" + " ocol0.a = dot(average, v_const) + 0.5;\n"); WriteEncoderEnd(code); } diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 47919fb1ab..825c8c7ffe 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -32,23 +32,23 @@ static void WriteHeader(APIType api_type, ShaderCode& out) { if (api_type == APIType::D3D) { - out.WriteFmt("cbuffer PSBlock : register(b0) {{\n" - " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float pixel_height;\n" - "}};\n\n"); + out.Write("cbuffer PSBlock : register(b0) {{\n" + " float2 src_offset, src_size;\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "}};\n\n"); } else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n" - " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" - " float gamma_rcp;\n" - " float2 clamp_tb;\n" - " float pixel_height;\n" - "}};\n"); + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" + " float2 src_offset, src_size;\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "}};\n"); } } @@ -59,35 +59,35 @@ ShaderCode GenerateVertexShader(APIType api_type) if (api_type == APIType::D3D) { - out.WriteFmt("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n" - " out float4 opos : SV_Position) {{\n"); + out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n" + " out float4 opos : SV_Position) {{\n"); } else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n" - " float3 v_tex0;\n" - "}};\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n" + " float3 v_tex0;\n" + "}};\n"); } else { - out.WriteFmt("VARYING_LOCATION(0) out float3 v_tex0;\n"); + out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n"); } - out.WriteFmt("#define id gl_VertexID\n" - "#define opos gl_Position\n" - "void main() {{\n"); + out.Write("#define id gl_VertexID\n" + "#define opos gl_Position\n" + "void main() {{\n"); } - out.WriteFmt(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"); - out.WriteFmt( + out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"); + out.Write( " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"); - out.WriteFmt(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); + out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); // NDC space is flipped in Vulkan if (api_type == APIType::Vulkan) - out.WriteFmt(" opos.y = -opos.y;\n"); + out.Write(" opos.y = -opos.y;\n"); - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; } @@ -101,52 +101,52 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) if (api_type == APIType::D3D) { - out.WriteFmt("Texture2DArray tex0 : register(t0);\n" - "SamplerState samp0 : register(s0);\n" - "float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n\n", - mono_depth ? "0.0" : "uv.z"); - out.WriteFmt("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n"); + out.Write("Texture2DArray tex0 : register(t0);\n" + "SamplerState samp0 : register(s0);\n" + "float4 SampleEFB(float3 uv, float y_offset) {{\n" + " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), {}));\n" + "}}\n\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n"); } else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - out.WriteFmt("float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n", - mono_depth ? "0.0" : "uv.z"); + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n" + " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), {}));\n" + "}}\n", + mono_depth ? "0.0" : "uv.z"); if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { - out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n" - " float3 v_tex0;\n" - "}};\n"); + out.Write("VARYING_LOCATION(0) in VertexData {{\n" + " float3 v_tex0;\n" + "}};\n"); } else { - out.WriteFmt("VARYING_LOCATION(0) in vec3 v_tex0;\n"); + out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"); } - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;" - "void main()\n{{\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;" + "void main()\n{{\n"); } // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. if (uid_data->copy_filter) { - out.WriteFmt(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" - " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" - " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" - " float3(1, 1, 1)), current_row.a);\n"); + out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" + " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" + " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2], \n" + " float3(1, 1, 1)), current_row.a);\n"); } else { - out.WriteFmt( + out.Write( " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" " current_row.a);\n"); @@ -155,62 +155,62 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) if (uid_data->is_depth_copy) { if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - out.WriteFmt("texcol.x = 1.0 - texcol.x;\n"); + out.Write("texcol.x = 1.0 - texcol.x;\n"); - out.WriteFmt(" int depth = int(texcol.x * 16777216.0);\n" + out.Write(" int depth = int(texcol.x * 16777216.0);\n" - // Convert to Z24 format - " int4 workspace;\n" - " workspace.r = (depth >> 16) & 255;\n" - " workspace.g = (depth >> 8) & 255;\n" - " workspace.b = depth & 255;\n" + // Convert to Z24 format + " int4 workspace;\n" + " workspace.r = (depth >> 16) & 255;\n" + " workspace.g = (depth >> 8) & 255;\n" + " workspace.b = depth & 255;\n" - // Convert to Z4 format - " workspace.a = (depth >> 16) & 0xF0;\n" + // Convert to Z4 format + " workspace.a = (depth >> 16) & 0xF0;\n" - // Normalize components to [0.0..1.0] - " texcol = float4(workspace) / 255.0;\n"); + // Normalize components to [0.0..1.0] + " texcol = float4(workspace) / 255.0;\n"); switch (uid_data->dst_format) { case EFBCopyFormat::R4: // Z4 - out.WriteFmt(" ocol0 = texcol.aaaa;\n"); + out.Write(" ocol0 = texcol.aaaa;\n"); break; case EFBCopyFormat::R8_0x1: // Z8 case EFBCopyFormat::R8: // Z8H - out.WriteFmt(" ocol0 = texcol.rrrr;\n"); + out.Write(" ocol0 = texcol.rrrr;\n"); break; case EFBCopyFormat::RA8: // Z16 - out.WriteFmt(" ocol0 = texcol.gggr;\n"); + out.Write(" ocol0 = texcol.gggr;\n"); break; case EFBCopyFormat::RG8: // Z16 (reverse order) - out.WriteFmt(" ocol0 = texcol.rrrg;\n"); + out.Write(" ocol0 = texcol.rrrg;\n"); break; case EFBCopyFormat::RGBA8: // Z24X8 - out.WriteFmt(" ocol0 = float4(texcol.rgb, 1.0);\n"); + out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); break; case EFBCopyFormat::G8: // Z8M - out.WriteFmt(" ocol0 = texcol.gggg;\n"); + out.Write(" ocol0 = texcol.gggg;\n"); break; case EFBCopyFormat::B8: // Z8L - out.WriteFmt(" ocol0 = texcol.bbbb;\n"); + out.Write(" ocol0 = texcol.bbbb;\n"); break; case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits // stored as alpha) // Used e.g. in Zelda: Skyward Sword - out.WriteFmt(" ocol0 = texcol.gggb;\n"); + out.Write(" ocol0 = texcol.gggb;\n"); break; default: ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast(uid_data->dst_format)); - out.WriteFmt(" ocol0 = float4(texcol.bgr, 0.0);\n"); + out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n"); break; } } @@ -229,99 +229,99 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) case EFBCopyFormat::RA4: // IA4 case EFBCopyFormat::RA8: // IA8 if (has_four_bits) - out.WriteFmt(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n"); + out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n"); // TODO - verify these coefficients - out.WriteFmt(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n" - " float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n" - " ocol0 = float4(intensity, intensity, intensity, {});\n", - has_alpha ? "texcol.a" : "intensity"); + out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n" + " float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n" + " ocol0 = float4(intensity, intensity, intensity, {});\n", + has_alpha ? "texcol.a" : "intensity"); break; default: ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X", static_cast(uid_data->dst_format)); - out.WriteFmt(" ocol0 = texcol;\n"); + out.Write(" ocol0 = texcol;\n"); break; } } else { if (!uid_data->efb_has_alpha) - out.WriteFmt(" texcol.a = 1.0;\n"); + out.Write(" texcol.a = 1.0;\n"); switch (uid_data->dst_format) { case EFBCopyFormat::R4: // R4 - out.WriteFmt(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = float4(red, red, red, red);\n"); + out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" + " ocol0 = float4(red, red, red, red);\n"); break; case EFBCopyFormat::R8_0x1: // R8 case EFBCopyFormat::R8: // R8 - out.WriteFmt(" ocol0 = texcol.rrrr;\n"); + out.Write(" ocol0 = texcol.rrrr;\n"); break; case EFBCopyFormat::RA4: // RA4 - out.WriteFmt(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = red_alpha.rrrg;\n"); + out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" + " ocol0 = red_alpha.rrrg;\n"); break; case EFBCopyFormat::RA8: // RA8 - out.WriteFmt(" ocol0 = texcol.rrra;\n"); + out.Write(" ocol0 = texcol.rrra;\n"); break; case EFBCopyFormat::A8: // A8 - out.WriteFmt(" ocol0 = texcol.aaaa;\n"); + out.Write(" ocol0 = texcol.aaaa;\n"); break; case EFBCopyFormat::G8: // G8 - out.WriteFmt(" ocol0 = texcol.gggg;\n"); + out.Write(" ocol0 = texcol.gggg;\n"); break; case EFBCopyFormat::B8: // B8 - out.WriteFmt(" ocol0 = texcol.bbbb;\n"); + out.Write(" ocol0 = texcol.bbbb;\n"); break; case EFBCopyFormat::RG8: // RG8 - out.WriteFmt(" ocol0 = texcol.rrrg;\n"); + out.Write(" ocol0 = texcol.rrrg;\n"); break; case EFBCopyFormat::GB8: // GB8 - out.WriteFmt(" ocol0 = texcol.gggb;\n"); + out.Write(" ocol0 = texcol.gggb;\n"); break; case EFBCopyFormat::RGB565: // RGB565 - out.WriteFmt(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" - " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); + out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" + " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" + " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); break; case EFBCopyFormat::RGB5A3: // RGB5A3 // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection // will need to be implemented once we move away from floats. - out.WriteFmt(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" - " ocol0 = float4(color, alpha);\n"); + out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" + " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" + " ocol0 = float4(color, alpha);\n"); break; case EFBCopyFormat::RGBA8: // RGBA8 - out.WriteFmt(" ocol0 = texcol;\n"); + out.Write(" ocol0 = texcol;\n"); break; case EFBCopyFormat::XFB: - out.WriteFmt( + out.Write( " ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n"); break; default: ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast(uid_data->dst_format)); - out.WriteFmt(" ocol0 = texcol;\n"); + out.Write(" ocol0 = texcol;\n"); break; } } - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; } diff --git a/Source/Core/VideoCommon/UberShaderCommon.cpp b/Source/Core/VideoCommon/UberShaderCommon.cpp index 6560e7d9c8..afd86a6943 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.cpp +++ b/Source/Core/VideoCommon/UberShaderCommon.cpp @@ -18,14 +18,13 @@ void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type, // ============================================== if (!host_config.backend_bitfield) { - out.WriteFmt( - "uint bitfieldExtract(uint val, int off, int size) {{\n" - " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" - " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract " - "instruction.\n" - " uint mask = uint((1 << size) - 1);\n" - " return uint(val >> off) & mask;\n" - "}}\n\n"); + out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n" + " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" + " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract " + "instruction.\n" + " uint mask = uint((1 << size) - 1);\n" + " return uint(val >> off) & mask;\n" + "}}\n\n"); } } @@ -34,61 +33,60 @@ void WriteLightingFunction(ShaderCode& out) // ============================================== // Lighting channel calculation helper // ============================================== - out.WriteFmt("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, " - "float3 normal) {{\n" - " float3 ldir, h, cosAttn, distAttn;\n" - " float dist, dist2, attn;\n" - "\n" - " switch (attnfunc) {{\n"); - out.WriteFmt(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE); - out.WriteFmt(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR); - out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" - " attn = 1.0;\n" - " if (length(ldir) == 0.0)\n" - " ldir = normal;\n" - " break;\n\n"); - out.WriteFmt(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC); - out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" - " attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS - "[index].dir.xyz)) : 0.0;\n" - " cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n"); - out.WriteFmt(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE); - out.WriteFmt(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n" - " else\n" - " distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n" - " attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " - "float3(1.0, attn, attn*attn));\n" - " break;\n\n"); - out.WriteFmt(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT); - out.WriteFmt(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n" - " dist2 = dot(ldir, ldir);\n" - " dist = sqrt(dist2);\n" - " ldir = ldir / dist;\n" - " attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n" - " attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS - "[index].cosatt.y * attn + " I_LIGHTS - "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS - "[index].distatt.xyz, float3(1.0, dist, dist2));\n" - " break;\n\n"); - out.WriteFmt(" default:\n" - " attn = 1.0;\n" - " ldir = normal;\n" - " break;\n" - " }}\n" - "\n" - " switch (diffusefunc) {{\n"); - out.WriteFmt(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE); - out.WriteFmt(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n"); - out.WriteFmt(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN); - out.WriteFmt(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS - "[index].color)));\n\n"); - out.WriteFmt(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP); - out.WriteFmt(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS - "[index].color)));\n\n"); - out.WriteFmt(" default:\n" - " return int4(0, 0, 0, 0);\n" - " }}\n" - "}}\n\n"); + out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, " + "float3 normal) {{\n" + " float3 ldir, h, cosAttn, distAttn;\n" + " float dist, dist2, attn;\n" + "\n" + " switch (attnfunc) {{\n"); + out.Write(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE); + out.Write(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR); + out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" + " attn = 1.0;\n" + " if (length(ldir) == 0.0)\n" + " ldir = normal;\n" + " break;\n\n"); + out.Write(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC); + out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" + " attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS + "[index].dir.xyz)) : 0.0;\n" + " cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n"); + out.Write(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE); + out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n" + " else\n" + " distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n" + " attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " + "float3(1.0, attn, attn*attn));\n" + " break;\n\n"); + out.Write(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT); + out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n" + " dist2 = dot(ldir, ldir);\n" + " dist = sqrt(dist2);\n" + " ldir = ldir / dist;\n" + " attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n" + " attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS + "[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS + "[index].distatt.xyz, float3(1.0, dist, dist2));\n" + " break;\n\n"); + out.Write(" default:\n" + " attn = 1.0;\n" + " ldir = normal;\n" + " break;\n" + " }}\n" + "\n" + " switch (diffusefunc) {{\n"); + out.Write(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE); + out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n"); + out.Write(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN); + out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS + "[index].color)));\n\n"); + out.Write(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP); + out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS + "[index].color)));\n\n"); + out.Write(" default:\n" + " return int4(0, 0, 0, 0);\n" + " }}\n" + "}}\n\n"); } void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view world_pos_var, @@ -96,105 +94,102 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view wor std::string_view in_color_1_var, std::string_view out_color_0_var, std::string_view out_color_1_var) { - out.WriteFmt("// Lighting\n"); - out.WriteFmt("{}for (uint chan = 0u; chan < {}u; chan++) {{\n", - api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS); - out.WriteFmt(" uint colorreg = xfmem_color(chan);\n" - " uint alphareg = xfmem_alpha(chan);\n" - " int4 mat = " I_MATERIALS "[chan + 2u]; \n" - " int4 lacc = int4(255, 255, 255, 255);\n" - "\n"); + out.Write("// Lighting\n"); + out.Write("{}for (uint chan = 0u; chan < {}u; chan++) {{\n", + api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS); + out.Write(" uint colorreg = xfmem_color(chan);\n" + " uint alphareg = xfmem_alpha(chan);\n" + " int4 mat = " I_MATERIALS "[chan + 2u]; \n" + " int4 lacc = int4(255, 255, 255, 255);\n" + "\n"); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource)); - out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.WriteFmt(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n", - in_color_0_var, in_color_1_var); - out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.WriteFmt(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var); - out.WriteFmt(" else\n" - " mat.xyz = int3(255, 255, 255);\n" - " }}\n" - "\n"); + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource)); + out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n", + in_color_0_var, in_color_1_var); + out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " mat.xyz = int3(255, 255, 255);\n" + " }}\n" + "\n"); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource)); - out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.WriteFmt(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var, - in_color_1_var); - out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.WriteFmt(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var); - out.WriteFmt(" else\n" - " mat.w = 255;\n" - " }} else {{\n" - " mat.w = " I_MATERIALS " [chan + 2u].w;\n" - " }}\n" - "\n"); + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource)); + out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var, + in_color_1_var); + out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " mat.w = 255;\n" + " }} else {{\n" + " mat.w = " I_MATERIALS " [chan + 2u].w;\n" + " }}\n" + "\n"); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting)); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource)); - out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.WriteFmt(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n", - in_color_0_var, in_color_1_var); - out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.WriteFmt(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var); - out.WriteFmt(" else\n" - " lacc.xyz = int3(255, 255, 255);\n" - " }} else {{\n" - " lacc.xyz = " I_MATERIALS " [chan].xyz;\n" - " }}\n" - "\n"); - out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n", - BitfieldExtract("colorreg", LitChannel().lightMask0_3), - BitfieldExtract("colorreg", LitChannel().lightMask4_7)); - out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc)); - out.WriteFmt(" uint diffusefunc = {};\n", - BitfieldExtract("colorreg", LitChannel().diffusefunc)); - out.WriteFmt( + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting)); + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource)); + out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n", + in_color_0_var, in_color_1_var); + out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " lacc.xyz = int3(255, 255, 255);\n" + " }} else {{\n" + " lacc.xyz = " I_MATERIALS " [chan].xyz;\n" + " }}\n" + "\n"); + out.Write(" uint light_mask = {} | ({} << 4u);\n", + BitfieldExtract("colorreg", LitChannel().lightMask0_3), + BitfieldExtract("colorreg", LitChannel().lightMask4_7)); + out.Write(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc)); + out.Write(" uint diffusefunc = {};\n", BitfieldExtract("colorreg", LitChannel().diffusefunc)); + out.Write( " for (uint light_index = 0u; light_index < 8u; light_index++) {{\n" " if ((light_mask & (1u << light_index)) != 0u)\n" " lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).xyz;\n", world_pos_var, normal_var); - out.WriteFmt(" }}\n" - " }}\n" - "\n"); + out.Write(" }}\n" + " }}\n" + "\n"); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting)); - out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource)); - out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.WriteFmt(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", - in_color_0_var, in_color_1_var); - out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.WriteFmt(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var); - out.WriteFmt(" else\n" - " lacc.w = 255;\n" - " }} else {{\n" - " lacc.w = " I_MATERIALS " [chan].w;\n" - " }}\n" - "\n"); - out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n", - BitfieldExtract("alphareg", LitChannel().lightMask0_3), - BitfieldExtract("alphareg", LitChannel().lightMask4_7)); - out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc)); - out.WriteFmt(" uint diffusefunc = {};\n", - BitfieldExtract("alphareg", LitChannel().diffusefunc)); - out.WriteFmt( - " for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n" - " if ((light_mask & (1u << light_index)) != 0u)\n\n" - " lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).w;\n", - world_pos_var, normal_var); - out.WriteFmt(" }}\n" - " }}\n" - "\n"); + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting)); + out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource)); + out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var, + in_color_1_var); + out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " lacc.w = 255;\n" + " }} else {{\n" + " lacc.w = " I_MATERIALS " [chan].w;\n" + " }}\n" + "\n"); + out.Write(" uint light_mask = {} | ({} << 4u);\n", + BitfieldExtract("alphareg", LitChannel().lightMask0_3), + BitfieldExtract("alphareg", LitChannel().lightMask4_7)); + out.Write(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc)); + out.Write(" uint diffusefunc = {};\n", BitfieldExtract("alphareg", LitChannel().diffusefunc)); + out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n" + " if ((light_mask & (1u << light_index)) != 0u)\n\n" + " lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).w;\n", + world_pos_var, normal_var); + out.Write(" }}\n" + " }}\n" + "\n"); - out.WriteFmt(" lacc = clamp(lacc, 0, 255);\n" - "\n" - " // Hopefully GPUs that can support dynamic indexing will optimize this.\n" - " float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n" - " switch (chan) {{\n" - " case 0u: {} = lit_color; break;\n", - out_color_0_var); - out.WriteFmt(" case 1u: {} = lit_color; break;\n", out_color_1_var); - out.WriteFmt(" }}\n" - "}}\n" - "\n"); + out.Write(" lacc = clamp(lacc, 0, 255);\n" + "\n" + " // Hopefully GPUs that can support dynamic indexing will optimize this.\n" + " float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n" + " switch (chan) {{\n" + " case 0u: {} = lit_color; break;\n", + out_color_0_var); + out.Write(" case 1u: {} = lit_color; break;\n", out_color_1_var); + out.Write(" }}\n" + "}}\n" + "\n"); } } // namespace UberShader diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 1cfc164223..721912a3f3 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -62,8 +62,8 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, const u32 numTexgen = uid_data->num_texgens; ShaderCode out; - out.WriteFmt("// Pixel UberShader for {} texgens{}{}\n", numTexgen, - early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : ""); + out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen, + early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : ""); WritePixelShaderCommonHeader(out, ApiType, numTexgen, host_config, bounding_box); WriteUberShaderCommonHeader(out, ApiType, host_config); if (per_pixel_lighting) @@ -76,13 +76,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, { if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" - "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n" + "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); } } else if (use_shader_blend) @@ -93,56 +93,56 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // shader if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n"); } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n"); } } else { - out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); } if (per_pixel_depth) - out.WriteFmt("#define depth gl_FragDepth\n"); + out.Write("#define depth gl_FragDepth\n"); if (host_config.backend_geometry_shaders) { - out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"); + out.Write("VARYING_LOCATION(0) in VertexData {{\n"); GenerateVSOutputMembers(out, ApiType, numTexgen, host_config, GetInterpolationQualifier(msaa, ssaa, true, true)); if (stereo) - out.WriteFmt(" flat int layer;\n"); + out.Write(" flat int layer;\n"); - out.WriteFmt("}};\n\n"); + out.Write("}};\n\n"); } else { // Let's set up attributes u32 counter = 0; - out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); for (u32 i = 0; i < numTexgen; ++i) { - out.WriteFmt("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); + out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.WriteFmt("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.WriteFmt("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } } } @@ -152,71 +152,71 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, { if (ApiType != APIType::D3D) { - out.WriteFmt("float3 selectTexCoord(uint index) {{\n"); + out.Write("float3 selectTexCoord(uint index) {{\n"); } else { - out.WriteFmt("float3 selectTexCoord(uint index"); + out.Write("float3 selectTexCoord(uint index"); for (u32 i = 0; i < numTexgen; i++) - out.WriteFmt(", float3 tex{}", i); - out.WriteFmt(") {{\n"); + out.Write(", float3 tex{}", i); + out.Write(") {{\n"); } if (ApiType == APIType::D3D) { - out.WriteFmt(" switch (index) {{\n"); + out.Write(" switch (index) {{\n"); for (u32 i = 0; i < numTexgen; i++) { - out.WriteFmt(" case {}u:\n" - " return tex{};\n", - i, i); + out.Write(" case {}u:\n" + " return tex{};\n", + i, i); } - out.WriteFmt(" default:\n" - " return float3(0.0, 0.0, 0.0);\n" - " }}\n"); + out.Write(" default:\n" + " return float3(0.0, 0.0, 0.0);\n" + " }}\n"); } else { if (numTexgen > 4) - out.WriteFmt(" if (index < 4u) {{\n"); + out.Write(" if (index < 4u) {{\n"); if (numTexgen > 2) - out.WriteFmt(" if (index < 2u) {{\n"); + out.Write(" if (index < 2u) {{\n"); if (numTexgen > 1) - out.WriteFmt(" return (index == 0u) ? tex0 : tex1;\n"); + out.Write(" return (index == 0u) ? tex0 : tex1;\n"); else - out.WriteFmt(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n"); if (numTexgen > 2) { - out.WriteFmt(" }} else {{\n"); // >= 2 + out.Write(" }} else {{\n"); // >= 2 if (numTexgen > 3) - out.WriteFmt(" return (index == 2u) ? tex2 : tex3;\n"); + out.Write(" return (index == 2u) ? tex2 : tex3;\n"); else - out.WriteFmt(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n"); - out.WriteFmt(" }}\n"); + out.Write(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" }}\n"); } if (numTexgen > 4) { - out.WriteFmt(" }} else {{\n"); // >= 4 <= 8 + out.Write(" }} else {{\n"); // >= 4 <= 8 if (numTexgen > 6) - out.WriteFmt(" if (index < 6u) {{\n"); + out.Write(" if (index < 6u) {{\n"); if (numTexgen > 5) - out.WriteFmt(" return (index == 4u) ? tex4 : tex5;\n"); + out.Write(" return (index == 4u) ? tex4 : tex5;\n"); else - out.WriteFmt(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n"); if (numTexgen > 6) { - out.WriteFmt(" }} else {{\n"); // >= 6 <= 8 + out.Write(" }} else {{\n"); // >= 6 <= 8 if (numTexgen > 7) - out.WriteFmt(" return (index == 6u) ? tex6 : tex7;\n"); + out.Write(" return (index == 6u) ? tex6 : tex7;\n"); else - out.WriteFmt(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n"); - out.WriteFmt(" }}\n"); + out.Write(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" }}\n"); } - out.WriteFmt(" }}\n"); + out.Write(" }}\n"); } } - out.WriteFmt("}}\n\n"); + out.Write("}}\n\n"); } // ===================== @@ -227,100 +227,99 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, { // Doesn't look like DirectX supports this. Oh well the code path is here just in case it // supports this in the future. - out.WriteFmt("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"); + out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"); if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt(" return iround(texture(samp[sampler_num], uv) * 255.0);\n"); + out.Write(" return iround(texture(samp[sampler_num], uv) * 255.0);\n"); else if (ApiType == APIType::D3D) - out.WriteFmt(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n"); - out.WriteFmt("}}\n\n"); + out.Write(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n"); + out.Write("}}\n\n"); } else { - out.WriteFmt( - "int4 sampleTexture(uint sampler_num, float3 uv) {{\n" - " // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support " - "dynamic indexing of the sampler array\n" - " // With any luck the shader compiler will optimise this if the hardware supports " - "dynamic indexing.\n" - " switch(sampler_num) {{\n"); + out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n" + " // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support " + "dynamic indexing of the sampler array\n" + " // With any luck the shader compiler will optimise this if the hardware supports " + "dynamic indexing.\n" + " switch(sampler_num) {{\n"); for (int i = 0; i < 8; i++) { if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.WriteFmt(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i); + out.Write(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i); else if (ApiType == APIType::D3D) - out.WriteFmt(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i); + out.Write(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i); } - out.WriteFmt(" }}\n" - "}}\n\n"); + out.Write(" }}\n" + "}}\n\n"); } // ====================== // Arbitrary Swizzling // ====================== - out.WriteFmt("int4 Swizzle(uint s, int4 color) {{\n" - " // AKA: Color Channel Swapping\n" - "\n" - " int4 ret;\n"); - out.WriteFmt(" ret.r = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap1)); - out.WriteFmt(" ret.g = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap2)); - out.WriteFmt(" ret.b = color[{}];\n", - BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap1)); - out.WriteFmt(" ret.a = color[{}];\n", - BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap2)); - out.WriteFmt(" return ret;\n" - "}}\n\n"); + out.Write("int4 Swizzle(uint s, int4 color) {{\n" + " // AKA: Color Channel Swapping\n" + "\n" + " int4 ret;\n"); + out.Write(" ret.r = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap1)); + out.Write(" ret.g = color[{}];\n", BitfieldExtract("bpmem_tevksel(s * 2u)", TevKSel().swap2)); + out.Write(" ret.b = color[{}];\n", + BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap1)); + out.Write(" ret.a = color[{}];\n", + BitfieldExtract("bpmem_tevksel(s * 2u + 1u)", TevKSel().swap2)); + out.Write(" return ret;\n" + "}}\n\n"); // ====================== // Indirect Wrapping // ====================== - out.WriteFmt("int Wrap(int coord, uint mode) {{\n" - " if (mode == 0u) // ITW_OFF\n" - " return coord;\n" - " else if (mode < 6u) // ITW_256 to ITW_16\n" - " return coord & (0xfffe >> mode);\n" - " else // ITW_0\n" - " return 0;\n" - "}}\n\n"); + out.Write("int Wrap(int coord, uint mode) {{\n" + " if (mode == 0u) // ITW_OFF\n" + " return coord;\n" + " else if (mode < 6u) // ITW_256 to ITW_16\n" + " return coord & (0xfffe >> mode);\n" + " else // ITW_0\n" + " return 0;\n" + "}}\n\n"); // ====================== // Indirect Lookup // ====================== const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name, std::string_view in_index_name) { - out.WriteFmt("{{\n" - " uint iref = bpmem_iref({});\n" - " if ( iref != 0u)\n" - " {{\n" - " uint texcoord = bitfieldExtract(iref, 0, 3);\n" - " uint texmap = bitfieldExtract(iref, 8, 3);\n" - " float3 uv = getTexCoord(texcoord);\n" - " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS - "[texcoord].zw);\n" - "\n" - " if (({} & 1u) == 0u)\n" - " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n" - " else\n" - " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n" - "\n" - " {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) * " I_TEXDIMS - "[texmap].xy, {})).abg;\n", - in_index_name, in_index_name, in_index_name, in_index_name, out_var_name, - stereo ? "float(layer)" : "0.0"); - out.WriteFmt(" }}\n" - " else\n" - " {{\n" - " {} = int3(0, 0, 0);\n" - " }}\n" - "}}\n", - out_var_name); + out.Write("{{\n" + " uint iref = bpmem_iref({});\n" + " if ( iref != 0u)\n" + " {{\n" + " uint texcoord = bitfieldExtract(iref, 0, 3);\n" + " uint texmap = bitfieldExtract(iref, 8, 3);\n" + " float3 uv = getTexCoord(texcoord);\n" + " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS + "[texcoord].zw);\n" + "\n" + " if (({} & 1u) == 0u)\n" + " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n" + " else\n" + " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n" + "\n" + " {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) * " I_TEXDIMS + "[texmap].xy, {})).abg;\n", + in_index_name, in_index_name, in_index_name, in_index_name, out_var_name, + stereo ? "float(layer)" : "0.0"); + out.Write(" }}\n" + " else\n" + " {{\n" + " {} = int3(0, 0, 0);\n" + " }}\n" + "}}\n", + out_var_name); }; // ====================== // TEV's Special Lerp // ====================== const auto WriteTevLerp = [&out](std::string_view components) { - out.WriteFmt( + out.Write( "// TEV's Linear Interpolate, plus bias, add/subtract and scale\n" "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, bool alpha, " "uint shift) {{\n" @@ -363,7 +362,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // TEV's Color Compare // ======================= - out.WriteFmt( + out.Write( "// Implements operations 0-5 of TEV's compare mode,\n" "// which are common to both color and alpha channels\n" "bool tevCompare(uint op, int3 color_A, int3 color_B) {{\n" @@ -393,162 +392,162 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // Input Selects // ================= - out.WriteFmt("struct State {{\n" - " int4 Reg[4];\n" - " int4 TexColor;\n" - " int AlphaBump;\n" - "}};\n" - "struct StageState {{\n" - " uint stage;\n" - " uint order;\n" - " uint cc;\n" - " uint ac;\n" - "}};\n" - "\n" - "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);\n" - "int4 getKonstColor(State s, StageState ss);\n" - "\n"); + out.Write("struct State {{\n" + " int4 Reg[4];\n" + " int4 TexColor;\n" + " int AlphaBump;\n" + "}};\n" + "struct StageState {{\n" + " uint stage;\n" + " uint order;\n" + " uint cc;\n" + " uint ac;\n" + "}};\n" + "\n" + "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);\n" + "int4 getKonstColor(State s, StageState ss);\n" + "\n"); // The switch statements in these functions appear to get transformed into an if..else chain // on NVIDIA's OpenGL/Vulkan drivers, resulting in lower performance than the D3D counterparts. // Transforming the switch into a binary tree of ifs can increase performance by up to 20%. if (ApiType == APIType::D3D) { - out.WriteFmt("// Helper function for Alpha Test\n" - "bool alphaCompare(int a, int b, uint compare) {{\n" - " switch (compare) {{\n" - " case 0u: // NEVER\n" - " return false;\n" - " case 1u: // LESS\n" - " return a < b;\n" - " case 2u: // EQUAL\n" - " return a == b;\n" - " case 3u: // LEQUAL\n" - " return a <= b;\n" - " case 4u: // GREATER\n" - " return a > b;\n" - " case 5u: // NEQUAL;\n" - " return a != b;\n" - " case 6u: // GEQUAL\n" - " return a >= b;\n" - " case 7u: // ALWAYS\n" - " return true;\n" - " }}\n" - "}}\n" - "\n" - "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.rgb\n" - " return s.Reg[0].rgb;\n" - " case 1u: // prev.aaa\n" - " return s.Reg[0].aaa;\n" - " case 2u: // c0.rgb\n" - " return s.Reg[1].rgb;\n" - " case 3u: // c0.aaa\n" - " return s.Reg[1].aaa;\n" - " case 4u: // c1.rgb\n" - " return s.Reg[2].rgb;\n" - " case 5u: // c1.aaa\n" - " return s.Reg[2].aaa;\n" - " case 6u: // c2.rgb\n" - " return s.Reg[3].rgb;\n" - " case 7u: // c2.aaa\n" - " return s.Reg[3].aaa;\n" - " case 8u:\n" - " return s.TexColor.rgb;\n" - " case 9u:\n" - " return s.TexColor.aaa;\n" - " case 10u:\n" - " return getRasColor(s, ss, colors_0, colors_1).rgb;\n" - " case 11u:\n" - " return getRasColor(s, ss, colors_0, colors_1).aaa;\n" - " case 12u: // One\n" - " return int3(255, 255, 255);\n" - " case 13u: // Half\n" - " return int3(128, 128, 128);\n" - " case 14u:\n" - " return getKonstColor(s, ss).rgb;\n" - " case 15u: // Zero\n" - " return int3(0, 0, 0);\n" - " }}\n" - "}}\n" - "\n" - "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " - "uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev.a\n" - " return s.Reg[0].a;\n" - " case 1u: // c0.a\n" - " return s.Reg[1].a;\n" - " case 2u: // c1.a\n" - " return s.Reg[2].a;\n" - " case 3u: // c2.a\n" - " return s.Reg[3].a;\n" - " case 4u:\n" - " return s.TexColor.a;\n" - " case 5u:\n" - " return getRasColor(s, ss, colors_0, colors_1).a;\n" - " case 6u:\n" - " return getKonstColor(s, ss).a;\n" - " case 7u: // Zero\n" - " return 0;\n" - " }}\n" - "}}\n" - "\n" - "int4 getTevReg(in State s, uint index) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " return s.Reg[0];\n" - " case 1u: // c0\n" - " return s.Reg[1];\n" - " case 2u: // c1\n" - " return s.Reg[2];\n" - " case 3u: // c2\n" - " return s.Reg[3];\n" - " default: // prev\n" - " return s.Reg[0];\n" - " }}\n" - "}}\n" - "\n" - "void setRegColor(inout State s, uint index, int3 color) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].rgb = color;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].rgb = color;\n" - " break;\n" - " case 2u: // c1\n" - " s.Reg[2].rgb = color;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].rgb = color;\n" - " break;\n" - " }}\n" - "}}\n" - "\n" - "void setRegAlpha(inout State s, uint index, int alpha) {{\n" - " switch (index) {{\n" - " case 0u: // prev\n" - " s.Reg[0].a = alpha;\n" - " break;\n" - " case 1u: // c0\n" - " s.Reg[1].a = alpha;\n" - " break;\n" - " case 2u: // c1\n" - " s.Reg[2].a = alpha;\n" - " break;\n" - " case 3u: // c2\n" - " s.Reg[3].a = alpha;\n" - " break;\n" - " }}\n" - "}}\n" - "\n"); + out.Write("// Helper function for Alpha Test\n" + "bool alphaCompare(int a, int b, uint compare) {{\n" + " switch (compare) {{\n" + " case 0u: // NEVER\n" + " return false;\n" + " case 1u: // LESS\n" + " return a < b;\n" + " case 2u: // EQUAL\n" + " return a == b;\n" + " case 3u: // LEQUAL\n" + " return a <= b;\n" + " case 4u: // GREATER\n" + " return a > b;\n" + " case 5u: // NEQUAL;\n" + " return a != b;\n" + " case 6u: // GEQUAL\n" + " return a >= b;\n" + " case 7u: // ALWAYS\n" + " return true;\n" + " }}\n" + "}}\n" + "\n" + "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n" + " switch (index) {{\n" + " case 0u: // prev.rgb\n" + " return s.Reg[0].rgb;\n" + " case 1u: // prev.aaa\n" + " return s.Reg[0].aaa;\n" + " case 2u: // c0.rgb\n" + " return s.Reg[1].rgb;\n" + " case 3u: // c0.aaa\n" + " return s.Reg[1].aaa;\n" + " case 4u: // c1.rgb\n" + " return s.Reg[2].rgb;\n" + " case 5u: // c1.aaa\n" + " return s.Reg[2].aaa;\n" + " case 6u: // c2.rgb\n" + " return s.Reg[3].rgb;\n" + " case 7u: // c2.aaa\n" + " return s.Reg[3].aaa;\n" + " case 8u:\n" + " return s.TexColor.rgb;\n" + " case 9u:\n" + " return s.TexColor.aaa;\n" + " case 10u:\n" + " return getRasColor(s, ss, colors_0, colors_1).rgb;\n" + " case 11u:\n" + " return getRasColor(s, ss, colors_0, colors_1).aaa;\n" + " case 12u: // One\n" + " return int3(255, 255, 255);\n" + " case 13u: // Half\n" + " return int3(128, 128, 128);\n" + " case 14u:\n" + " return getKonstColor(s, ss).rgb;\n" + " case 15u: // Zero\n" + " return int3(0, 0, 0);\n" + " }}\n" + "}}\n" + "\n" + "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, " + "uint index) {{\n" + " switch (index) {{\n" + " case 0u: // prev.a\n" + " return s.Reg[0].a;\n" + " case 1u: // c0.a\n" + " return s.Reg[1].a;\n" + " case 2u: // c1.a\n" + " return s.Reg[2].a;\n" + " case 3u: // c2.a\n" + " return s.Reg[3].a;\n" + " case 4u:\n" + " return s.TexColor.a;\n" + " case 5u:\n" + " return getRasColor(s, ss, colors_0, colors_1).a;\n" + " case 6u:\n" + " return getKonstColor(s, ss).a;\n" + " case 7u: // Zero\n" + " return 0;\n" + " }}\n" + "}}\n" + "\n" + "int4 getTevReg(in State s, uint index) {{\n" + " switch (index) {{\n" + " case 0u: // prev\n" + " return s.Reg[0];\n" + " case 1u: // c0\n" + " return s.Reg[1];\n" + " case 2u: // c1\n" + " return s.Reg[2];\n" + " case 3u: // c2\n" + " return s.Reg[3];\n" + " default: // prev\n" + " return s.Reg[0];\n" + " }}\n" + "}}\n" + "\n" + "void setRegColor(inout State s, uint index, int3 color) {{\n" + " switch (index) {{\n" + " case 0u: // prev\n" + " s.Reg[0].rgb = color;\n" + " break;\n" + " case 1u: // c0\n" + " s.Reg[1].rgb = color;\n" + " break;\n" + " case 2u: // c1\n" + " s.Reg[2].rgb = color;\n" + " break;\n" + " case 3u: // c2\n" + " s.Reg[3].rgb = color;\n" + " break;\n" + " }}\n" + "}}\n" + "\n" + "void setRegAlpha(inout State s, uint index, int alpha) {{\n" + " switch (index) {{\n" + " case 0u: // prev\n" + " s.Reg[0].a = alpha;\n" + " break;\n" + " case 1u: // c0\n" + " s.Reg[1].a = alpha;\n" + " break;\n" + " case 2u: // c1\n" + " s.Reg[2].a = alpha;\n" + " break;\n" + " case 3u: // c2\n" + " s.Reg[3].a = alpha;\n" + " break;\n" + " }}\n" + "}}\n" + "\n"); } else { - out.WriteFmt( + out.Write( "// Helper function for Alpha Test\n" "bool alphaCompare(int a, int b, uint compare) {{\n" " if (compare < 4u) {{\n" @@ -673,266 +672,263 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, { if (ApiType != APIType::D3D) { - out.WriteFmt("#define getTexCoord(index) selectTexCoord((index))\n\n"); + out.Write("#define getTexCoord(index) selectTexCoord((index))\n\n"); } else { - out.WriteFmt("#define getTexCoord(index) selectTexCoord((index)"); + out.Write("#define getTexCoord(index) selectTexCoord((index)"); for (u32 i = 0; i < numTexgen; i++) - out.WriteFmt(", tex{}", i); - out.WriteFmt(")\n\n"); + out.Write(", tex{}", i); + out.Write(")\n\n"); } } if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { if (early_depth && host_config.backend_early_z) - out.WriteFmt("FORCE_EARLY_Z;\n"); + out.Write("FORCE_EARLY_Z;\n"); - out.WriteFmt("void main()\n{{\n"); - out.WriteFmt(" float4 rawpos = gl_FragCoord;\n"); + out.Write("void main()\n{{\n"); + out.Write(" float4 rawpos = gl_FragCoord;\n"); if (use_shader_blend) { // Store off a copy of the initial fb value for blending - out.WriteFmt(" float4 initial_ocol0 = FB_FETCH_VALUE;\n" - " float4 ocol0;\n" - " float4 ocol1;\n"); + out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n" + " float4 ocol0;\n" + " float4 ocol1;\n"); } } else // D3D { if (early_depth && host_config.backend_early_z) - out.WriteFmt("[earlydepthstencil]\n"); + out.Write("[earlydepthstencil]\n"); - out.WriteFmt("void main(\n"); + out.Write("void main(\n"); if (uid_data->uint_output) { - out.WriteFmt(" out uint4 ocol0 : SV_Target,\n"); + out.Write(" out uint4 ocol0 : SV_Target,\n"); } else { - out.WriteFmt(" out float4 ocol0 : SV_Target0,\n" - " out float4 ocol1 : SV_Target1,\n"); + out.Write(" out float4 ocol0 : SV_Target0,\n" + " out float4 ocol1 : SV_Target1,\n"); } if (per_pixel_depth) - out.WriteFmt(" out float depth : SV_Depth,\n"); - out.WriteFmt(" in float4 rawpos : SV_Position,\n"); - out.WriteFmt(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa)); + out.Write(" out float depth : SV_Depth,\n"); + out.Write(" in float4 rawpos : SV_Position,\n"); + out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa)); + out.Write(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa)); // compute window position if needed because binding semantic WPOS is not widely supported for (u32 i = 0; i < numTexgen; ++i) { - out.WriteFmt(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, - i); + out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i, + i); } if (!host_config.fast_depth_calc) { - out.WriteFmt("\n,\n in {} float4 clipPos : TEXCOORD{}", - GetInterpolationQualifier(msaa, ssaa), numTexgen); + out.Write("\n,\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + numTexgen); } if (per_pixel_lighting) { - out.WriteFmt(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - numTexgen + 1); - out.WriteFmt(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), - numTexgen + 2); + out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + numTexgen + 1); + out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), + numTexgen + 2); } - out.WriteFmt(",\n in float clipDist0 : SV_ClipDistance0\n" - ",\n in float clipDist1 : SV_ClipDistance1\n"); + out.Write(",\n in float clipDist0 : SV_ClipDistance0\n" + ",\n in float clipDist1 : SV_ClipDistance1\n"); if (stereo) - out.WriteFmt(",\n in uint layer : SV_RenderTargetArrayIndex\n"); - out.WriteFmt("\n ) {{\n"); + out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); + out.Write("\n ) {{\n"); } - out.WriteFmt(" int3 tevcoord = int3(0, 0, 0);\n" - " State s;\n" - " s.TexColor = int4(0, 0, 0, 0);\n" - " s.AlphaBump = 0;\n" - "\n"); + out.Write(" int3 tevcoord = int3(0, 0, 0);\n" + " State s;\n" + " s.TexColor = int4(0, 0, 0, 0);\n" + " s.AlphaBump = 0;\n" + "\n"); for (int i = 0; i < 4; i++) - out.WriteFmt(" s.Reg[{}] = " I_COLORS "[{}];\n", i, i); + out.Write(" s.Reg[{}] = " I_COLORS "[{}];\n", i, i); const char* color_input_prefix = ""; if (per_pixel_lighting) { - out.WriteFmt(" float4 lit_colors_0 = colors_0;\n" - " float4 lit_colors_1 = colors_1;\n" - " float3 lit_normal = normalize(Normal.xyz);\n" - " float3 lit_pos = WorldPos.xyz;\n"); + out.Write(" float4 lit_colors_0 = colors_0;\n" + " float4 lit_colors_1 = colors_1;\n" + " float3 lit_normal = normalize(Normal.xyz);\n" + " float3 lit_pos = WorldPos.xyz;\n"); WriteVertexLighting(out, ApiType, "lit_pos", "lit_normal", "colors_0", "colors_1", "lit_colors_0", "lit_colors_1"); color_input_prefix = "lit_"; } - out.WriteFmt(" uint num_stages = {};\n\n", - BitfieldExtract("bpmem_genmode", bpmem.genMode.numtevstages)); + out.Write(" uint num_stages = {};\n\n", + BitfieldExtract("bpmem_genmode", bpmem.genMode.numtevstages)); - out.WriteFmt(" // Main tev loop\n"); + out.Write(" // Main tev loop\n"); if (ApiType == APIType::D3D) { // Tell DirectX we don't want this loop unrolled (it crashes if it tries to) - out.WriteFmt(" [loop]\n"); + out.Write(" [loop]\n"); } - out.WriteFmt( - " for(uint stage = 0u; stage <= num_stages; stage++)\n" - " {{\n" - " StageState ss;\n" - " ss.stage = stage;\n" - " ss.cc = bpmem_combiners(stage).x;\n" - " ss.ac = bpmem_combiners(stage).y;\n" - " ss.order = bpmem_tevorder(stage>>1);\n" - " if ((stage & 1u) == 1u)\n" - " ss.order = ss.order >> {};\n\n", - int(TwoTevStageOrders().enable1.StartBit() - TwoTevStageOrders().enable0.StartBit())); + out.Write(" for(uint stage = 0u; stage <= num_stages; stage++)\n" + " {{\n" + " StageState ss;\n" + " ss.stage = stage;\n" + " ss.cc = bpmem_combiners(stage).x;\n" + " ss.ac = bpmem_combiners(stage).y;\n" + " ss.order = bpmem_tevorder(stage>>1);\n" + " if ((stage & 1u) == 1u)\n" + " ss.order = ss.order >> {};\n\n", + int(TwoTevStageOrders().enable1.StartBit() - TwoTevStageOrders().enable0.StartBit())); // Disable texturing when there are no texgens (for now) if (numTexgen != 0) { - out.WriteFmt(" uint tex_coord = {};\n", - BitfieldExtract("ss.order", TwoTevStageOrders().texcoord0)); - out.WriteFmt(" float3 uv = getTexCoord(tex_coord);\n" - " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS - "[tex_coord].zw);\n" - "\n" - " bool texture_enabled = (ss.order & {}u) != 0u;\n", - 1 << TwoTevStageOrders().enable0.StartBit()); - out.WriteFmt("\n" - " // Indirect textures\n" - " uint tevind = bpmem_tevind(stage);\n" - " if (tevind != 0u)\n" - " {{\n" - " uint bs = {};\n", - BitfieldExtract("tevind", TevStageIndirect().bs)); - out.WriteFmt(" uint fmt = {};\n", BitfieldExtract("tevind", TevStageIndirect().fmt)); - out.WriteFmt(" uint bias = {};\n", BitfieldExtract("tevind", TevStageIndirect().bias)); - out.WriteFmt(" uint bt = {};\n", BitfieldExtract("tevind", TevStageIndirect().bt)); - out.WriteFmt(" uint mid = {};\n", BitfieldExtract("tevind", TevStageIndirect().mid)); - out.WriteFmt("\n"); - out.WriteFmt(" int3 indcoord;\n"); + out.Write(" uint tex_coord = {};\n", + BitfieldExtract("ss.order", TwoTevStageOrders().texcoord0)); + out.Write(" float3 uv = getTexCoord(tex_coord);\n" + " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS + "[tex_coord].zw);\n" + "\n" + " bool texture_enabled = (ss.order & {}u) != 0u;\n", + 1 << TwoTevStageOrders().enable0.StartBit()); + out.Write("\n" + " // Indirect textures\n" + " uint tevind = bpmem_tevind(stage);\n" + " if (tevind != 0u)\n" + " {{\n" + " uint bs = {};\n", + BitfieldExtract("tevind", TevStageIndirect().bs)); + out.Write(" uint fmt = {};\n", BitfieldExtract("tevind", TevStageIndirect().fmt)); + out.Write(" uint bias = {};\n", BitfieldExtract("tevind", TevStageIndirect().bias)); + out.Write(" uint bt = {};\n", BitfieldExtract("tevind", TevStageIndirect().bt)); + out.Write(" uint mid = {};\n", BitfieldExtract("tevind", TevStageIndirect().mid)); + out.Write("\n"); + out.Write(" int3 indcoord;\n"); LookupIndirectTexture("indcoord", "bt"); - out.WriteFmt(" if (bs != 0u)\n" - " s.AlphaBump = indcoord[bs - 1u];\n" - " switch(fmt)\n" - " {{\n" - " case {}u:\n", - ITF_8); - out.WriteFmt(" indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);\n" - " indcoord.y = indcoord.y + ((bias & 2u) != 0u ? -128 : 0);\n" - " indcoord.z = indcoord.z + ((bias & 4u) != 0u ? -128 : 0);\n" - " s.AlphaBump = s.AlphaBump & 0xf8;\n" - " break;\n" - " case {}u:\n", - ITF_5); - out.WriteFmt(" indcoord.x = (indcoord.x & 0x1f) + ((bias & 1u) != 0u ? 1 : 0);\n" - " indcoord.y = (indcoord.y & 0x1f) + ((bias & 2u) != 0u ? 1 : 0);\n" - " indcoord.z = (indcoord.z & 0x1f) + ((bias & 4u) != 0u ? 1 : 0);\n" - " s.AlphaBump = s.AlphaBump & 0xe0;\n" - " break;\n" - " case {}u:\n", - ITF_4); - out.WriteFmt(" indcoord.x = (indcoord.x & 0x0f) + ((bias & 1u) != 0u ? 1 : 0);\n" - " indcoord.y = (indcoord.y & 0x0f) + ((bias & 2u) != 0u ? 1 : 0);\n" - " indcoord.z = (indcoord.z & 0x0f) + ((bias & 4u) != 0u ? 1 : 0);\n" - " s.AlphaBump = s.AlphaBump & 0xf0;\n" - " break;\n" - " case {}u:\n", - ITF_3); - out.WriteFmt(" indcoord.x = (indcoord.x & 0x07) + ((bias & 1u) != 0u ? 1 : 0);\n" - " indcoord.y = (indcoord.y & 0x07) + ((bias & 2u) != 0u ? 1 : 0);\n" - " indcoord.z = (indcoord.z & 0x07) + ((bias & 4u) != 0u ? 1 : 0);\n" - " s.AlphaBump = s.AlphaBump & 0xf8;\n" - " break;\n" - " }}\n" - "\n" - " // Matrix multiply\n" - " int2 indtevtrans = int2(0, 0);\n" - " if ((mid & 3u) != 0u)\n" - " {{\n" - " uint mtxidx = 2u * ((mid & 3u) - 1u);\n" - " int shift = " I_INDTEXMTX "[mtxidx].w;\n" - "\n" - " switch (mid >> 2)\n" - " {{\n" - " case 0u: // 3x2 S0.10 matrix\n" - " indtevtrans = int2(idot(" I_INDTEXMTX - "[mtxidx].xyz, indcoord), idot(" I_INDTEXMTX - "[mtxidx + 1u].xyz, indcoord)) >> 3;\n" - " break;\n" - " case 1u: // S matrix, S17.7 format\n" - " indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;\n" - " break;\n" - " case 2u: // T matrix, S17.7 format\n" - " indtevtrans = (fixedPoint_uv * indcoord.yy) >> 8;\n" - " break;\n" - " }}\n" - "\n" - " if (shift >= 0)\n" - " indtevtrans = indtevtrans >> shift;\n" - " else\n" - " indtevtrans = indtevtrans << ((-shift) & 31);\n" - " }}\n" - "\n" - " // Wrapping\n" - " uint sw = {};\n", - BitfieldExtract("tevind", TevStageIndirect().sw)); - out.WriteFmt(" uint tw = {}; \n", BitfieldExtract("tevind", TevStageIndirect().tw)); - out.WriteFmt( + out.Write(" if (bs != 0u)\n" + " s.AlphaBump = indcoord[bs - 1u];\n" + " switch(fmt)\n" + " {{\n" + " case {}u:\n", + ITF_8); + out.Write(" indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);\n" + " indcoord.y = indcoord.y + ((bias & 2u) != 0u ? -128 : 0);\n" + " indcoord.z = indcoord.z + ((bias & 4u) != 0u ? -128 : 0);\n" + " s.AlphaBump = s.AlphaBump & 0xf8;\n" + " break;\n" + " case {}u:\n", + ITF_5); + out.Write(" indcoord.x = (indcoord.x & 0x1f) + ((bias & 1u) != 0u ? 1 : 0);\n" + " indcoord.y = (indcoord.y & 0x1f) + ((bias & 2u) != 0u ? 1 : 0);\n" + " indcoord.z = (indcoord.z & 0x1f) + ((bias & 4u) != 0u ? 1 : 0);\n" + " s.AlphaBump = s.AlphaBump & 0xe0;\n" + " break;\n" + " case {}u:\n", + ITF_4); + out.Write(" indcoord.x = (indcoord.x & 0x0f) + ((bias & 1u) != 0u ? 1 : 0);\n" + " indcoord.y = (indcoord.y & 0x0f) + ((bias & 2u) != 0u ? 1 : 0);\n" + " indcoord.z = (indcoord.z & 0x0f) + ((bias & 4u) != 0u ? 1 : 0);\n" + " s.AlphaBump = s.AlphaBump & 0xf0;\n" + " break;\n" + " case {}u:\n", + ITF_3); + out.Write(" indcoord.x = (indcoord.x & 0x07) + ((bias & 1u) != 0u ? 1 : 0);\n" + " indcoord.y = (indcoord.y & 0x07) + ((bias & 2u) != 0u ? 1 : 0);\n" + " indcoord.z = (indcoord.z & 0x07) + ((bias & 4u) != 0u ? 1 : 0);\n" + " s.AlphaBump = s.AlphaBump & 0xf8;\n" + " break;\n" + " }}\n" + "\n" + " // Matrix multiply\n" + " int2 indtevtrans = int2(0, 0);\n" + " if ((mid & 3u) != 0u)\n" + " {{\n" + " uint mtxidx = 2u * ((mid & 3u) - 1u);\n" + " int shift = " I_INDTEXMTX "[mtxidx].w;\n" + "\n" + " switch (mid >> 2)\n" + " {{\n" + " case 0u: // 3x2 S0.10 matrix\n" + " indtevtrans = int2(idot(" I_INDTEXMTX + "[mtxidx].xyz, indcoord), idot(" I_INDTEXMTX "[mtxidx + 1u].xyz, indcoord)) >> 3;\n" + " break;\n" + " case 1u: // S matrix, S17.7 format\n" + " indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;\n" + " break;\n" + " case 2u: // T matrix, S17.7 format\n" + " indtevtrans = (fixedPoint_uv * indcoord.yy) >> 8;\n" + " break;\n" + " }}\n" + "\n" + " if (shift >= 0)\n" + " indtevtrans = indtevtrans >> shift;\n" + " else\n" + " indtevtrans = indtevtrans << ((-shift) & 31);\n" + " }}\n" + "\n" + " // Wrapping\n" + " uint sw = {};\n", + BitfieldExtract("tevind", TevStageIndirect().sw)); + out.Write(" uint tw = {}; \n", BitfieldExtract("tevind", TevStageIndirect().tw)); + out.Write( " int2 wrapped_coord = int2(Wrap(fixedPoint_uv.x, sw), Wrap(fixedPoint_uv.y, tw));\n" "\n" " if ((tevind & {}u) != 0u) // add previous tevcoord\n", 1 << TevStageIndirect().fb_addprev.StartBit()); - out.WriteFmt(" tevcoord.xy += wrapped_coord + indtevtrans;\n" - " else\n" - " tevcoord.xy = wrapped_coord + indtevtrans;\n" - "\n" - " // Emulate s24 overflows\n" - " tevcoord.xy = (tevcoord.xy << 8) >> 8;\n" - " }}\n" - " else if (texture_enabled)\n" - " {{\n" - " tevcoord.xy = fixedPoint_uv;\n" - " }}\n" - "\n" - " // Sample texture for stage\n" - " if (texture_enabled) {{\n" - " uint sampler_num = {};\n", - BitfieldExtract("ss.order", TwoTevStageOrders().texmap0)); - out.WriteFmt("\n" - " float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n"); - out.WriteFmt(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n", - stereo ? "float(layer)" : "0.0"); - out.WriteFmt(" uint swap = {};\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.tswap)); - out.WriteFmt(" s.TexColor = Swizzle(swap, color);\n"); - out.WriteFmt(" }} else {{\n" - " // Texture is disabled\n" - " s.TexColor = int4(255, 255, 255, 255);\n" - " }}\n" - "\n"); + out.Write(" tevcoord.xy += wrapped_coord + indtevtrans;\n" + " else\n" + " tevcoord.xy = wrapped_coord + indtevtrans;\n" + "\n" + " // Emulate s24 overflows\n" + " tevcoord.xy = (tevcoord.xy << 8) >> 8;\n" + " }}\n" + " else if (texture_enabled)\n" + " {{\n" + " tevcoord.xy = fixedPoint_uv;\n" + " }}\n" + "\n" + " // Sample texture for stage\n" + " if (texture_enabled) {{\n" + " uint sampler_num = {};\n", + BitfieldExtract("ss.order", TwoTevStageOrders().texmap0)); + out.Write("\n" + " float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n"); + out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n", + stereo ? "float(layer)" : "0.0"); + out.Write(" uint swap = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.tswap)); + out.Write(" s.TexColor = Swizzle(swap, color);\n"); + out.Write(" }} else {{\n" + " // Texture is disabled\n" + " s.TexColor = int4(255, 255, 255, 255);\n" + " }}\n" + "\n"); } - out.WriteFmt(" // This is the Meat of TEV\n" - " {{\n" - " // Color Combiner\n"); - out.WriteFmt(" uint color_a = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.a)); - out.WriteFmt(" uint color_b = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.b)); - out.WriteFmt(" uint color_c = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.c)); - out.WriteFmt(" uint color_d = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.d)); + out.Write(" // This is the Meat of TEV\n" + " {{\n" + " // Color Combiner\n"); + out.Write(" uint color_a = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.a)); + out.Write(" uint color_b = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.b)); + out.Write(" uint color_c = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.c)); + out.Write(" uint color_d = {};\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.d)); - out.WriteFmt(" uint color_bias = {};\n", - BitfieldExtract("ss.cc", TevStageCombiner().colorC.bias)); - out.WriteFmt(" bool color_op = bool({});\n", - BitfieldExtract("ss.cc", TevStageCombiner().colorC.op)); - out.WriteFmt(" bool color_clamp = bool({});\n", - BitfieldExtract("ss.cc", TevStageCombiner().colorC.clamp)); - out.WriteFmt(" uint color_shift = {};\n", - BitfieldExtract("ss.cc", TevStageCombiner().colorC.shift)); - out.WriteFmt(" uint color_dest = {};\n", - BitfieldExtract("ss.cc", TevStageCombiner().colorC.dest)); + out.Write(" uint color_bias = {};\n", + BitfieldExtract("ss.cc", TevStageCombiner().colorC.bias)); + out.Write(" bool color_op = bool({});\n", + BitfieldExtract("ss.cc", TevStageCombiner().colorC.op)); + out.Write(" bool color_clamp = bool({});\n", + BitfieldExtract("ss.cc", TevStageCombiner().colorC.clamp)); + out.Write(" uint color_shift = {};\n", + BitfieldExtract("ss.cc", TevStageCombiner().colorC.shift)); + out.Write(" uint color_dest = {};\n", + BitfieldExtract("ss.cc", TevStageCombiner().colorC.dest)); - out.WriteFmt( + out.Write( " uint color_compare_op = color_shift << 1 | uint(color_op);\n" "\n" " int3 color_A = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_a) & " @@ -945,7 +941,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "bits + sign\n" "\n", // TODO: do we need to sign extend? color_input_prefix); - out.WriteFmt( + out.Write( " int3 color;\n" " if (color_bias != 3u) {{ // Normal mode\n" " color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, " @@ -981,24 +977,24 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "\n"); // Alpha combiner - out.WriteFmt(" // Alpha Combiner\n"); - out.WriteFmt(" uint alpha_a = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.a)); - out.WriteFmt(" uint alpha_b = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.b)); - out.WriteFmt(" uint alpha_c = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.c)); - out.WriteFmt(" uint alpha_d = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.d)); + out.Write(" // Alpha Combiner\n"); + out.Write(" uint alpha_a = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.a)); + out.Write(" uint alpha_b = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.b)); + out.Write(" uint alpha_c = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.c)); + out.Write(" uint alpha_d = {};\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.d)); - out.WriteFmt(" uint alpha_bias = {};\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.bias)); - out.WriteFmt(" bool alpha_op = bool({});\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.op)); - out.WriteFmt(" bool alpha_clamp = bool({});\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.clamp)); - out.WriteFmt(" uint alpha_shift = {};\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.shift)); - out.WriteFmt(" uint alpha_dest = {};\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.dest)); + out.Write(" uint alpha_bias = {};\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.bias)); + out.Write(" bool alpha_op = bool({});\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.op)); + out.Write(" bool alpha_clamp = bool({});\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.clamp)); + out.Write(" uint alpha_shift = {};\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.shift)); + out.Write(" uint alpha_dest = {};\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.dest)); - out.WriteFmt( + out.Write( " uint alpha_compare_op = alpha_shift << 1 | uint(alpha_op);\n" "\n" " int alpha_A;\n" @@ -1013,60 +1009,60 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "+ sign\n" "\n", // TODO: do we need to sign extend? color_input_prefix); - out.WriteFmt("\n" - " int alpha;\n" - " if (alpha_bias != 3u) {{ // Normal mode\n" - " alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, " - "true, alpha_shift);\n" - " }} else {{ // Compare mode\n" - " if (alpha_compare_op == 6u) {{\n" - " // TEVCMP_A8_GT\n" - " alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n" - " }} else if (alpha_compare_op == 7u) {{\n" - " // TEVCMP_A8_EQ\n" - " alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n" - " }} else {{\n" - " // All remaining alpha compare ops actually compare the color channels\n" - " alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n" - " }}\n" - " alpha = alpha_D + alpha;\n" - " }}\n" - "\n" - " // Clamp result\n" - " if (alpha_clamp)\n" - " alpha = clamp(alpha, 0, 255);\n" - " else\n" - " alpha = clamp(alpha, -1024, 1023);\n" - "\n" - " // Write result to the correct input register of the next stage\n" - " setRegAlpha(s, alpha_dest, alpha);\n" - " }}\n"); + out.Write("\n" + " int alpha;\n" + " if (alpha_bias != 3u) {{ // Normal mode\n" + " alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, " + "true, alpha_shift);\n" + " }} else {{ // Compare mode\n" + " if (alpha_compare_op == 6u) {{\n" + " // TEVCMP_A8_GT\n" + " alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n" + " }} else if (alpha_compare_op == 7u) {{\n" + " // TEVCMP_A8_EQ\n" + " alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n" + " }} else {{\n" + " // All remaining alpha compare ops actually compare the color channels\n" + " alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n" + " }}\n" + " alpha = alpha_D + alpha;\n" + " }}\n" + "\n" + " // Clamp result\n" + " if (alpha_clamp)\n" + " alpha = clamp(alpha, 0, 255);\n" + " else\n" + " alpha = clamp(alpha, -1024, 1023);\n" + "\n" + " // Write result to the correct input register of the next stage\n" + " setRegAlpha(s, alpha_dest, alpha);\n" + " }}\n"); - out.WriteFmt(" }} // Main TEV loop\n" - "\n"); + out.Write(" }} // Main TEV loop\n" + "\n"); // Select the output color and alpha registers from the last stage. - out.WriteFmt(" int4 TevResult;\n"); - out.WriteFmt(" TevResult.xyz = getTevReg(s, {}).xyz;\n", - BitfieldExtract("bpmem_combiners(num_stages).x", TevStageCombiner().colorC.dest)); - out.WriteFmt(" TevResult.w = getTevReg(s, {}).w;\n", - BitfieldExtract("bpmem_combiners(num_stages).y", TevStageCombiner().alphaC.dest)); + out.Write(" int4 TevResult;\n"); + out.Write(" TevResult.xyz = getTevReg(s, {}).xyz;\n", + BitfieldExtract("bpmem_combiners(num_stages).x", TevStageCombiner().colorC.dest)); + out.Write(" TevResult.w = getTevReg(s, {}).w;\n", + BitfieldExtract("bpmem_combiners(num_stages).y", TevStageCombiner().alphaC.dest)); - out.WriteFmt(" TevResult &= 255;\n\n"); + out.Write(" TevResult &= 255;\n\n"); if (host_config.fast_depth_calc) { if (!host_config.backend_reversed_depth_range) - out.WriteFmt(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); + out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else - out.WriteFmt(" int zCoord = int(rawpos.z * 16777216.0);\n"); - out.WriteFmt(" zCoord = clamp(zCoord, 0, 0xFFFFFF);\n" - "\n"); + out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n"); + out.Write(" zCoord = clamp(zCoord, 0, 0xFFFFFF);\n" + "\n"); } else { - out.WriteFmt("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS - "[1].y));\n"); + out.Write("\tint zCoord = " I_ZBIAS "[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS + "[1].y));\n"); } // =========== @@ -1076,81 +1072,81 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, if (per_pixel_depth) { // Zfreeze forces early depth off - out.WriteFmt(" // ZFreeze\n" - " if ((bpmem_genmode & {}u) != 0u) {{\n", - 1 << GenMode().zfreeze.StartBit()); - out.WriteFmt(" float2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); + out.Write(" // ZFreeze\n" + " if ((bpmem_genmode & {}u) != 0u) {{\n", + 1 << GenMode().zfreeze.StartBit()); + out.Write(" float2 screenpos = rawpos.xy * " I_EFBSCALE ".xy;\n"); if (ApiType == APIType::OpenGL) { - out.WriteFmt(" // OpenGL has reversed vertical screenspace coordinates\n" - " screenpos.y = 528.0 - screenpos.y;\n"); + out.Write(" // OpenGL has reversed vertical screenspace coordinates\n" + " screenpos.y = 528.0 - screenpos.y;\n"); } - out.WriteFmt(" zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE - ".y * screenpos.y);\n" - " }}\n" - "\n"); + out.Write(" zCoord = int(" I_ZSLOPE ".z + " I_ZSLOPE ".x * screenpos.x + " I_ZSLOPE + ".y * screenpos.y);\n" + " }}\n" + "\n"); } // ================= // Depth Texture // ================= - out.WriteFmt(" // Depth Texture\n" - " int early_zCoord = zCoord;\n" - " if (bpmem_ztex_op != 0u) {{\n" - " int ztex = int(" I_ZBIAS "[1].w); // fixed bias\n" - "\n" - " // Whatever texture was in our last stage, it's now our depth texture\n" - " ztex += idot(s.TexColor.xyzw, " I_ZBIAS "[0].xyzw);\n" - " ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;\n" - " zCoord = ztex & 0xFFFFFF;\n" - " }}\n" - "\n"); + out.Write(" // Depth Texture\n" + " int early_zCoord = zCoord;\n" + " if (bpmem_ztex_op != 0u) {{\n" + " int ztex = int(" I_ZBIAS "[1].w); // fixed bias\n" + "\n" + " // Whatever texture was in our last stage, it's now our depth texture\n" + " ztex += idot(s.TexColor.xyzw, " I_ZBIAS "[0].xyzw);\n" + " ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;\n" + " zCoord = ztex & 0xFFFFFF;\n" + " }}\n" + "\n"); if (per_pixel_depth) { - out.WriteFmt(" // If early depth is enabled, write to zbuffer before depth textures\n" - " // If early depth isn't enabled, we write to the zbuffer here\n" - " int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n"); + out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n" + " // If early depth isn't enabled, we write to the zbuffer here\n" + " int zbuffer_zCoord = bpmem_late_ztest ? zCoord : early_zCoord;\n"); if (!host_config.backend_reversed_depth_range) - out.WriteFmt(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n"); + out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n"); else - out.WriteFmt(" depth = float(zbuffer_zCoord) / 16777216.0;\n"); + out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n"); } - out.WriteFmt(" // Alpha Test\n" - " if (bpmem_alphaTest != 0u) {{\n" - " bool comp0 = alphaCompare(TevResult.a, " I_ALPHA ".r, {});\n", - BitfieldExtract("bpmem_alphaTest", AlphaTest().comp0)); - out.WriteFmt(" bool comp1 = alphaCompare(TevResult.a, " I_ALPHA ".g, {});\n", - BitfieldExtract("bpmem_alphaTest", AlphaTest().comp1)); - out.WriteFmt("\n" - " // These if statements are written weirdly to work around intel and Qualcomm " - "bugs with handling booleans.\n" - " switch ({}) {{\n", - BitfieldExtract("bpmem_alphaTest", AlphaTest().logic)); - out.WriteFmt(" case 0u: // AND\n" - " if (comp0 && comp1) break; else discard; break;\n" - " case 1u: // OR\n" - " if (comp0 || comp1) break; else discard; break;\n" - " case 2u: // XOR\n" - " if (comp0 != comp1) break; else discard; break;\n" - " case 3u: // XNOR\n" - " if (comp0 == comp1) break; else discard; break;\n" - " }}\n" - " }}\n" - "\n"); + out.Write(" // Alpha Test\n" + " if (bpmem_alphaTest != 0u) {{\n" + " bool comp0 = alphaCompare(TevResult.a, " I_ALPHA ".r, {});\n", + BitfieldExtract("bpmem_alphaTest", AlphaTest().comp0)); + out.Write(" bool comp1 = alphaCompare(TevResult.a, " I_ALPHA ".g, {});\n", + BitfieldExtract("bpmem_alphaTest", AlphaTest().comp1)); + out.Write("\n" + " // These if statements are written weirdly to work around intel and Qualcomm " + "bugs with handling booleans.\n" + " switch ({}) {{\n", + BitfieldExtract("bpmem_alphaTest", AlphaTest().logic)); + out.Write(" case 0u: // AND\n" + " if (comp0 && comp1) break; else discard; break;\n" + " case 1u: // OR\n" + " if (comp0 || comp1) break; else discard; break;\n" + " case 2u: // XOR\n" + " if (comp0 != comp1) break; else discard; break;\n" + " case 3u: // XNOR\n" + " if (comp0 == comp1) break; else discard; break;\n" + " }}\n" + " }}\n" + "\n"); // ========= // Dithering // ========= - out.WriteFmt(" if (bpmem_dither) {{\n" - " // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering\n" - " // Here the matrix is encoded into the two factor constants\n" - " int2 dither = int2(rawpos.xy) & 1;\n" - " TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + abs(dither.y * 3 - " - "dither.x * 2);\n" - " }}\n\n"); + out.Write(" if (bpmem_dither) {{\n" + " // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering\n" + " // Here the matrix is encoded into the two factor constants\n" + " int2 dither = int2(rawpos.xy) & 1;\n" + " TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + abs(dither.y * 3 - " + "dither.x * 2);\n" + " }}\n\n"); // ========= // Fog @@ -1158,102 +1154,101 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // FIXME: Fog is implemented the same as ShaderGen, but ShaderGen's fog is all hacks. // Should be fixed point, and should not make guesses about Range-Based adjustments. - out.WriteFmt(" // Fog\n" - " uint fog_function = {};\n", - BitfieldExtract("bpmem_fogParam3", FogParam3().fsel)); - out.WriteFmt(" if (fog_function != 0u) {{\n" - " // TODO: This all needs to be converted from float to fixed point\n" - " float ze;\n" - " if ({} == 0u) {{\n", - BitfieldExtract("bpmem_fogParam3", FogParam3().proj)); - out.WriteFmt(" // perspective\n" - " // ze = A/(B - (Zs >> B_SHF)\n" - " ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI - ".w));\n" - " }} else {{\n" - " // orthographic\n" - " // ze = a*Zs (here, no B_SHF)\n" - " ze = " I_FOGF ".z * float(zCoord) / 16777216.0;\n" - " }}\n" - "\n" - " if (bool({})) {{\n", - BitfieldExtract("bpmem_fogRangeBase", FogRangeParams::RangeBase().Enabled)); - out.WriteFmt(" // x_adjust = sqrt((x-center)^2 + k^2)/k\n" - " // ze *= x_adjust\n" - " float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" - " float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n" - " uint indexlower = uint(floatindex);\n" - " uint indexupper = indexlower + 1u;\n" - " float klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n" - " float kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n" - " float k = lerp(klower, kupper, frac(floatindex));\n" - " float x_adjust = sqrt(offset * offset + k * k) / k;\n" - " ze *= x_adjust;\n" - " }}\n" - "\n" - " float fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n" - "\n" - " if (fog_function > 3u) {{\n" - " switch (fog_function) {{\n" - " case 4u:\n" - " fog = 1.0 - exp2(-8.0 * fog);\n" - " break;\n" - " case 5u:\n" - " fog = 1.0 - exp2(-8.0 * fog * fog);\n" - " break;\n" - " case 6u:\n" - " fog = exp2(-8.0 * (1.0 - fog));\n" - " break;\n" - " case 7u:\n" - " fog = 1.0 - fog;\n" - " fog = exp2(-8.0 * fog * fog);\n" - " break;\n" - " }}\n" - " }}\n" - "\n" - " int ifog = iround(fog * 256.0);\n" - " TevResult.rgb = (TevResult.rgb * (256 - ifog) + " I_FOGCOLOR - ".rgb * ifog) >> 8;\n" - " }}\n" - "\n"); + out.Write(" // Fog\n" + " uint fog_function = {};\n", + BitfieldExtract("bpmem_fogParam3", FogParam3().fsel)); + out.Write(" if (fog_function != 0u) {{\n" + " // TODO: This all needs to be converted from float to fixed point\n" + " float ze;\n" + " if ({} == 0u) {{\n", + BitfieldExtract("bpmem_fogParam3", FogParam3().proj)); + out.Write(" // perspective\n" + " // ze = A/(B - (Zs >> B_SHF)\n" + " ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI + ".w));\n" + " }} else {{\n" + " // orthographic\n" + " // ze = a*Zs (here, no B_SHF)\n" + " ze = " I_FOGF ".z * float(zCoord) / 16777216.0;\n" + " }}\n" + "\n" + " if (bool({})) {{\n", + BitfieldExtract("bpmem_fogRangeBase", FogRangeParams::RangeBase().Enabled)); + out.Write(" // x_adjust = sqrt((x-center)^2 + k^2)/k\n" + " // ze *= x_adjust\n" + " float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n" + " float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n" + " uint indexlower = uint(floatindex);\n" + " uint indexupper = indexlower + 1u;\n" + " float klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n" + " float kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n" + " float k = lerp(klower, kupper, frac(floatindex));\n" + " float x_adjust = sqrt(offset * offset + k * k) / k;\n" + " ze *= x_adjust;\n" + " }}\n" + "\n" + " float fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n" + "\n" + " if (fog_function > 3u) {{\n" + " switch (fog_function) {{\n" + " case 4u:\n" + " fog = 1.0 - exp2(-8.0 * fog);\n" + " break;\n" + " case 5u:\n" + " fog = 1.0 - exp2(-8.0 * fog * fog);\n" + " break;\n" + " case 6u:\n" + " fog = exp2(-8.0 * (1.0 - fog));\n" + " break;\n" + " case 7u:\n" + " fog = 1.0 - fog;\n" + " fog = exp2(-8.0 * fog * fog);\n" + " break;\n" + " }}\n" + " }}\n" + "\n" + " int ifog = iround(fog * 256.0);\n" + " TevResult.rgb = (TevResult.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n" + " }}\n" + "\n"); // D3D requires that the shader outputs be uint when writing to a uint render target for logic op. if (ApiType == APIType::D3D && uid_data->uint_output) { - out.WriteFmt(" if (bpmem_rgba6_format)\n" - " ocol0 = uint4(TevResult & 0xFC);\n" - " else\n" - " ocol0 = uint4(TevResult);\n" - "\n"); + out.Write(" if (bpmem_rgba6_format)\n" + " ocol0 = uint4(TevResult & 0xFC);\n" + " else\n" + " ocol0 = uint4(TevResult);\n" + "\n"); } else { - out.WriteFmt(" if (bpmem_rgba6_format)\n" - " ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n" - " else\n" - " ocol0.rgb = float3(TevResult.rgb) / 255.0;\n" - "\n" - " if (bpmem_dstalpha != 0u)\n"); - out.WriteFmt(" ocol0.a = float({} >> 2) / 63.0;\n", - BitfieldExtract("bpmem_dstalpha", ConstantAlpha().alpha)); - out.WriteFmt(" else\n" - " ocol0.a = float(TevResult.a >> 2) / 63.0;\n" - " \n"); + out.Write(" if (bpmem_rgba6_format)\n" + " ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;\n" + " else\n" + " ocol0.rgb = float3(TevResult.rgb) / 255.0;\n" + "\n" + " if (bpmem_dstalpha != 0u)\n"); + out.Write(" ocol0.a = float({} >> 2) / 63.0;\n", + BitfieldExtract("bpmem_dstalpha", ConstantAlpha().alpha)); + out.Write(" else\n" + " ocol0.a = float(TevResult.a >> 2) / 63.0;\n" + " \n"); if (use_dual_source || use_shader_blend) { - out.WriteFmt(" // Dest alpha override (dual source blending)\n" - " // Colors will be blended against the alpha from ocol1 and\n" - " // the alpha from ocol0 will be written to the framebuffer.\n" - " ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);\n"); + out.Write(" // Dest alpha override (dual source blending)\n" + " // Colors will be blended against the alpha from ocol1 and\n" + " // the alpha from ocol0 will be written to the framebuffer.\n" + " ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);\n"); } } if (bounding_box) { - out.WriteFmt(" if (bpmem_bounding_box) {{\n" - " UpdateBoundingBox(rawpos.xy);\n" - " }}\n"); + out.Write(" if (bpmem_bounding_box) {{\n" + " UpdateBoundingBox(rawpos.xy);\n" + " }}\n"); } if (use_shader_blend) @@ -1299,36 +1294,36 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "1.0 - initial_ocol0.a;", // INVDSTALPHA }}; - out.WriteFmt(" if (blend_enable) {{\n" - " float4 blend_src;\n" - " switch (blend_src_factor) {{\n"); + out.Write(" if (blend_enable) {{\n" + " float4 blend_src;\n" + " switch (blend_src_factor) {{\n"); for (size_t i = 0; i < blendSrcFactor.size(); i++) { - out.WriteFmt(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]); + out.Write(" case {}u: blend_src.rgb = {}; break;\n", i, blendSrcFactor[i]); } - out.WriteFmt(" }}\n" - " switch (blend_src_factor_alpha) {{\n"); + out.Write(" }}\n" + " switch (blend_src_factor_alpha) {{\n"); for (size_t i = 0; i < blendSrcFactorAlpha.size(); i++) { - out.WriteFmt(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]); + out.Write(" case {}u: blend_src.a = {}; break;\n", i, blendSrcFactorAlpha[i]); } - out.WriteFmt(" }}\n" - " float4 blend_dst;\n" - " switch (blend_dst_factor) {{\n"); + out.Write(" }}\n" + " float4 blend_dst;\n" + " switch (blend_dst_factor) {{\n"); for (size_t i = 0; i < blendDstFactor.size(); i++) { - out.WriteFmt(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]); + out.Write(" case {}u: blend_dst.rgb = {}; break;\n", i, blendDstFactor[i]); } - out.WriteFmt(" }}\n" - " switch (blend_dst_factor_alpha) {{\n"); + out.Write(" }}\n" + " switch (blend_dst_factor_alpha) {{\n"); for (size_t i = 0; i < blendDstFactorAlpha.size(); i++) { - out.WriteFmt(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]); + out.Write(" case {}u: blend_dst.a = {}; break;\n", i, blendDstFactorAlpha[i]); } - out.WriteFmt( + out.Write( " }}\n" " float4 blend_result;\n" " if (blend_subtract)\n" @@ -1337,53 +1332,53 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * " "blend_src.rgb;\n"); - out.WriteFmt(" if (blend_subtract_alpha)\n" - " blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n" - " else\n" - " blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); + out.Write(" if (blend_subtract_alpha)\n" + " blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n" + " else\n" + " blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n"); - out.WriteFmt(" real_ocol0 = blend_result;\n"); + out.Write(" real_ocol0 = blend_result;\n"); - out.WriteFmt(" }} else {{\n" - " real_ocol0 = ocol0;\n" - " }}\n"); + out.Write(" }} else {{\n" + " real_ocol0 = ocol0;\n" + " }}\n"); } - out.WriteFmt("}}\n" - "\n" - "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {{\n" - " // Select Ras for stage\n" - " uint ras = {};\n", - BitfieldExtract("ss.order", TwoTevStageOrders().colorchan0)); - out.WriteFmt(" if (ras < 2u) {{ // Lighting Channel 0 or 1\n" - " int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n" - " uint swap = {};\n", - BitfieldExtract("ss.ac", TevStageCombiner().alphaC.rswap)); - out.WriteFmt(" return Swizzle(swap, color);\n"); - out.WriteFmt(" }} else if (ras == 5u) {{ // Alpha Bumb\n" - " return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);\n" - " }} else if (ras == 6u) {{ // Normalzied Alpha Bump\n" - " int normalized = s.AlphaBump | s.AlphaBump >> 5;\n" - " return int4(normalized, normalized, normalized, normalized);\n" - " }} else {{\n" - " return int4(0, 0, 0, 0);\n" - " }}\n" - "}}\n" - "\n" - "int4 getKonstColor(State s, StageState ss) {{\n" - " // Select Konst for stage\n" - " // TODO: a switch case might be better here than an dynamically" - " // indexed uniform lookup\n" - " uint tevksel = bpmem_tevksel(ss.stage>>1);\n" - " if ((ss.stage & 1u) == 0u)\n" - " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n", - BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel0), - BitfieldExtract("tevksel", bpmem.tevksel[0].kasel0)); - out.WriteFmt(" else\n" - " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n", - BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel1), - BitfieldExtract("tevksel", bpmem.tevksel[0].kasel1)); - out.WriteFmt("}}\n"); + out.Write("}}\n" + "\n" + "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {{\n" + " // Select Ras for stage\n" + " uint ras = {};\n", + BitfieldExtract("ss.order", TwoTevStageOrders().colorchan0)); + out.Write(" if (ras < 2u) {{ // Lighting Channel 0 or 1\n" + " int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n" + " uint swap = {};\n", + BitfieldExtract("ss.ac", TevStageCombiner().alphaC.rswap)); + out.Write(" return Swizzle(swap, color);\n"); + out.Write(" }} else if (ras == 5u) {{ // Alpha Bumb\n" + " return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);\n" + " }} else if (ras == 6u) {{ // Normalzied Alpha Bump\n" + " int normalized = s.AlphaBump | s.AlphaBump >> 5;\n" + " return int4(normalized, normalized, normalized, normalized);\n" + " }} else {{\n" + " return int4(0, 0, 0, 0);\n" + " }}\n" + "}}\n" + "\n" + "int4 getKonstColor(State s, StageState ss) {{\n" + " // Select Konst for stage\n" + " // TODO: a switch case might be better here than an dynamically" + " // indexed uniform lookup\n" + " uint tevksel = bpmem_tevksel(ss.stage>>1);\n" + " if ((ss.stage & 1u) == 0u)\n" + " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n", + BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel0), + BitfieldExtract("tevksel", bpmem.tevksel[0].kasel0)); + out.Write(" else\n" + " return int4(konstLookup[{}].rgb, konstLookup[{}].a);\n", + BitfieldExtract("tevksel", bpmem.tevksel[0].kcsel1), + BitfieldExtract("tevksel", bpmem.tevksel[0].kasel1)); + out.Write("}}\n"); return out; } diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 574288fc38..39c6ede74a 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -35,145 +35,145 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config const u32 num_texgen = uid_data->num_texgens; ShaderCode out; - out.WriteFmt("// Vertex UberShader\n\n"); - out.WriteFmt("{}", s_lighting_struct); + out.Write("// Vertex UberShader\n\n"); + out.Write("{}", s_lighting_struct); // uniforms if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); else - out.WriteFmt("cbuffer VSBlock {{\n"); - out.WriteFmt("{}", s_shader_uniforms); - out.WriteFmt("}};\n"); + out.Write("cbuffer VSBlock {{\n"); + out.Write("{}", s_shader_uniforms); + out.Write("}};\n"); - out.WriteFmt("struct VS_OUTPUT {{\n"); + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, num_texgen, host_config, ""); - out.WriteFmt("}};\n\n"); + out.Write("}};\n\n"); WriteUberShaderCommonHeader(out, api_type, host_config); WriteLightingFunction(out); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); if (host_config.backend_geometry_shaders) { - out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n"); GenerateVSOutputMembers(out, api_type, num_texgen, host_config, GetInterpolationQualifier(msaa, ssaa, true, false)); - out.WriteFmt("}} vs;\n"); + out.Write("}} vs;\n"); } else { // Let's set up attributes u32 counter = 0; - out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); for (u32 i = 0; i < num_texgen; ++i) { - out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); + out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } } - out.WriteFmt("void main()\n{{\n"); + out.Write("void main()\n{{\n"); } else // D3D { - out.WriteFmt("VS_OUTPUT main(\n"); + out.Write("VS_OUTPUT main(\n"); // inputs - out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n" - " float3 rawnorm1 : NORMAL1,\n" - " float3 rawnorm2 : NORMAL2,\n" - " float4 rawcolor0 : COLOR0,\n" - " float4 rawcolor1 : COLOR1,\n"); + out.Write(" float3 rawnorm0 : NORMAL0,\n" + " float3 rawnorm1 : NORMAL1,\n" + " float3 rawnorm2 : NORMAL2,\n" + " float4 rawcolor0 : COLOR0,\n" + " float4 rawcolor1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) - out.WriteFmt(" float3 rawtex{} : TEXCOORD{},\n", i, i); - out.WriteFmt(" uint posmtx : BLENDINDICES,\n"); - out.WriteFmt(" float4 rawpos : POSITION) {{\n"); + out.Write(" float3 rawtex{} : TEXCOORD{},\n", i, i); + out.Write(" uint posmtx : BLENDINDICES,\n"); + out.Write(" float4 rawpos : POSITION) {{\n"); } - out.WriteFmt("VS_OUTPUT o;\n" - "\n"); + out.Write("VS_OUTPUT o;\n" + "\n"); // Transforms - out.WriteFmt("// Position matrix\n" - "float4 P0;\n" - "float4 P1;\n" - "float4 P2;\n" - "\n" - "// Normal matrix\n" - "float3 N0;\n" - "float3 N1;\n" - "float3 N2;\n" - "\n" - "if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n", - VB_HAS_POSMTXIDX); - out.WriteFmt(" // Vertex format has a per-vertex matrix\n" - " int posidx = int(posmtx.r);\n" - " P0 = " I_TRANSFORMMATRICES "[posidx];\n" - " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" - " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" - "\n" - " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" - " N0 = " I_NORMALMATRICES "[normidx].xyz;\n" - " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" - " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" - "}} else {{\n" - " // One shared matrix\n" - " P0 = " I_POSNORMALMATRIX "[0];\n" - " P1 = " I_POSNORMALMATRIX "[1];\n" - " P2 = " I_POSNORMALMATRIX "[2];\n" - " N0 = " I_POSNORMALMATRIX "[3].xyz;\n" - " N1 = " I_POSNORMALMATRIX "[4].xyz;\n" - " N2 = " I_POSNORMALMATRIX "[5].xyz;\n" - "}}\n" - "\n" - "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" - "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION - "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" - "\n" - "// Only the first normal gets normalized (TODO: why?)\n" - "float3 _norm0 = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NRM0\n", - VB_HAS_NRM0); - out.WriteFmt( + out.Write("// Position matrix\n" + "float4 P0;\n" + "float4 P1;\n" + "float4 P2;\n" + "\n" + "// Normal matrix\n" + "float3 N0;\n" + "float3 N1;\n" + "float3 N2;\n" + "\n" + "if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n", + VB_HAS_POSMTXIDX); + out.Write(" // Vertex format has a per-vertex matrix\n" + " int posidx = int(posmtx.r);\n" + " P0 = " I_TRANSFORMMATRICES "[posidx];\n" + " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" + " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" + "\n" + " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" + " N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" + " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" + "}} else {{\n" + " // One shared matrix\n" + " P0 = " I_POSNORMALMATRIX "[0];\n" + " P1 = " I_POSNORMALMATRIX "[1];\n" + " P2 = " I_POSNORMALMATRIX "[2];\n" + " N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + " N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + " N2 = " I_POSNORMALMATRIX "[5].xyz;\n" + "}}\n" + "\n" + "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" + "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION + "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" + "\n" + "// Only the first normal gets normalized (TODO: why?)\n" + "float3 _norm0 = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_NRM0\n", + VB_HAS_NRM0); + out.Write( " _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n" "\n" "float3 _norm1 = float3(0.0, 0.0, 0.0);\n" "if ((components & {}u) != 0u) // VB_HAS_NRM1\n", VB_HAS_NRM1); - out.WriteFmt(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" - "\n" - "float3 _norm2 = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NRM2\n", - VB_HAS_NRM2); - out.WriteFmt(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" - "\n"); + out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" + "\n" + "float3 _norm2 = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_NRM2\n", + VB_HAS_NRM2); + out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" + "\n"); // Hardware Lighting WriteVertexLighting(out, api_type, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0", @@ -183,37 +183,37 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config if (num_texgen > 0) GenVertexShaderTexGens(api_type, num_texgen, out); - out.WriteFmt("if (xfmem_numColorChans == 0u) {{\n" - " if ((components & {}u) != 0u)\n" - " o.colors_0 = rawcolor0;\n" - " else\n" - " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" - "}}\n", - VB_HAS_COL0); - out.WriteFmt("if (xfmem_numColorChans < 2u) {{\n" - " if ((components & {}u) != 0u)\n" - " o.colors_0 = rawcolor1;\n" - " else\n" - " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" - "}}\n", - VB_HAS_COL1); + out.Write("if (xfmem_numColorChans == 0u) {{\n" + " if ((components & {}u) != 0u)\n" + " o.colors_0 = rawcolor0;\n" + " else\n" + " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" + "}}\n", + VB_HAS_COL0); + out.Write("if (xfmem_numColorChans < 2u) {{\n" + " if ((components & {}u) != 0u)\n" + " o.colors_0 = rawcolor1;\n" + " else\n" + " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" + "}}\n", + VB_HAS_COL1); if (!host_config.fast_depth_calc) { // clipPos/w needs to be done in pixel shader, not here - out.WriteFmt("o.clipPos = o.pos;\n"); + out.Write("o.clipPos = o.pos;\n"); } if (per_pixel_lighting) { - out.WriteFmt("o.Normal = _norm0;\n" - "o.WorldPos = pos.xyz;\n"); - out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL0\n" - " o.colors_0 = rawcolor0;\n", - VB_HAS_COL0); - out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL1\n" - " o.colors_1 = rawcolor1;\n", - VB_HAS_COL1); + out.Write("o.Normal = _norm0;\n" + "o.WorldPos = pos.xyz;\n"); + out.Write("if ((components & {}u) != 0u) // VB_HAS_COL0\n" + " o.colors_0 = rawcolor0;\n", + VB_HAS_COL0); + out.Write("if ((components & {}u) != 0u) // VB_HAS_COL1\n" + " o.colors_1 = rawcolor1;\n", + VB_HAS_COL1); } // If we can disable the incorrect depth clipping planes using depth clamping, then we can do @@ -225,13 +225,13 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. // We adjust our depth value for clipping purposes to match the perspective projection in the // software backend, which is a hack to fix Sonic Adventure and Unleashed games. - out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" - "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w - "float clipDist1 = -clipDepth;\n"); // Far: z > 0 + out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" + "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w + "float clipDist1 = -clipDepth;\n"); // Far: z > 0 if (host_config.backend_geometry_shaders) { - out.WriteFmt("o.clipDist0 = clipDist0;\n" - "o.clipDist1 = clipDist1;\n"); + out.Write("o.clipDist0 = clipDist0;\n" + "o.clipDist1 = clipDist1;\n"); } } @@ -246,20 +246,20 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config // divide, because some games will use a depth range larger than what is allowed by the // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these // games effectively add a depth bias to the values written to the depth buffer. - out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " - "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " + "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); if (!host_config.backend_clip_control) { // If the graphics API doesn't support a depth range of 0..1, then we need to map z to // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point // operation that can introduce a round-trip error. - out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); + out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); } // Correct for negative viewports by mirroring all vertices. We need to negate the height here, // since the viewport height is already negated by the render backend. - out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); + out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results @@ -267,7 +267,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. - out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); + out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); if (vertex_rounding) { @@ -275,18 +275,18 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config // cause an additional pixel offset. Due to a higher pixel density we need to correct this // by converting our clip-space position into the Wii's screen-space. // Acquire the right pixel and then convert it back. - out.WriteFmt("if (o.pos.w == 1.0f)\n" - "{{\n"); + out.Write("if (o.pos.w == 1.0f)\n" + "{{\n"); - out.WriteFmt("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" - "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"); + out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" + "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"); - out.WriteFmt("\tss_pixel_x = round(ss_pixel_x);\n" - "\tss_pixel_y = round(ss_pixel_y);\n"); + out.Write("\tss_pixel_x = round(ss_pixel_x);\n" + "\tss_pixel_y = round(ss_pixel_y);\n"); - out.WriteFmt("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" - "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" - "}}\n"); + out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" + "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" + "}}\n"); } if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) @@ -300,35 +300,35 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. for (u32 i = 0; i < num_texgen; ++i) - out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i); + out.Write("tex{}.xyz = o.tex{};\n", i, i); if (!host_config.fast_depth_calc) - out.WriteFmt("clipPos = o.clipPos;\n"); + out.Write("clipPos = o.clipPos;\n"); if (per_pixel_lighting) { - out.WriteFmt("Normal = o.Normal;\n" - "WorldPos = o.WorldPos;\n"); + out.Write("Normal = o.Normal;\n" + "WorldPos = o.WorldPos;\n"); } - out.WriteFmt("colors_0 = o.colors_0;\n" - "colors_1 = o.colors_1;\n"); + out.Write("colors_0 = o.colors_0;\n" + "colors_1 = o.colors_1;\n"); } if (host_config.backend_depth_clamp) { - out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n" - "gl_ClipDistance[1] = clipDist1;\n"); + out.Write("gl_ClipDistance[0] = clipDist0;\n" + "gl_ClipDistance[1] = clipDist1;\n"); } // Vulkan NDC space has Y pointing down (right-handed NDC space). if (api_type == APIType::Vulkan) - out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); else - out.WriteFmt("gl_Position = o.pos;\n"); + out.Write("gl_Position = o.pos;\n"); } else // D3D { - out.WriteFmt("return o;\n"); + out.Write("return o;\n"); } - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; } @@ -338,160 +338,158 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& // The HLSL compiler complains that the output texture coordinates are uninitialized when trying // to dynamically index them. for (u32 i = 0; i < num_texgen; i++) - out.WriteFmt("o.tex{} = float3(0.0, 0.0, 0.0);\n", i); + out.Write("o.tex{} = float3(0.0, 0.0, 0.0);\n", i); - out.WriteFmt("// Texture coordinate generation\n"); + out.Write("// Texture coordinate generation\n"); if (num_texgen == 1) { - out.WriteFmt("{{ const uint texgen = 0u;\n"); + out.Write("{{ const uint texgen = 0u;\n"); } else { - out.WriteFmt("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", - api_type == APIType::D3D ? "[loop] " : "", num_texgen); + out.Write("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", + api_type == APIType::D3D ? "[loop] " : "", num_texgen); } - out.WriteFmt(" // Texcoord transforms\n"); - out.WriteFmt(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n" - " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n"); - out.WriteFmt(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow)); - out.WriteFmt(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW); - out.WriteFmt(" coord.xyz = rawpos.xyz;\n"); - out.WriteFmt(" break;\n\n"); - out.WriteFmt(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW); - out.WriteFmt( + out.Write(" // Texcoord transforms\n"); + out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n" + " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n"); + out.Write(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow)); + out.Write(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW); + out.Write(" coord.xyz = rawpos.xyz;\n"); + out.Write(" break;\n\n"); + out.Write(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW); + out.Write( " coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;", VB_HAS_NRM0); - out.WriteFmt(" break;\n\n"); - out.WriteFmt(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW); - out.WriteFmt( + out.Write(" break;\n\n"); + out.Write(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW); + out.Write( " coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;", VB_HAS_NRM1); - out.WriteFmt(" break;\n\n"); - out.WriteFmt(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW); - out.WriteFmt( + out.Write(" break;\n\n"); + out.Write(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW); + out.Write( " coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;", VB_HAS_NRM2); - out.WriteFmt(" break;\n\n"); + out.Write(" break;\n\n"); for (u32 i = 0; i < 8; i++) { - out.WriteFmt(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i); - out.WriteFmt( + out.Write(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i); + out.Write( " coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, " "1.0, 1.0) : coord;\n", VB_HAS_UV0 << i, i, i, i); - out.WriteFmt(" break;\n\n"); + out.Write(" break;\n\n"); } - out.WriteFmt(" }}\n" - "\n"); + out.Write(" }}\n" + "\n"); - out.WriteFmt(" // Input form of AB11 sets z element to 1.0\n"); - out.WriteFmt(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11); - out.WriteFmt(" coord.z = 1.0f;\n" - "\n"); + out.Write(" // Input form of AB11 sets z element to 1.0\n"); + out.Write(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11); + out.Write(" coord.z = 1.0f;\n" + "\n"); - out.WriteFmt(" // first transformation\n"); - out.WriteFmt(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype)); - out.WriteFmt(" float3 output_tex;\n" - " switch (texgentype)\n" - " {{\n"); - out.WriteFmt(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP); - out.WriteFmt(" {{\n"); - out.WriteFmt(" uint light = {};\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift)); - out.WriteFmt(" uint source = {};\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift)); - out.WriteFmt(" switch (source) {{\n"); + out.Write(" // first transformation\n"); + out.Write(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype)); + out.Write(" float3 output_tex;\n" + " switch (texgentype)\n" + " {{\n"); + out.Write(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP); + out.Write(" {{\n"); + out.Write(" uint light = {};\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift)); + out.Write(" uint source = {};\n", + BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift)); + out.Write(" switch (source) {{\n"); for (u32 i = 0; i < num_texgen; i++) - out.WriteFmt(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i); - out.WriteFmt(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" - " }}\n"); - out.WriteFmt(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n", - VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 - out.WriteFmt(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" - " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" - " }}\n" - " }}\n" - " break;\n\n"); - out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0); - out.WriteFmt(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n" - " break;\n\n"); - out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1); - out.WriteFmt(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n" - " break;\n\n"); - out.WriteFmt(" default: // Also XF_TEXGEN_REGULAR\n" - " {{\n"); - out.WriteFmt(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", - VB_HAS_TEXMTXIDX0); - out.WriteFmt( - " // This is messy, due to dynamic indexing of the input texture coordinates.\n" - " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" - " int tmp = 0;\n" - " switch (texgen) {{\n"); + out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i); + out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" + " }}\n"); + out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n", + VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 + out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" + " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" + " }}\n" + " }}\n" + " break;\n\n"); + out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0); + out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n" + " break;\n\n"); + out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1); + out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n" + " break;\n\n"); + out.Write(" default: // Also XF_TEXGEN_REGULAR\n" + " {{\n"); + out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", + VB_HAS_TEXMTXIDX0); + out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n" + " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" + " int tmp = 0;\n" + " switch (texgen) {{\n"); for (u32 i = 0; i < num_texgen; i++) - out.WriteFmt(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); - out.WriteFmt(" }}\n" - "\n"); - out.WriteFmt(" if ({} == {}u) {{\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ); - out.WriteFmt(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n" - " }} else {{\n" - " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" - " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" - " 1.0);\n" - " }}\n" - " }} else {{\n"); - out.WriteFmt(" if ({} == {}u) {{\n", - BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ); - out.WriteFmt( - " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n" - " }} else {{\n" - " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" - " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" - " 1.0);\n" - " }}\n" - " }}\n" - " }}\n" - " break;\n\n" - " }}\n" - "\n"); + out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); + out.Write(" }}\n" + "\n"); + out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection), + XF_TEXPROJ_STQ); + out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n" + " }} else {{\n" + " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" + " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n" + " 1.0);\n" + " }}\n" + " }} else {{\n"); + out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection), + XF_TEXPROJ_STQ); + out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n" + " }} else {{\n" + " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n" + " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n" + " 1.0);\n" + " }}\n" + " }}\n" + " }}\n" + " break;\n\n" + " }}\n" + "\n"); - out.WriteFmt(" if (xfmem_dualTexInfo != 0u) {{\n"); - out.WriteFmt(" uint postMtxInfo = xfmem_postMtxInfo(texgen);"); - out.WriteFmt(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index)); - out.WriteFmt(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n" - " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n" - " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n" - "\n"); - out.WriteFmt(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize)); - out.WriteFmt(" output_tex.xyz = normalize(output_tex.xyz);\n" - "\n" - " // multiply by postmatrix\n" - " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n" - " dot(P1.xyz, output_tex.xyz) + P1.w,\n" - " dot(P2.xyz, output_tex.xyz) + P2.w);\n" - " }}\n\n"); + out.Write(" if (xfmem_dualTexInfo != 0u) {{\n"); + out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);"); + out.Write(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index)); + out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n" + " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n" + " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n" + "\n"); + out.Write(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize)); + out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n" + "\n" + " // multiply by postmatrix\n" + " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n" + " dot(P1.xyz, output_tex.xyz) + P1.w,\n" + " dot(P2.xyz, output_tex.xyz) + P2.w);\n" + " }}\n\n"); // When q is 0, the GameCube appears to have a special case // This can be seen in devkitPro's neheGX Lesson08 example for Wii // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling) - out.WriteFmt(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n", - XF_TEXGEN_REGULAR); - out.WriteFmt( + out.Write(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n", + XF_TEXGEN_REGULAR); + out.Write( " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n" "\n"); - out.WriteFmt(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"); - out.WriteFmt(" switch (texgen) {{\n"); + out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"); + out.Write(" switch (texgen) {{\n"); for (u32 i = 0; i < num_texgen; i++) - out.WriteFmt(" case {}u: o.tex{} = output_tex; break;\n", i, i); - out.WriteFmt(" }}\n" - "}}\n"); + out.Write(" case {}u: o.tex{} = output_tex; break;\n", i, i); + out.Write(" }}\n" + "}}\n"); } void EnumerateVertexShaderUids(const std::function& callback) diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 917abfed04..a26fa006f3 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -83,37 +83,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho const bool ssaa = host_config.ssaa; const bool vertex_rounding = host_config.vertex_rounding; - out.WriteFmt("{}", s_lighting_struct); + out.Write("{}", s_lighting_struct); // uniforms if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n"); else - out.WriteFmt("cbuffer VSBlock {{\n"); + out.Write("cbuffer VSBlock {{\n"); - out.WriteFmt("{}", s_shader_uniforms); - out.WriteFmt("}};\n"); + out.Write("{}", s_shader_uniforms); + out.Write("}};\n"); - out.WriteFmt("struct VS_OUTPUT {{\n"); + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, ""); - out.WriteFmt("}};\n"); + out.Write("}};\n"); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); if ((uid_data->components & VB_HAS_NRM0) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); if ((uid_data->components & VB_HAS_NRM1) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); if ((uid_data->components & VB_HAS_NRM2) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); if ((uid_data->components & VB_HAS_COL0) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); if ((uid_data->components & VB_HAS_COL1) != 0) - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); for (u32 i = 0; i < 8; ++i) { @@ -121,161 +121,161 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) { - out.WriteFmt("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, - has_texmtx != 0 ? 3 : 2, i); + out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, + has_texmtx != 0 ? 3 : 2, i); } } if (host_config.backend_geometry_shaders) { - out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"); + out.Write("VARYING_LOCATION(0) out VertexData {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, GetInterpolationQualifier(msaa, ssaa, true, false)); - out.WriteFmt("}} vs;\n"); + out.Write("}} vs;\n"); } else { // Let's set up attributes u32 counter = 0; - out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); for (u32 i = 0; i < uid_data->numTexGens; ++i) { - out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, - GetInterpolationQualifier(msaa, ssaa), i); + out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++, + GetInterpolationQualifier(msaa, ssaa), i); } if (!host_config.fast_depth_calc) { - out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } if (per_pixel_lighting) { - out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); - out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, - GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); + out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++, + GetInterpolationQualifier(msaa, ssaa)); } } - out.WriteFmt("void main()\n{{\n"); + out.Write("void main()\n{{\n"); } else // D3D { - out.WriteFmt("VS_OUTPUT main(\n"); + out.Write("VS_OUTPUT main(\n"); // inputs if ((uid_data->components & VB_HAS_NRM0) != 0) - out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n"); + out.Write(" float3 rawnorm0 : NORMAL0,\n"); if ((uid_data->components & VB_HAS_NRM1) != 0) - out.WriteFmt(" float3 rawnorm1 : NORMAL1,\n"); + out.Write(" float3 rawnorm1 : NORMAL1,\n"); if ((uid_data->components & VB_HAS_NRM2) != 0) - out.WriteFmt(" float3 rawnorm2 : NORMAL2,\n"); + out.Write(" float3 rawnorm2 : NORMAL2,\n"); if ((uid_data->components & VB_HAS_COL0) != 0) - out.WriteFmt(" float4 rawcolor0 : COLOR0,\n"); + out.Write(" float4 rawcolor0 : COLOR0,\n"); if ((uid_data->components & VB_HAS_COL1) != 0) - out.WriteFmt(" float4 rawcolor1 : COLOR1,\n"); + out.Write(" float4 rawcolor1 : COLOR1,\n"); for (u32 i = 0; i < 8; ++i) { const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) - out.WriteFmt(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i); + out.Write(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i); } if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.WriteFmt(" uint4 posmtx : BLENDINDICES,\n"); - out.WriteFmt(" float4 rawpos : POSITION) {{\n"); + out.Write(" uint4 posmtx : BLENDINDICES,\n"); + out.Write(" float4 rawpos : POSITION) {{\n"); } - out.WriteFmt("VS_OUTPUT o;\n"); + out.Write("VS_OUTPUT o;\n"); // transforms if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) { - out.WriteFmt("int posidx = int(posmtx.r);\n" - "float4 pos = float4(dot(" I_TRANSFORMMATRICES - "[posidx], rawpos), dot(" I_TRANSFORMMATRICES - "[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n"); + out.Write("int posidx = int(posmtx.r);\n" + "float4 pos = float4(dot(" I_TRANSFORMMATRICES + "[posidx], rawpos), dot(" I_TRANSFORMMATRICES + "[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n"); if ((uid_data->components & VB_HAS_NRMALL) != 0) { - out.WriteFmt("int normidx = posidx & 31;\n" - "float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES - "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); + out.Write("int normidx = posidx & 31;\n" + "float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES + "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); } if ((uid_data->components & VB_HAS_NRM0) != 0) { - out.WriteFmt("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " - "rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " + "rawnorm0)));\n"); } if ((uid_data->components & VB_HAS_NRM1) != 0) { - out.WriteFmt( + out.Write( "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); } if ((uid_data->components & VB_HAS_NRM2) != 0) { - out.WriteFmt( + out.Write( "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } } else { - out.WriteFmt("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX - "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); + out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX + "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); if ((uid_data->components & VB_HAS_NRM0) != 0) { - out.WriteFmt("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX + "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX + "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); } if ((uid_data->components & VB_HAS_NRM1) != 0) { - out.WriteFmt("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); + out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX + "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX + "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); } if ((uid_data->components & VB_HAS_NRM2) != 0) { - out.WriteFmt("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); + out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX + "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX + "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); } } if ((uid_data->components & VB_HAS_NRM0) == 0) - out.WriteFmt("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); + out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); - out.WriteFmt("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION - "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); + out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION + "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); - out.WriteFmt("int4 lacc;\n" - "float3 ldir, h, cosAttn, distAttn;\n" - "float dist, dist2, attn;\n"); + out.Write("int4 lacc;\n" + "float3 ldir, h, cosAttn, distAttn;\n" + "float dist, dist2, attn;\n"); GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "rawcolor", "o.colors_"); // transform texcoords - out.WriteFmt("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); + out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"); for (u32 i = 0; i < uid_data->numTexGens; ++i) { auto& texinfo = uid_data->texMtxInfo[i]; - out.WriteFmt("{{\n"); - out.WriteFmt("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); + out.Write("{{\n"); + out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n"); switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: - out.WriteFmt("coord.xyz = rawpos.xyz;\n"); + out.Write("coord.xyz = rawpos.xyz;\n"); break; case XF_SRCNORMAL_INROW: if ((uid_data->components & VB_HAS_NRM0) != 0) { - out.WriteFmt("coord.xyz = rawnorm0.xyz;\n"); + out.Write("coord.xyz = rawnorm0.xyz;\n"); } break; case XF_SRCCOLORS_INROW: @@ -285,28 +285,28 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho case XF_SRCBINORMAL_T_INROW: if ((uid_data->components & VB_HAS_NRM1) != 0) { - out.WriteFmt("coord.xyz = rawnorm1.xyz;\n"); + out.Write("coord.xyz = rawnorm1.xyz;\n"); } break; case XF_SRCBINORMAL_B_INROW: if ((uid_data->components & VB_HAS_NRM2) != 0) { - out.WriteFmt("coord.xyz = rawnorm2.xyz;\n"); + out.Write("coord.xyz = rawnorm2.xyz;\n"); } break; default: ASSERT(texinfo.sourcerow <= XF_SRCTEX7_INROW); if ((uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) != 0) { - out.WriteFmt("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", - texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); + out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", + texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); } break; } // Input form of AB11 sets z element to 1.0 if (texinfo.inputform == XF_TEXINPUT_AB11) - out.WriteFmt("coord.z = 1.0;\n"); + out.Write("coord.z = 1.0;\n"); // first transformation switch (texinfo.texgentype) @@ -316,9 +316,9 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0) { // transform the light dir into tangent space - out.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", - LIGHT_POS_PARAMS(texinfo.embosslightshift)); - out.WriteFmt( + out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", + LIGHT_POS_PARAMS(texinfo.embosslightshift)); + out.Write( "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift); } @@ -327,49 +327,49 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue // Squadron 2 // ASSERT(0); // should have normals - out.WriteFmt("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift); + out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift); } break; case XF_TEXGEN_COLOR_STRGBC0: - out.WriteFmt("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); + out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); break; case XF_TEXGEN_COLOR_STRGBC1: - out.WriteFmt("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); + out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); break; case XF_TEXGEN_REGULAR: default: if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0) { - out.WriteFmt("int tmp = int(rawtex{}.z);\n", i); + out.Write("int tmp = int(rawtex{}.z);\n", i); if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { - out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES - "[tmp]), dot(coord, " I_TRANSFORMMATRICES - "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", - i); + out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES + "[tmp]), dot(coord, " I_TRANSFORMMATRICES + "[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n", + i); } else { - out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES - "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n", - i); + out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES + "[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n", + i); } } else { if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) { - out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES - "[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES - "[{}]));\n", - i, 3 * i, 3 * i + 1, 3 * i + 2); + out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES + "[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES + "[{}]));\n", + i, 3 * i, 3 * i + 1, 3 * i + 2); } else { - out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES - "[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n", - i, 3 * i, 3 * i + 1); + out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES + "[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n", + i, 3 * i, 3 * i + 1); } } break; @@ -380,16 +380,16 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho { auto& postInfo = uid_data->postMtxInfo[i]; - out.WriteFmt("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n" - "float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n" - "float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n", - postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); + out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n" + "float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n" + "float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n", + postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f); if (postInfo.normalize) - out.WriteFmt("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i); + out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i); // multiply by postmatrix - out.WriteFmt( + out.Write( "o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + " "P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n", i); @@ -401,44 +401,44 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // TODO: check if this only affects XF_TEXGEN_REGULAR if (texinfo.texgentype == XF_TEXGEN_REGULAR) { - out.WriteFmt( + out.Write( "if(o.tex{0}.z == 0.0f)\n" "\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n", i); } - out.WriteFmt("}}\n"); + out.Write("}}\n"); } if (uid_data->numColorChans == 0) { if ((uid_data->components & VB_HAS_COL0) != 0) - out.WriteFmt("o.colors_0 = rawcolor0;\n"); + out.Write("o.colors_0 = rawcolor0;\n"); else - out.WriteFmt("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); + out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } if (uid_data->numColorChans < 2) { if ((uid_data->components & VB_HAS_COL1) != 0) - out.WriteFmt("o.colors_1 = rawcolor1;\n"); + out.Write("o.colors_1 = rawcolor1;\n"); else - out.WriteFmt("o.colors_1 = o.colors_0;\n"); + out.Write("o.colors_1 = o.colors_0;\n"); } // clipPos/w needs to be done in pixel shader, not here if (!host_config.fast_depth_calc) - out.WriteFmt("o.clipPos = o.pos;\n"); + out.Write("o.clipPos = o.pos;\n"); if (per_pixel_lighting) { - out.WriteFmt("o.Normal = _norm0;\n" - "o.WorldPos = pos.xyz;\n"); + out.Write("o.Normal = _norm0;\n" + "o.WorldPos = pos.xyz;\n"); if ((uid_data->components & VB_HAS_COL0) != 0) - out.WriteFmt("o.colors_0 = rawcolor0;\n"); + out.Write("o.colors_0 = rawcolor0;\n"); if ((uid_data->components & VB_HAS_COL1) != 0) - out.WriteFmt("o.colors_1 = rawcolor1;\n"); + out.Write("o.colors_1 = rawcolor1;\n"); } // If we can disable the incorrect depth clipping planes using depth clamping, then we can do @@ -450,14 +450,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. // We adjust our depth value for clipping purposes to match the perspective projection in the // software backend, which is a hack to fix Sonic Adventure and Unleashed games. - out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" - "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w - "float clipDist1 = -clipDepth;\n"); // Far: z > 0 + out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n" + "float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w + "float clipDist1 = -clipDepth;\n"); // Far: z > 0 if (host_config.backend_geometry_shaders) { - out.WriteFmt("o.clipDist0 = clipDist0;\n" - "o.clipDist1 = clipDist1;\n"); + out.Write("o.clipDist0 = clipDist0;\n" + "o.clipDist1 = clipDist1;\n"); } } @@ -472,20 +472,20 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // divide, because some games will use a depth range larger than what is allowed by the // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these // games effectively add a depth bias to the values written to the depth buffer. - out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " - "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " + "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); if (!host_config.backend_clip_control) { // If the graphics API doesn't support a depth range of 0..1, then we need to map z to // the -1..1 range. Unfortunately we have to use a substraction, which is a lossy floating-point // operation that can introduce a round-trip error. - out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); + out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); } // Correct for negative viewports by mirroring all vertices. We need to negate the height here, // since the viewport height is already negated by the render backend. - out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); + out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n"); // The console GPU places the pixel center at 7/12 in screen space unless // antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results @@ -493,7 +493,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // which in turn can be critical if it happens for clear quads. // Hence, we compensate for this pixel center difference so that primitives // get rasterized correctly. - out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); + out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); if (vertex_rounding) { @@ -504,18 +504,18 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // we need to correct this by converting our // clip-space position into the Wii's screen-space // acquire the right pixel and then convert it back - out.WriteFmt("if (o.pos.w == 1.0f)\n" - "{{\n" + out.Write("if (o.pos.w == 1.0f)\n" + "{{\n" - "\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" - "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n" + "\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n" + "\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n" - "\tss_pixel_x = round(ss_pixel_x);\n" - "\tss_pixel_y = round(ss_pixel_y);\n" + "\tss_pixel_x = round(ss_pixel_x);\n" + "\tss_pixel_y = round(ss_pixel_y);\n" - "\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" - "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" - "}}\n"); + "\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n" + "\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n" + "}}\n"); } if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) @@ -529,35 +529,35 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // TODO: Pass interface blocks between shader stages even if geometry shaders // are not supported, however that will require at least OpenGL 3.2 support. for (u32 i = 0; i < uid_data->numTexGens; ++i) - out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i); + out.Write("tex{}.xyz = o.tex{};\n", i, i); if (!host_config.fast_depth_calc) - out.WriteFmt("clipPos = o.clipPos;\n"); + out.Write("clipPos = o.clipPos;\n"); if (per_pixel_lighting) { - out.WriteFmt("Normal = o.Normal;\n" - "WorldPos = o.WorldPos;\n"); + out.Write("Normal = o.Normal;\n" + "WorldPos = o.WorldPos;\n"); } - out.WriteFmt("colors_0 = o.colors_0;\n" - "colors_1 = o.colors_1;\n"); + out.Write("colors_0 = o.colors_0;\n" + "colors_1 = o.colors_1;\n"); } if (host_config.backend_depth_clamp) { - out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n" - "gl_ClipDistance[1] = clipDist1;\n"); + out.Write("gl_ClipDistance[0] = clipDist0;\n" + "gl_ClipDistance[1] = clipDist1;\n"); } // Vulkan NDC space has Y pointing down (right-handed NDC space). if (api_type == APIType::Vulkan) - out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); else - out.WriteFmt("gl_Position = o.pos;\n"); + out.Write("gl_Position = o.pos;\n"); } else // D3D { - out.WriteFmt("return o;\n"); + out.Write("return o;\n"); } - out.WriteFmt("}}\n"); + out.Write("}}\n"); return out; }