Merge pull request #9234 from lioncash/rename-fmt

ShaderGenCommon: Rename WriteFmt() to Write()
This commit is contained in:
LC 2020-11-09 22:08:42 -05:00 committed by GitHub
commit 0a2564a89d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 2340 additions and 2381 deletions

View File

@ -27,9 +27,9 @@ APIType GetAPIType()
void EmitUniformBufferDeclaration(ShaderCode& code)
{
if (GetAPIType() == APIType::D3D)
code.WriteFmt("cbuffer PSBlock : register(b0)\n");
code.Write("cbuffer PSBlock : register(b0)\n");
else
code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock\n");
code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n");
}
void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
@ -43,8 +43,8 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
for (u32 i = start; i < end; i++)
{
code.WriteFmt("{} tex{} : register(t{});\n", array_type, i, i);
code.WriteFmt("SamplerState samp{} : register(s{});\n", i, i);
code.Write("{} tex{} : register(t{});\n", array_type, i, i);
code.Write("SamplerState samp{} : register(s{});\n", i, i);
}
}
break;
@ -56,7 +56,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
for (u32 i = start; i < end; i++)
{
code.WriteFmt("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i);
code.Write("SAMPLER_BINDING({}) uniform {} samp{};\n", i, array_type, i);
}
}
break;
@ -70,12 +70,12 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.WriteFmt("tex{}.Sample(samp{}, {})", n, n, coords);
code.Write("tex{}.Sample(samp{}, {})", n, n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.WriteFmt("texture(samp{}, {})", n, coords);
code.Write("texture(samp{}, {})", n, coords);
break;
default:
@ -90,12 +90,12 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.WriteFmt("tex{}.Load({})", n, coords);
code.Write("tex{}.Load({})", n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.WriteFmt("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
break;
default:
@ -111,19 +111,19 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
{
case APIType::D3D:
{
code.WriteFmt("void main(");
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("in float3 rawtex{} : TEXCOORD{}, ", i, i);
code.Write("in float3 rawtex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("in float4 rawcolor{} : COLOR{}, ", i, i);
code.Write("in float4 rawcolor{} : COLOR{}, ", i, i);
if (position_input)
code.WriteFmt("in float4 rawpos : POSITION, ");
code.WriteFmt("{}", extra_inputs);
code.Write("in float4 rawpos : POSITION, ");
code.Write("{}", extra_inputs);
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt("out float3 v_tex{} : TEXCOORD{}, ", i, i);
code.Write("out float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt("out float4 v_col{} : COLOR{}, ", i, i);
code.WriteFmt("out float4 opos : SV_Position)\n");
code.Write("out float4 v_col{} : COLOR{}, ", i, i);
code.Write("out float4 opos : SV_Position)\n");
}
break;
@ -133,35 +133,35 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
for (u32 i = 0; i < num_tex_inputs; i++)
{
const auto attribute = SHADER_TEXTURE0_ATTRIB + i;
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i);
code.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", attribute, i);
}
for (u32 i = 0; i < num_color_inputs; i++)
{
const auto attribute = SHADER_COLOR0_ATTRIB + i;
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i);
code.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor{};\n", attribute, i);
}
if (position_input)
code.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
code.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
code.Write("VARYING_LOCATION(0) out VertexData {{\n");
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}};\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}};\n");
}
else
{
for (u32 i = 0; i < num_tex_outputs; i++)
code.WriteFmt("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i);
code.Write("VARYING_LOCATION({}) out float3 v_tex{};\n", i, i);
for (u32 i = 0; i < num_color_outputs; i++)
code.WriteFmt("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i);
code.Write("VARYING_LOCATION({}) out float4 v_col{};\n", num_tex_inputs + i, i);
}
code.WriteFmt("#define opos gl_Position\n");
code.WriteFmt("{}\n", extra_inputs);
code.WriteFmt("void main()\n");
code.Write("#define opos gl_Position\n");
code.Write("{}\n", extra_inputs);
code.Write("void main()\n");
}
break;
default:
@ -177,14 +177,14 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
{
case APIType::D3D:
{
code.WriteFmt("void main(");
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("in float3 v_tex{} : TEXCOORD{}, ", i, i);
code.Write("in float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("in float4 v_col{} : COLOR{}, ", i, i);
code.Write("in float4 v_col{} : COLOR{}, ", i, i);
if (emit_frag_coord)
code.WriteFmt("in float4 frag_coord : SV_Position, ");
code.WriteFmt("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type);
code.Write("in float4 frag_coord : SV_Position, ");
code.Write("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type);
}
break;
@ -193,26 +193,26 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
code.Write("VARYING_LOCATION(0) in VertexData {{\n");
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt(" in float3 v_tex{};\n", i);
code.Write(" in float3 v_tex{};\n", i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt(" in float4 v_col{};\n", i);
code.WriteFmt("}};\n");
code.Write(" in float4 v_col{};\n", i);
code.Write("}};\n");
}
else
{
for (u32 i = 0; i < num_tex_inputs; i++)
code.WriteFmt("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i);
code.Write("VARYING_LOCATION({}) in float3 v_tex{};\n", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.WriteFmt("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i);
code.Write("VARYING_LOCATION({}) in float4 v_col{};\n", num_tex_inputs + i, i);
}
code.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type);
code.WriteFmt("{}\n", extra_vars);
code.Write("FRAGMENT_OUTPUT_LOCATION(0) out {} ocol0;\n", output_type);
code.Write("{}\n", extra_vars);
if (emit_frag_coord)
code.WriteFmt("#define frag_coord gl_FragCoord\n");
code.WriteFmt("void main()\n");
code.Write("#define frag_coord gl_FragCoord\n");
code.Write("void main()\n");
}
break;
@ -228,16 +228,16 @@ std::string GenerateScreenQuadVertexShader()
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
code.WriteFmt(
code.Write(
"{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -247,88 +247,88 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
ShaderCode code;
if (GetAPIType() == APIType::D3D)
{
code.WriteFmt("struct VS_OUTPUT\n"
"{{\n");
code.Write("struct VS_OUTPUT\n"
"{{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i);
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 color{} : COLOR{};\n", i, i);
code.WriteFmt(" float4 position : SV_Position;\n"
"}};\n");
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 position : SV_Position;\n"
"}};\n");
code.WriteFmt("struct GS_OUTPUT\n"
"{{");
code.Write("struct GS_OUTPUT\n"
"{{");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 tex{} : TEXCOORD{};\n", i, i);
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 color{} : COLOR{};\n", i, i);
code.WriteFmt(" float4 position : SV_Position;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"}};\n\n");
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 position : SV_Position;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"}};\n\n");
code.WriteFmt("[maxvertexcount(6)]\n"
"void main(triangle VS_OUTPUT vso[3], inout TriangleStream<GS_OUTPUT> output)\n"
"{{\n"
" for (uint slice = 0; slice < 2u; slice++)\n"
" {{\n"
" for (int i = 0; i < 3; i++)\n"
" {{\n"
" GS_OUTPUT gso;\n"
" gso.position = vso[i].position;\n");
code.Write("[maxvertexcount(6)]\n"
"void main(triangle VS_OUTPUT vso[3], inout TriangleStream<GS_OUTPUT> output)\n"
"{{\n"
" for (uint slice = 0; slice < 2u; slice++)\n"
" {{\n"
" for (int i = 0; i < 3; i++)\n"
" {{\n"
" GS_OUTPUT gso;\n"
" gso.position = vso[i].position;\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i);
code.Write(" gso.tex{} = float3(vso[i].tex{}.xy, float(slice));\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" gso.color{} = vso[i].color{};\n", i, i);
code.WriteFmt(" gso.slice = slice;\n"
" output.Append(gso);\n"
" }}\n"
" output.RestartStrip();\n"
" }}\n"
"}}\n");
code.Write(" gso.color{} = vso[i].color{};\n", i, i);
code.Write(" gso.slice = slice;\n"
" output.Append(gso);\n"
" }}\n"
" output.RestartStrip();\n"
" }}\n"
"}}\n");
}
else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan)
{
code.WriteFmt("layout(triangles) in;\n"
"layout(triangle_strip, max_vertices = 6) out;\n");
code.Write("layout(triangles) in;\n"
"layout(triangle_strip, max_vertices = 6) out;\n");
if (num_tex > 0 || num_colors > 0)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
code.Write("VARYING_LOCATION(0) in VertexData {{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}} v_in[];\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}} v_in[];\n");
code.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
code.Write("VARYING_LOCATION(0) out VertexData {{\n");
for (u32 i = 0; i < num_tex; i++)
code.WriteFmt(" float3 v_tex{};\n", i);
code.Write(" float3 v_tex{};\n", i);
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" float4 v_col{};\n", i);
code.WriteFmt("}} v_out;\n");
code.Write(" float4 v_col{};\n", i);
code.Write("}} v_out;\n");
}
code.WriteFmt("\n"
"void main()\n"
"{{\n"
" for (int j = 0; j < 2; j++)\n"
" {{\n"
" gl_Layer = j;\n");
code.Write("\n"
"void main()\n"
"{{\n"
" for (int j = 0; j < 2; j++)\n"
" {{\n"
" gl_Layer = j;\n");
// We have to explicitly unroll this loop otherwise the GL compiler gets cranky.
for (u32 v = 0; v < 3; v++)
{
code.WriteFmt(" gl_Position = gl_in[{}].gl_Position;\n", v);
code.Write(" gl_Position = gl_in[{}].gl_Position;\n", v);
for (u32 i = 0; i < num_tex; i++)
{
code.WriteFmt(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i);
code.Write(" v_out.v_tex{} = float3(v_in[{}].v_tex{}.xy, float(j));\n", i, v, i);
}
for (u32 i = 0; i < num_colors; i++)
code.WriteFmt(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i);
code.WriteFmt(" EmitVertex();\n\n");
code.Write(" v_out.v_col{} = v_in[{}].v_col{};\n", i, v, i);
code.Write(" EmitVertex();\n\n");
}
code.WriteFmt(" EndPrimitive();\n"
" }}\n"
"}}\n");
code.Write(" EndPrimitive();\n"
" }}\n"
"}}\n");
}
return code.GetBuffer();
@ -338,25 +338,24 @@ std::string GenerateTextureCopyVertexShader()
{
ShaderCode code;
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{"
" float2 src_offset;\n"
" float2 src_size;\n"
"}};\n\n");
code.Write("{{"
" float2 src_offset;\n"
" float2 src_size;\n"
"}};\n\n");
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID");
code.WriteFmt(
"{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
code.Write("{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
// NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left.
if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -366,11 +365,11 @@ std::string GenerateTextureCopyPixelShader()
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 0);
code.WriteFmt("{{\n"
" ocol0 = ");
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "v_tex0");
code.WriteFmt(";\n"
"}}\n");
code.Write(";\n"
"}}\n");
return code.GetBuffer();
}
@ -378,9 +377,9 @@ std::string GenerateColorPixelShader()
{
ShaderCode code;
EmitPixelMainDeclaration(code, 0, 1);
code.WriteFmt("{{\n"
" ocol0 = v_col0;\n"
"}}\n");
code.Write("{{\n"
" ocol0 = v_col0;\n"
"}}\n");
return code.GetBuffer();
}
@ -390,25 +389,25 @@ std::string GenerateResolveDepthPixelShader(u32 samples)
EmitSamplerDeclarations(code, 0, 1, true);
EmitPixelMainDeclaration(code, 1, 0, "float",
GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : "");
code.WriteFmt("{{\n"
" int layer = int(v_tex0.z);\n");
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n");
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
// Take the minimum of all depth samples.
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" ocol0 = tex0.Load(coords, 0).r;\n");
code.Write(" ocol0 = tex0.Load(coords, 0).r;\n");
else
code.WriteFmt(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.WriteFmt(" for (int i = 1; i < {}; i++)\n", samples);
code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.Write(" for (int i = 1; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n");
code.Write(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n");
else
code.WriteFmt(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -416,15 +415,15 @@ std::string GenerateClearVertexShader()
{
ShaderCode code;
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{\n"
" float4 clear_color;\n"
" float clear_depth;\n"
"}};\n");
code.Write("{{\n"
" float4 clear_color;\n"
" float clear_depth;\n"
"}};\n");
EmitVertexMainDeclaration(code, 0, 0, false, 0, 1,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
code.WriteFmt(
code.Write(
"{{\n"
" float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"
" opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n"
@ -432,9 +431,9 @@ std::string GenerateClearVertexShader()
// NDC space is flipped in Vulkan
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -443,17 +442,17 @@ std::string GenerateEFBPokeVertexShader()
{
ShaderCode code;
EmitVertexMainDeclaration(code, 0, 1, true, 0, 1);
code.WriteFmt("{{\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.xyz, 1.0f);\n");
code.Write("{{\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.xyz, 1.0f);\n");
if (g_ActiveConfig.backend_info.bSupportsLargePoints)
code.WriteFmt(" gl_PointSize = rawpos.w;\n");
code.Write(" gl_PointSize = rawpos.w;\n");
// NDC space is flipped in Vulkan.
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -468,82 +467,82 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
"in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " :
"in float4 ipos : SV_Position, ") :
"");
code.WriteFmt("{{\n"
" int layer = int(v_tex0.z);\n");
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" int3 coords = int3(int2(ipos.xy), layer);\n");
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.WriteFmt(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
if (samples == 1)
{
// No MSAA at all.
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" float4 val = tex0.Load(int4(coords, 0));\n");
code.Write(" float4 val = tex0.Load(int4(coords, 0));\n");
else
code.WriteFmt(" float4 val = texelFetch(samp0, coords, 0);\n");
code.Write(" float4 val = texelFetch(samp0, coords, 0);\n");
}
else if (g_ActiveConfig.bSSAA)
{
// Sample shading, shader runs once per sample
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" float4 val = tex0.Load(coords, isample);");
code.Write(" float4 val = tex0.Load(coords, isample);");
else
code.WriteFmt(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
}
else
{
// MSAA without sample shading, average out all samples.
code.WriteFmt(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
code.WriteFmt(" for (int i = 0; i < {}; i++)\n", samples);
code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.WriteFmt(" val += tex0.Load(coords, i);\n");
code.Write(" val += tex0.Load(coords, i);\n");
else
code.WriteFmt(" val += texelFetch(samp0, coords, i);\n");
code.WriteFmt(" val /= float({});\n", samples);
code.Write(" val += texelFetch(samp0, coords, i);\n");
code.Write(" val /= float({});\n", samples);
}
switch (convtype)
{
case EFBReinterpretType::RGB8ToRGBA6:
code.WriteFmt(" int4 src8 = int4(round(val * 255.f));\n"
" int4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
" dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
" dst6.a = src8.b & 0x3F;\n"
" ocol0 = float4(dst6) / 63.f;\n");
code.Write(" int4 src8 = int4(round(val * 255.f));\n"
" int4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
" dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
" dst6.a = src8.b & 0x3F;\n"
" ocol0 = float4(dst6) / 63.f;\n");
break;
case EFBReinterpretType::RGB8ToRGB565:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGBA6ToRGB8:
code.WriteFmt(" int4 src6 = int4(round(val * 63.f));\n"
" int4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
" dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
" dst8.a = 255;\n"
" ocol0 = float4(dst8) / 255.f;\n");
code.Write(" int4 src6 = int4(round(val * 63.f));\n"
" int4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
" dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
" dst8.a = 255;\n"
" ocol0 = float4(dst8) / 255.f;\n");
break;
case EFBReinterpretType::RGBA6ToRGB565:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGB565ToRGB8:
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
case EFBReinterpretType::RGB565ToRGBA6:
//
code.WriteFmt(" ocol0 = val;\n");
code.Write(" ocol0 = val;\n");
break;
}
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -552,71 +551,70 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 0, "float4", "", true);
code.WriteFmt("{{\n"
" int layer = int(v_tex0.z);\n"
" int4 coords = int4(int2(frag_coord.xy), layer, 0);\n");
code.Write("{{\n"
" int layer = int(v_tex0.z);\n"
" int4 coords = int4(int2(frag_coord.xy), layer, 0);\n");
// Convert to a 32-bit value encompassing all channels, filling the most significant bits with
// zeroes.
code.WriteFmt(" uint raw_value;\n");
code.Write(" uint raw_value;\n");
switch (from_format)
{
case TextureFormat::I8:
case TextureFormat::C8:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
" raw_value = uint(temp_value.r * 255.0);\n");
code.Write(";\n"
" raw_value = uint(temp_value.r * 255.0);\n");
}
break;
case TextureFormat::IA8:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(
";\n"
" raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n");
code.Write(";\n"
" raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n");
}
break;
case TextureFormat::I4:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
" raw_value = uint(temp_value.r * 15.0);\n");
code.Write(";\n"
" raw_value = uint(temp_value.r * 15.0);\n");
}
break;
case TextureFormat::IA4:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
" raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n");
code.Write(";\n"
" raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n");
}
break;
case TextureFormat::RGB565:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n"
" raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n"
" (uint(temp_value.r * 31.0) << 11);\n");
code.Write(";\n"
" raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n"
" (uint(temp_value.r * 31.0) << 11);\n");
}
break;
case TextureFormat::RGB5A3:
{
code.WriteFmt(" float4 temp_value = ");
code.Write(" float4 temp_value = ");
EmitTextureLoad(code, 0, "coords");
code.WriteFmt(";\n");
code.Write(";\n");
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
code.WriteFmt(
code.Write(
" if (temp_value.a > 0.878f) {{\n"
" raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n"
" (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n"
@ -638,45 +636,45 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
case TextureFormat::I8:
case TextureFormat::C8:
{
code.WriteFmt(" float orgba = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgba, orgba, orgba, orgba);\n");
code.Write(" float orgba = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgba, orgba, orgba, orgba);\n");
}
break;
case TextureFormat::IA8:
{
code.WriteFmt(" float orgb = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n");
code.Write(" float orgb = float(raw_value & 0xFFu) / 255.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 8) & 0xFFu) / 255.0);\n");
}
break;
case TextureFormat::IA4:
{
code.WriteFmt(" float orgb = float(raw_value & 0xFu) / 15.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n");
code.Write(" float orgb = float(raw_value & 0xFu) / 15.0;\n"
" ocol0 = float4(orgb, orgb, orgb, float((raw_value >> 4) & 0xFu) / 15.0);\n");
}
break;
case TextureFormat::RGB565:
{
code.WriteFmt(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n");
code.Write(" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n");
}
break;
case TextureFormat::RGB5A3:
{
code.WriteFmt(" if ((raw_value & 0x8000u) != 0u) {{\n"
" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n"
" }} else {{\n"
" ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n"
" float((raw_value >> 4) & 0x0Fu) / 15.0,\n"
" float(raw_value & 0x0Fu) / 15.0,\n"
" float((raw_value >> 12) & 0x07u) / 7.0);\n"
" }}\n");
code.Write(" if ((raw_value & 0x8000u) != 0u) {{\n"
" ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n"
" float((raw_value >> 5) & 0x1Fu) / 31.0,\n"
" float(raw_value & 0x1Fu) / 31.0, 1.0);\n"
" }} else {{\n"
" ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n"
" float((raw_value >> 4) & 0x0Fu) / 15.0,\n"
" float(raw_value & 0x0Fu) / 15.0,\n"
" float((raw_value >> 12) & 0x07u) / 7.0);\n"
" }}\n");
}
break;
default:
@ -684,7 +682,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
return "{}\n";
}
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -694,14 +692,14 @@ std::string GenerateEFBRestorePixelShader()
EmitSamplerDeclarations(code, 0, 2, false);
EmitPixelMainDeclaration(code, 1, 0, "float4",
GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : "");
code.WriteFmt("{{\n"
" ocol0 = ");
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "v_tex0");
code.WriteFmt(";\n");
code.WriteFmt(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth");
code.Write(";\n");
code.Write(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth");
EmitSampleTexture(code, 1, "v_tex0");
code.WriteFmt(".r;\n"
"}}\n");
code.Write(".r;\n"
"}}\n");
return code.GetBuffer();
}
@ -711,22 +709,22 @@ std::string GenerateImGuiVertexShader()
// Uniform buffer contains the viewport size, and we transform in the vertex shader.
EmitUniformBufferDeclaration(code);
code.WriteFmt("{{\n"
"float2 u_rcp_viewport_size_mul2;\n"
"}};\n\n");
code.Write("{{\n"
"float2 u_rcp_viewport_size_mul2;\n"
"}};\n\n");
EmitVertexMainDeclaration(code, 1, 1, true, 1, 1);
code.WriteFmt("{{\n"
" v_tex0 = float3(rawtex0.xy, 0.0);\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0,"
" 1.0 - rawpos.y * u_rcp_viewport_size_mul2.y, 0.0, 1.0);\n");
code.Write("{{\n"
" v_tex0 = float3(rawtex0.xy, 0.0);\n"
" v_col0 = rawcolor0;\n"
" opos = float4(rawpos.x * u_rcp_viewport_size_mul2.x - 1.0,"
" 1.0 - rawpos.y * u_rcp_viewport_size_mul2.y, 0.0, 1.0);\n");
// NDC space is flipped in Vulkan.
if (GetAPIType() == APIType::Vulkan)
code.WriteFmt(" opos.y = -opos.y;\n");
code.Write(" opos.y = -opos.y;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
return code.GetBuffer();
}
@ -735,11 +733,11 @@ std::string GenerateImGuiPixelShader()
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, false);
EmitPixelMainDeclaration(code, 1, 1);
code.WriteFmt("{{\n"
" ocol0 = ");
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "float3(v_tex0.xy, 0.0)");
code.WriteFmt(" * v_col0;\n"
"}}\n");
code.Write(" * v_col0;\n"
"}}\n");
return code.GetBuffer();
}

View File

@ -73,133 +73,131 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// Insert layout parameters
if (host_config.backend_gs_instancing)
{
out.WriteFmt("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
stereo ? 2 : 1);
out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
out.Write("layout({}, invocations = {}) in;\n", primitives_ogl[primitive_type_index],
stereo ? 2 : 1);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
vertex_out);
}
else
{
out.WriteFmt("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.WriteFmt("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
out.Write("layout({}) in;\n", primitives_ogl[primitive_type_index]);
out.Write("layout({}_strip, max_vertices = {}) out;\n", wireframe ? "line" : "triangle",
stereo ? vertex_out * 2 : vertex_out);
}
}
out.WriteFmt("{}", s_lighting_struct);
out.Write("{}", s_lighting_struct);
// uniforms
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
else
out.WriteFmt("cbuffer GSBlock {{\n");
out.Write("cbuffer GSBlock {{\n");
out.WriteFmt("\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"}};\n");
out.Write("\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config, "");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
if (host_config.backend_gs_instancing)
out.WriteFmt("#define InstanceID gl_InvocationID\n");
out.Write("#define InstanceID gl_InvocationID\n");
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
out.WriteFmt("}} vs[{}];\n", vertex_in);
out.Write("}} vs[{}];\n", vertex_in);
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, ApiType, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
if (stereo)
out.WriteFmt("\tflat int layer;\n");
out.Write("\tflat int layer;\n");
out.WriteFmt("}} ps;\n");
out.Write("}} ps;\n");
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("struct VertexData {{\n");
out.WriteFmt("\tVS_OUTPUT o;\n");
out.Write("struct VertexData {{\n");
out.Write("\tVS_OUTPUT o;\n");
if (stereo)
out.WriteFmt("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (host_config.backend_gs_instancing)
{
out.WriteFmt("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in,
wireframe ? "Line" : "Triangle");
out.Write("[maxvertexcount({})]\n[instance({})]\n", vertex_out, stereo ? 2 : 1);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output, in uint "
"InstanceID : SV_GSInstanceID)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
}
else
{
out.WriteFmt("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.WriteFmt("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in,
wireframe ? "Line" : "Triangle");
out.Write("[maxvertexcount({})]\n", stereo ? vertex_out * 2 : vertex_out);
out.Write("void main({} VS_OUTPUT o[{}], inout {}Stream<VertexData> output)\n{{\n",
primitives_d3d[primitive_type_index], vertex_in, wireframe ? "Line" : "Triangle");
}
out.WriteFmt("\tVertexData ps;\n");
out.Write("\tVertexData ps;\n");
}
if (primitive_type == PrimitiveType::Lines)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT start, end;\n");
out.Write("\tVS_OUTPUT start, end;\n");
AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, host_config);
AssignVSOutputMembers(out, "end", "vs[1]", uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tVS_OUTPUT start = o[0];\n"
"\tVS_OUTPUT end = o[1];\n");
out.Write("\tVS_OUTPUT start = o[0];\n"
"\tVS_OUTPUT end = o[1];\n");
}
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending on the slope of the line.
out.WriteFmt("\tfloat2 offset;\n"
"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
"\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
"\t}} else {{\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
"\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
"\t}}\n");
out.Write("\tfloat2 offset;\n"
"\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
"\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
"\t}} else {{\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
"\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
"\t}}\n");
}
else if (primitive_type == PrimitiveType::Points)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT center;\n");
out.Write("\tVS_OUTPUT center;\n");
AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tVS_OUTPUT center = o[0];\n");
out.Write("\tVS_OUTPUT center = o[0];\n");
}
// Offset from center to upper right vertex
// Lerp PointSize/2 from [0,0..VpWidth,VpHeight] to [-1,1..1,-1]
out.WriteFmt("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
out.Write("\tfloat2 offset = float2(" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS
".x, -" I_LINEPTPARAMS ".w / " I_LINEPTPARAMS ".y) * center.pos.w;\n");
}
if (stereo)
@ -207,19 +205,19 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// If the GPU supports invocation we don't need a for loop and can simply use the
// invocation identifier to determine which layer we're rendering.
if (host_config.backend_gs_instancing)
out.WriteFmt("\tint eye = InstanceID;\n");
out.Write("\tint eye = InstanceID;\n");
else
out.WriteFmt("\tfor (int eye = 0; eye < 2; ++eye) {{\n");
out.Write("\tfor (int eye = 0; eye < 2; ++eye) {{\n");
}
if (wireframe)
out.WriteFmt("\tVS_OUTPUT first;\n");
out.Write("\tVS_OUTPUT first;\n");
out.WriteFmt("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in);
out.Write("\tfor (int i = 0; i < {}; ++i) {{\n", vertex_in);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.WriteFmt("\tVS_OUTPUT f;\n");
out.Write("\tVS_OUTPUT f;\n");
AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, host_config);
if (host_config.backend_depth_clamp &&
@ -227,21 +225,21 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
{
// On certain GPUs we have to consume the clip distance from the vertex shader
// or else the other vertex shader outputs will get corrupted.
out.WriteFmt("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n"
"\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n"
"\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
}
}
else
{
out.WriteFmt("\tVS_OUTPUT f = o[i];\n");
out.Write("\tVS_OUTPUT f = o[i];\n");
}
if (stereo)
{
// Select the output layer
out.WriteFmt("\tps.layer = eye;\n");
out.Write("\tps.layer = eye;\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tgl_Layer = eye;\n");
out.Write("\tgl_Layer = eye;\n");
// For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional
// to the depth of the vertex. We retrieve the depth value from the w-component of the projected
@ -250,56 +248,56 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
// the depth value. This results in objects at a distance smaller than the convergence
// distance to seemingly appear in front of the screen.
// This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide"
out.WriteFmt("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
out.WriteFmt("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n");
out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n");
}
if (primitive_type == PrimitiveType::Lines)
{
out.WriteFmt("\tVS_OUTPUT l = f;\n"
"\tVS_OUTPUT r = f;\n");
out.Write("\tVS_OUTPUT l = f;\n"
"\tVS_OUTPUT r = f;\n");
out.WriteFmt("\tl.pos.xy -= offset * l.pos.w;\n"
"\tr.pos.xy += offset * r.pos.w;\n");
out.Write("\tl.pos.xy -= offset * l.pos.w;\n"
"\tr.pos.xy += offset * r.pos.w;\n");
out.WriteFmt("\tif (" I_TEXOFFSET "[2] != 0) {{\n");
out.WriteFmt("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
out.Write("\tif (" I_TEXOFFSET "[2] != 0) {{\n");
out.Write("\tfloat texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.WriteFmt("\t\tr.tex{}.x += texOffset;\n", i);
out.Write("\tif (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.Write("\t\tr.tex{}.x += texOffset;\n", i);
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EmitVertex(out, host_config, uid_data, "l", ApiType, wireframe, true);
EmitVertex(out, host_config, uid_data, "r", ApiType, wireframe);
}
else if (primitive_type == PrimitiveType::Points)
{
out.WriteFmt("\tVS_OUTPUT ll = f;\n"
"\tVS_OUTPUT lr = f;\n"
"\tVS_OUTPUT ul = f;\n"
"\tVS_OUTPUT ur = f;\n");
out.Write("\tVS_OUTPUT ll = f;\n"
"\tVS_OUTPUT lr = f;\n"
"\tVS_OUTPUT ul = f;\n"
"\tVS_OUTPUT ur = f;\n");
out.WriteFmt("\tll.pos.xy += float2(-1,-1) * offset;\n"
"\tlr.pos.xy += float2(1,-1) * offset;\n"
"\tul.pos.xy += float2(-1,1) * offset;\n"
"\tur.pos.xy += offset;\n");
out.Write("\tll.pos.xy += float2(-1,-1) * offset;\n"
"\tlr.pos.xy += float2(1,-1) * offset;\n"
"\tul.pos.xy += float2(-1,1) * offset;\n"
"\tur.pos.xy += offset;\n");
out.WriteFmt("\tif (" I_TEXOFFSET "[3] != 0) {{\n");
out.WriteFmt("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
"[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");
out.Write("\tif (" I_TEXOFFSET "[3] != 0) {{\n");
out.Write("\tfloat2 texOffset = float2(1.0 / float(" I_TEXOFFSET
"[3]), 1.0 / float(" I_TEXOFFSET "[3]));\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i);
out.WriteFmt("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i);
out.WriteFmt("\t\tur.tex{}.xy += texOffset;\n", i);
out.WriteFmt("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i);
out.WriteFmt("\t}}\n");
out.Write("\tif (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0) {{\n", i);
out.Write("\t\tul.tex{}.xy += float2(0,1) * texOffset;\n", i);
out.Write("\t\tur.tex{}.xy += texOffset;\n", i);
out.Write("\t\tlr.tex{}.xy += float2(1,0) * texOffset;\n", i);
out.Write("\t}}\n");
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EmitVertex(out, host_config, uid_data, "ll", ApiType, wireframe, true);
EmitVertex(out, host_config, uid_data, "lr", ApiType, wireframe);
@ -311,14 +309,14 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& h
EmitVertex(out, host_config, uid_data, "f", ApiType, wireframe, true);
}
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
EndPrimitive(out, host_config, uid_data, ApiType, wireframe);
if (stereo && !host_config.backend_gs_instancing)
out.WriteFmt("\t}}\n");
out.Write("\t}}\n");
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -328,34 +326,34 @@ static void EmitVertex(ShaderCode& out, const ShaderHostConfig& host_config,
APIType ApiType, bool wireframe, bool first_vertex)
{
if (wireframe && first_vertex)
out.WriteFmt("\tif (i == 0) first = {};\n", vertex);
out.Write("\tif (i == 0) first = {};\n", vertex);
if (ApiType == APIType::OpenGL)
{
out.WriteFmt("\tgl_Position = {}.pos;\n", vertex);
out.Write("\tgl_Position = {}.pos;\n", vertex);
if (host_config.backend_depth_clamp)
{
out.WriteFmt("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex);
out.WriteFmt("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex);
out.Write("\tgl_ClipDistance[0] = {}.clipDist0;\n", vertex);
out.Write("\tgl_ClipDistance[1] = {}.clipDist1;\n", vertex);
}
AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config);
}
else if (ApiType == APIType::Vulkan)
{
// Vulkan NDC space has Y pointing down (right-handed NDC space).
out.WriteFmt("\tgl_Position = {}.pos;\n", vertex);
out.WriteFmt("\tgl_Position.y = -gl_Position.y;\n");
out.Write("\tgl_Position = {}.pos;\n", vertex);
out.Write("\tgl_Position.y = -gl_Position.y;\n");
AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, host_config);
}
else
{
out.WriteFmt("\tps.o = {};\n", vertex);
out.Write("\tps.o = {};\n", vertex);
}
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tEmitVertex();\n");
out.Write("\tEmitVertex();\n");
else
out.WriteFmt("\toutput.Append(ps);\n");
out.Write("\toutput.Append(ps);\n");
}
static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
@ -365,9 +363,9 @@ static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
EmitVertex(out, host_config, uid_data, "first", ApiType, wireframe);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.WriteFmt("\tEndPrimitive();\n");
out.Write("\tEndPrimitive();\n");
else
out.WriteFmt("\toutput.RestartStrip();\n");
out.Write("\toutput.RestartStrip();\n");
}
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback)

View File

@ -24,54 +24,53 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d
{
case LIGHTATTN_NONE:
case LIGHTATTN_DIR:
object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("attn = 1.0;\n");
object.WriteFmt("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = 1.0;\n");
object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n");
break;
case LIGHTATTN_SPEC:
object.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR
".xyz)) : 0.0;\n",
LIGHT_DIR_PARAMS(index));
object.WriteFmt("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.WriteFmt("distAttn = {}(" LIGHT_DISTATT ".xyz);\n",
(diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index));
object.WriteFmt("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n");
object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index));
object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR
".xyz)) : 0.0;\n",
LIGHT_DIR_PARAMS(index));
object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index));
object.Write("distAttn = {}(" LIGHT_DISTATT ".xyz);\n",
(diffusefunc == LIGHTDIF_NONE) ? "" : "normalize", LIGHT_DISTATT_PARAMS(index));
object.Write("attn = max(0.0f, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n");
break;
case LIGHTATTN_SPOT:
object.WriteFmt("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.WriteFmt("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
LIGHT_DIR_PARAMS(index));
object.Write("ldir = " LIGHT_POS ".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(index));
object.Write("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0, dot(ldir, " LIGHT_DIR ".xyz));\n",
LIGHT_DIR_PARAMS(index));
// attn*attn may overflow
object.WriteFmt("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT
".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_COSATT_PARAMS(index), LIGHT_DISTATT_PARAMS(index));
object.Write("attn = max(0.0, " LIGHT_COSATT ".x + " LIGHT_COSATT ".y*attn + " LIGHT_COSATT
".z*attn*attn) / dot(" LIGHT_DISTATT ".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index), LIGHT_COSATT_PARAMS(index),
LIGHT_DISTATT_PARAMS(index));
break;
}
switch (diffusefunc)
{
case LIGHTDIF_NONE:
object.WriteFmt("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle,
swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
object.Write("lacc.{} += int{}(round(attn * float{}(" LIGHT_COL ")));\n", swizzle,
swizzle_components, swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
object.WriteFmt("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL
")));\n",
swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
object.Write("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL ")));\n",
swizzle, swizzle_components, diffusefunc != LIGHTDIF_SIGN ? "max(0.0," : "(",
swizzle_components, LIGHT_COL_PARAMS(index, swizzle));
break;
default:
ASSERT(0);
}
object.WriteFmt("\n");
object.Write("\n");
}
// vertex shader
@ -84,21 +83,21 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
for (u32 j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
{
object.WriteFmt("{{\n");
object.Write("{{\n");
const bool colormatsource = !!(uid_data.matsource & (1 << j));
if (colormatsource) // from vertex
{
if ((components & (VB_HAS_COL0 << j)) != 0)
object.WriteFmt("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j);
object.Write("int4 mat = int4(round({}{} * 255.0));\n", in_color_name, j);
else if ((components & VB_HAS_COL0) != 0)
object.WriteFmt("int4 mat = int4(round({}0 * 255.0));\n", in_color_name);
object.Write("int4 mat = int4(round({}0 * 255.0));\n", in_color_name);
else
object.WriteFmt("int4 mat = int4(255, 255, 255, 255);\n");
object.Write("int4 mat = int4(255, 255, 255, 255);\n");
}
else // from color
{
object.WriteFmt("int4 mat = {}[{}];\n", I_MATERIALS, j + 2);
object.Write("int4 mat = {}[{}];\n", I_MATERIALS, j + 2);
}
if ((uid_data.enablelighting & (1 << j)) != 0)
@ -107,28 +106,28 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
if ((components & (VB_HAS_COL0 << j)) != 0)
{
object.WriteFmt("lacc = int4(round({}{} * 255.0));\n", in_color_name, j);
object.Write("lacc = int4(round({}{} * 255.0));\n", in_color_name, j);
}
else if ((components & VB_HAS_COL0) != 0)
{
object.WriteFmt("lacc = int4(round({}0 * 255.0));\n", in_color_name);
object.Write("lacc = int4(round({}0 * 255.0));\n", in_color_name);
}
else
{
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returning 1.0 is the same as disabled lighting, so this could be fine
object.WriteFmt("lacc = int4(255, 255, 255, 255);\n");
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
}
else // from color
{
object.WriteFmt("lacc = {}[{}];\n", I_MATERIALS, j);
object.Write("lacc = {}[{}];\n", I_MATERIALS, j);
}
}
else
{
object.WriteFmt("lacc = int4(255, 255, 255, 255);\n");
object.Write("lacc = int4(255, 255, 255, 255);\n");
}
// check if alpha is different
@ -138,15 +137,15 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
if (alphamatsource) // from vertex
{
if ((components & (VB_HAS_COL0 << j)) != 0)
object.WriteFmt("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
object.Write("mat.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
else if ((components & VB_HAS_COL0) != 0)
object.WriteFmt("mat.w = int(round({}0.w * 255.0));\n", in_color_name);
object.Write("mat.w = int(round({}0.w * 255.0));\n", in_color_name);
else
object.WriteFmt("mat.w = 255;\n");
object.Write("mat.w = 255;\n");
}
else // from color
{
object.WriteFmt("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
object.Write("mat.w = {}[{}].w;\n", I_MATERIALS, j + 2);
}
}
@ -156,26 +155,26 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
{
if ((components & (VB_HAS_COL0 << j)) != 0)
{
object.WriteFmt("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
object.Write("lacc.w = int(round({}{}.w * 255.0));\n", in_color_name, j);
}
else if ((components & VB_HAS_COL0) != 0)
{
object.WriteFmt("lacc.w = int(round({}0.w * 255.0));\n", in_color_name);
object.Write("lacc.w = int(round({}0.w * 255.0));\n", in_color_name);
}
else
{
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
object.WriteFmt("lacc.w = 255;\n");
object.Write("lacc.w = 255;\n");
}
}
else // from color
{
object.WriteFmt("lacc.w = {}[{}].w;\n", I_MATERIALS, j);
object.Write("lacc.w = {}[{}].w;\n", I_MATERIALS, j);
}
}
else
{
object.WriteFmt("lacc.w = 255;\n");
object.Write("lacc.w = 255;\n");
}
if ((uid_data.enablelighting & (1 << j)) != 0) // Color lights
@ -194,9 +193,9 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
GenerateLightShader(object, uid_data, i, j + 2, true);
}
}
object.WriteFmt("lacc = clamp(lacc, 0, 255);\n");
object.WriteFmt("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.WriteFmt("}}\n");
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("{}{} = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}}\n");
}
}

File diff suppressed because it is too large Load Diff

View File

@ -91,20 +91,20 @@ static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string
std::string_view type, std::string_view name, int var_index,
std::string_view semantic = {}, int semantic_index = -1)
{
object.WriteFmt("\t{} {} {}", qualifier, type, name);
object.Write("\t{} {} {}", qualifier, type, name);
if (var_index != -1)
object.WriteFmt("{}", var_index);
object.Write("{}", var_index);
if (api_type == APIType::D3D && !semantic.empty())
{
if (semantic_index != -1)
object.WriteFmt(" : {}{}", semantic, semantic_index);
object.Write(" : {}{}", semantic, semantic_index);
else
object.WriteFmt(" : {}", semantic);
object.Write(" : {}", semantic);
}
object.WriteFmt(";\n");
object.Write(";\n");
}
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
@ -138,26 +138,26 @@ void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
// Appends shader statements that copy the vertex output members of struct `b`
// into struct `a`, one assignment per member.
//
// object:      shader code buffer the assignments are appended to.
// a:           name of the destination variable in the generated shader.
// b:           name of the source variable in the generated shader.
// texgens:     number of tex<N> members to copy (tex0 .. tex<texgens-1>).
// host_config: selects which optional members are copied (see below).
void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens,
                           const ShaderHostConfig& host_config)
{
  // Members that are always copied.
  object.Write("\t{}.pos = {}.pos;\n", a, b);
  object.Write("\t{}.colors_0 = {}.colors_0;\n", a, b);
  object.Write("\t{}.colors_1 = {}.colors_1;\n", a, b);

  for (u32 i = 0; i < texgens; ++i)
    object.Write("\t{}.tex{} = {}.tex{};\n", a, i, b, i);

  // clipPos is only copied when fast depth calculation is disabled.
  if (!host_config.fast_depth_calc)
    object.Write("\t{}.clipPos = {}.clipPos;\n", a, b);

  // Normal and WorldPos are only copied for per-pixel lighting.
  if (host_config.per_pixel_lighting)
  {
    object.Write("\t{}.Normal = {}.Normal;\n", a, b);
    object.Write("\t{}.WorldPos = {}.WorldPos;\n", a, b);
  }

  // The clip distances are only copied when the backend has geometry shaders.
  if (host_config.backend_geometry_shaders)
  {
    object.Write("\t{}.clipDist0 = {}.clipDist0;\n", a, b);
    object.Write("\t{}.clipDist1 = {}.clipDist1;\n", a, b);
  }
}

View File

@ -4,7 +4,6 @@
#pragma once
#include <cstdarg>
#include <cstring>
#include <iterator>
#include <string>
@ -104,21 +103,9 @@ public:
// Reserves capacity for 16384 characters in m_buffer up front
// (presumably so that typical shaders are built without repeated reallocation).
ShaderCode() { m_buffer.reserve(16384); }
// Returns a read-only reference to the internal text buffer.
const std::string& GetBuffer() const { return m_buffer; }
// Deprecated: Writes format strings using traditional printf format strings.
void Write(const char* fmt, ...)
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
{
va_list arglist;
va_start(arglist, fmt);
m_buffer += StringFromFormatV(fmt, arglist);
va_end(arglist);
}
// Writes format strings using fmtlib format strings.
template <typename... Args>
void WriteFmt(std::string_view format, Args&&... args)
void Write(std::string_view format, Args&&... args)
{
fmt::format_to(std::back_inserter(m_buffer), format, std::forward<Args>(args)...);
}

View File

@ -60,63 +60,63 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
{
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
code.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n");
code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
code.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
code.WriteFmt("VARYING_LOCATION(0) in float3 v_tex0;\n");
code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n");
}
code.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
}
else // D3D
{
code.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n"
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n");
code.Write("cbuffer PSBlock : register(b0) {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n"
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n");
}
// D3D does not have roundEven(), only round(), which is specified "to the nearest integer".
// This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL
// we need to use roundEven().
if (api_type == APIType::D3D)
code.WriteFmt("#define roundEven(x) round(x)\n");
code.Write("#define roundEven(x) round(x)\n");
// Alpha channel in the copy is set to 1 when the EFB format does not have an alpha channel.
code.WriteFmt("float4 RGBA8ToRGB8(float4 src)\n"
"{{\n"
" return float4(src.xyz, 1.0);\n"
"}}\n"
code.Write("float4 RGBA8ToRGB8(float4 src)\n"
"{{\n"
" return float4(src.xyz, 1.0);\n"
"}}\n"
"float4 RGBA8ToRGBA6(float4 src)\n"
"{{\n"
" int4 val = int4(roundEven(src * 255.0)) >> 2;\n"
" return float4(val) / 63.0;\n"
"}}\n"
"float4 RGBA8ToRGBA6(float4 src)\n"
"{{\n"
" int4 val = int4(roundEven(src * 255.0)) >> 2;\n"
" return float4(val) / 63.0;\n"
"}}\n"
"float4 RGBA8ToRGB565(float4 src)\n"
"{{\n"
" int4 val = int4(roundEven(src * 255.0));\n"
" val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"
" return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"
"}}\n");
"float4 RGBA8ToRGB565(float4 src)\n"
"{{\n"
" int4 val = int4(roundEven(src * 255.0));\n"
" val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"
" return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"
"}}\n");
}
static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, APIType api_type)
@ -127,16 +127,16 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
switch (params.efb_format)
{
case PEControl::RGB8_Z24:
code.WriteFmt("RGBA8ToRGB8(");
code.Write("RGBA8ToRGB8(");
break;
case PEControl::RGBA6_Z24:
code.WriteFmt("RGBA8ToRGBA6(");
code.Write("RGBA8ToRGBA6(");
break;
case PEControl::RGB565_Z16:
code.WriteFmt("RGBA8ToRGB565(");
code.Write("RGBA8ToRGB565(");
break;
default:
code.WriteFmt("(");
code.Write("(");
break;
}
}
@ -144,63 +144,63 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
{
// Handle D3D depth inversion.
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
code.WriteFmt("1.0 - (");
code.Write("1.0 - (");
else
code.WriteFmt("(");
code.Write("(");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
code.WriteFmt("texture(samp0, float3(");
code.Write("texture(samp0, float3(");
else
code.WriteFmt("Tex0.Sample(samp0, float3(");
code.Write("Tex0.Sample(samp0, float3(");
code.WriteFmt("uv.x + float(xoffset) * pixel_size.x, ");
code.Write("uv.x + float(xoffset) * pixel_size.x, ");
// Reverse the direction for OpenGL, since positive numbers are distance from the bottom row.
if (yoffset != 0)
{
if (api_type == APIType::OpenGL)
code.WriteFmt("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
code.Write("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
else
code.WriteFmt("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
code.Write("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
}
else
{
code.WriteFmt("uv.y");
code.Write("uv.y");
}
code.WriteFmt(", 0.0)))");
code.Write(", 0.0)))");
};
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
code.WriteFmt("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
"{{\n");
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
"{{\n");
if (params.copy_filter)
{
code.WriteFmt(" float4 prev_row = ");
code.Write(" float4 prev_row = ");
WriteSampleOp(-1);
code.WriteFmt(";\n"
" float4 current_row = ");
code.Write(";\n"
" float4 current_row = ");
WriteSampleOp(0);
code.WriteFmt(";\n"
" float4 next_row = ");
code.Write(";\n"
" float4 next_row = ");
WriteSampleOp(1);
code.WriteFmt(";\n"
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
code.Write(";\n"
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
{
code.WriteFmt(" float4 current_row = ");
code.Write(" float4 current_row = ");
WriteSampleOp(0);
code.WriteFmt(";\n"
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
code.Write(";\n"
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
}
code.WriteFmt("}}\n");
code.Write("}}\n");
}
// Block dimensions : widthStride, heightStride
@ -213,101 +213,101 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
code.WriteFmt("void main()\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n");
code.Write("void main()\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n");
}
else // D3D
{
code.WriteFmt("void main(\n"
" in float3 v_tex0 : TEXCOORD0,\n"
" in float4 rawpos : SV_Position,\n"
" out float4 ocol0 : SV_Target)\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(rawpos.xy);\n");
code.Write("void main(\n"
" in float3 v_tex0 : TEXCOORD0,\n"
" in float4 rawpos : SV_Position,\n"
" out float4 ocol0 : SV_Target)\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(rawpos.xy);\n");
}
const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
int samples = GetEncodedSampleCount(format);
code.WriteFmt(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
IntLog2(blkW));
code.WriteFmt(" int y_block_position = uv1.y << {};\n", IntLog2(blkH));
code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
IntLog2(blkW));
code.Write(" int y_block_position = uv1.y << {};\n", IntLog2(blkH));
if (samples == 1)
{
// With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
code.WriteFmt(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2);
code.Write(" bool first = (uv1.x & {}) == 0;\n", blkH * blkW / 2);
samples = 2;
}
code.WriteFmt(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1);
code.WriteFmt(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples));
code.WriteFmt(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1,
IntLog2(samples));
code.Write(" int offset_in_block = uv1.x & {};\n", (blkH * blkW / samples) - 1);
code.Write(" int y_offset_in_block = offset_in_block >> {};\n", IntLog2(blkW / samples));
code.Write(" int x_offset_in_block = (offset_in_block & {}) << {};\n", (blkW / samples) - 1,
IntLog2(samples));
code.WriteFmt(" sampleUv.x = x_block_position + x_offset_in_block;\n"
" sampleUv.y = y_block_position + y_offset_in_block;\n");
code.Write(" sampleUv.x = x_block_position + x_offset_in_block;\n"
" sampleUv.y = y_block_position + y_offset_in_block;\n");
// sampleUv is the sample position in (int)gx_coords
code.WriteFmt(" float2 uv0 = float2(sampleUv);\n");
code.Write(" float2 uv0 = float2(sampleUv);\n");
// Move to center of pixel
code.WriteFmt(" uv0 += float2(0.5, 0.5);\n");
code.Write(" uv0 += float2(0.5, 0.5);\n");
// Scale by two if needed (also move to pixel borders
// so that linear filtering will average adjacent
// pixel)
code.WriteFmt(" uv0 *= float(position.w);\n");
code.Write(" uv0 *= float(position.w);\n");
// Move to copied rect
code.WriteFmt(" uv0 += float2(position.xy);\n");
code.Write(" uv0 += float2(position.xy);\n");
// Normalize to [0:1]
code.WriteFmt(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT);
code.Write(" uv0 /= float2({}, {});\n", EFB_WIDTH, EFB_HEIGHT);
// Apply the y scaling
code.WriteFmt(" uv0 /= float2(1, y_scale);\n");
code.Write(" uv0 /= float2(1, y_scale);\n");
// OGL has to flip up and down
if (api_type == APIType::OpenGL)
{
code.WriteFmt(" uv0.y = 1.0-uv0.y;\n");
code.Write(" uv0.y = 1.0-uv0.y;\n");
}
code.WriteFmt(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n",
EFB_WIDTH, EFB_HEIGHT);
code.Write(" float2 pixel_size = float2(position.w, position.w) / float2({}, {});\n", EFB_WIDTH,
EFB_HEIGHT);
}
// Emits a single shader statement that calls SampleEFB(uv0, pixel_size, x_offset)
// and assigns the selected components of the result to `dest`.
//
// code:       shader code buffer the statement is appended to.
// color_comp: swizzle applied to the sampled value (e.g. "rgb").
// dest:       shader expression that receives the sample.
// x_offset:   value passed as SampleEFB's third argument.
// api_type, params: not used by this function.
static void WriteSampleColor(ShaderCode& code, std::string_view color_comp, std::string_view dest,
                             int x_offset, APIType api_type, const EFBCopyParams& params)
{
  code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp);
}
// Emits shader code that stores dot(IntensityConst.rgb, src.rgb) into `dest`.
//
// The IntensityConst declaration is emitted only the first time this is called
// after IntensityConstantAdded was last false; the flag is set here so later
// calls skip the declaration.
//
// code: shader code buffer the statements are appended to.
// src:  shader expression whose .rgb is converted.
// dest: shader expression that receives the result.
static void WriteColorToIntensity(ShaderCode& code, std::string_view src, std::string_view dest)
{
  if (!IntensityConstantAdded)
  {
    code.Write(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n");
    IntensityConstantAdded = true;
  }
  code.Write(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src);
  // don't add IntensityConst.a yet, because doing it later is faster and uses less instructions,
  // due to vectorization
}
// Appends a shader statement that rescales `src` to a `depth`-bit integer range and stores it in
// `dest`:
//   <dest> = floor(<src> * 255.0 / exp2(8.0 - <depth>.0));
// i.e. the value is scaled to 0..255 and then divided by 2^(8 - depth), rounding down.
//
// code  - shader source buffer being appended to.
// depth - target bit depth; it is spliced into the shader text directly in front of ".0".
//         NOTE(review): this assumes the formatter prints a u8 as a decimal number, not as a
//         character - confirm.
// src   - shader-side expression to quantize.
// dest  - shader-side l-value receiving the result; callers in this file also pass a declaration
//         such as "float2 gInt" here.
//
// NOTE(review): the WriteFmt()/Write() pair below emits the same text twice. This looks like the
// before/after lines of the WriteFmt -> Write rename with the diff markers stripped - confirm that
// only the Write() call belongs in the final source.
static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest)
{
code.WriteFmt(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
}
// Finishes an encoder shader: appends the closing brace text and clears IntensityConstantAdded so
// that the next encoder which calls WriteColorToIntensity() declares IntensityConst again.
//
// code - shader source buffer being appended to.
//
// NOTE(review): "}}" is presumably the format-string escape for a single literal '}' - confirm
// against the formatter used by ShaderCode::Write().
// NOTE(review): the WriteFmt()/Write() pair below emits the same text twice. This looks like the
// before/after lines of the WriteFmt -> Write rename with the diff markers stripped - confirm that
// only the Write() call belongs in the final source.
static void WriteEncoderEnd(ShaderCode& code)
{
code.WriteFmt("}}\n");
code.Write("}}\n");
IntensityConstantAdded = false;
}
static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R8, api_type);
code.WriteFmt(" float3 texSample;\n");
code.Write(" float3 texSample;\n");
WriteSampleColor(code, "rgb", "texSample", 0, api_type, params);
WriteColorToIntensity(code, "texSample", "ocol0.b");
@ -322,7 +322,7 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
WriteColorToIntensity(code, "texSample", "ocol0.a");
// See WriteColorToIntensity
code.WriteFmt(" ocol0.rgba += IntensityConst.aaaa;\n");
code.Write(" ocol0.rgba += IntensityConst.aaaa;\n");
WriteEncoderEnd(code);
}
@ -330,9 +330,9 @@ static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
code.WriteFmt(" float3 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
code.Write(" float3 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, "rgb", "texSample", 0, api_type, params);
WriteColorToIntensity(code, "texSample", "color0.b");
@ -358,30 +358,30 @@ static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyPara
WriteSampleColor(code, "rgb", "texSample", 7, api_type, params);
WriteColorToIntensity(code, "texSample", "color1.a");
code.WriteFmt(" color0.rgba += IntensityConst.aaaa;\n"
" color1.rgba += IntensityConst.aaaa;\n");
code.Write(" color0.rgba += IntensityConst.aaaa;\n"
" color1.rgba += IntensityConst.aaaa;\n");
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
// Generates the encoder shader body for intensity+alpha output at 8 bits per component.
//
// Two EFB samples (x offsets 0 and 1) are combined into one output texel:
//   ocol0.b = alpha of sample 0,  ocol0.g = intensity of sample 0,
//   ocol0.r = alpha of sample 1,  ocol0.a = intensity of sample 1.
// The intensity offset (IntensityConst.a) is added to both intensity channels in a single
// statement at the end rather than per sample.
//
// code     - shader source buffer being appended to.
// api_type - forwarded to WriteSwizzler() and WriteSampleColor().
// params   - forwarded to WriteSwizzler() and WriteSampleColor().
//
// NOTE(review): each WriteFmt()/Write() pair below emits the same text twice. This looks like the
// before/after lines of the WriteFmt -> Write rename with the diff markers stripped - confirm that
// only the Write() calls belong in the final source.
static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
code.WriteFmt(" float4 texSample;\n");
code.Write(" float4 texSample;\n");
// First sample: alpha -> ocol0.b, intensity -> ocol0.g
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" ocol0.b = texSample.a;\n");
code.Write(" ocol0.b = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "ocol0.g");
// Second sample: alpha -> ocol0.r, intensity -> ocol0.a
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" ocol0.r = texSample.a;\n");
code.Write(" ocol0.r = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "ocol0.a");
// Deferred intensity offset, see WriteColorToIntensity
code.WriteFmt(" ocol0.ga += IntensityConst.aa;\n");
code.Write(" ocol0.ga += IntensityConst.aa;\n");
WriteEncoderEnd(code);
}
@ -389,57 +389,57 @@ static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
// Generates the encoder shader body for intensity+alpha output at 4 bits per component.
//
// Four EFB samples (x offsets 0..3) are read. For sample i the alpha goes into one channel of
// color0 and the intensity into the same channel of color1, in the order b, g, r, a. Both vectors
// are then reduced to 4-bit values with WriteToBitDepth() and packed per channel as
//   ocol0 = (color0 * 16.0 + color1) / 255.0
// so color0 (alpha) is multiplied by 16 and color1 (intensity) is added to it.
//
// code     - shader source buffer being appended to.
// api_type - forwarded to WriteSwizzler() and WriteSampleColor().
// params   - forwarded to WriteSwizzler() and WriteSampleColor().
//
// NOTE(review): each WriteFmt()/Write() pair below emits the same text twice. This looks like the
// before/after lines of the WriteFmt -> Write rename with the diff markers stripped - confirm that
// only the Write() calls belong in the final source.
static void WriteIA4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
code.WriteFmt(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
code.Write(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
// Sample 0 -> .b channels
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.a;\n");
code.Write(" color0.b = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.b");
// Sample 1 -> .g channels
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" color0.g = texSample.a;\n");
code.Write(" color0.g = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.g");
// Sample 2 -> .r channels
WriteSampleColor(code, "rgba", "texSample", 2, api_type, params);
code.WriteFmt(" color0.r = texSample.a;\n");
code.Write(" color0.r = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.r");
// Sample 3 -> .a channels
WriteSampleColor(code, "rgba", "texSample", 3, api_type, params);
code.WriteFmt(" color0.a = texSample.a;\n");
code.Write(" color0.a = texSample.a;\n");
WriteColorToIntensity(code, "texSample", "color1.a");
// Deferred intensity offset for all four intensities at once, see WriteColorToIntensity
code.WriteFmt(" color1.rgba += IntensityConst.aaaa;\n");
code.Write(" color1.rgba += IntensityConst.aaaa;\n");
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
// Generates the encoder shader body for RGB565 output.
//
// Two EFB samples (x offsets 0 and 1) are processed together as float2 pairs (texRs/texGs/texBs).
// Green is quantized to 6 bits and split into gUpper = floor(g / 8) and gLower = g - gUpper * 8.
// Red and blue are quantized to 5 bits. The generated code then forms, per sample:
//   ocol0.br = red * 8 + gUpper
//   ocol0.ga = blue + gLower * 32
// and finally divides ocol0 by 255.0.
//
// code     - shader source buffer being appended to.
// api_type - forwarded to WriteSwizzler() and WriteSampleColor().
// params   - forwarded to WriteSwizzler() and WriteSampleColor().
//
// NOTE(review): each WriteFmt()/Write() pair below emits the same text twice. This looks like the
// before/after lines of the WriteFmt -> Write rename with the diff markers stripped - confirm that
// only the Write() calls belong in the final source.
static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::RGB565, api_type);
code.WriteFmt(" float3 texSample0;\n"
" float3 texSample1;\n");
code.Write(" float3 texSample0;\n"
" float3 texSample1;\n");
WriteSampleColor(code, "rgb", "texSample0", 0, api_type, params);
WriteSampleColor(code, "rgb", "texSample1", 1, api_type, params);
// Regroup the two samples by channel so each channel can be handled as one float2
code.WriteFmt(" float2 texRs = float2(texSample0.r, texSample1.r);\n"
" float2 texGs = float2(texSample0.g, texSample1.g);\n"
" float2 texBs = float2(texSample0.b, texSample1.b);\n");
code.Write(" float2 texRs = float2(texSample0.r, texSample1.r);\n"
" float2 texGs = float2(texSample0.g, texSample1.g);\n"
" float2 texBs = float2(texSample0.b, texSample1.b);\n");
// The dest argument doubles as the declaration of gInt in the generated shader
WriteToBitDepth(code, 6, "texGs", "float2 gInt");
code.WriteFmt(" float2 gUpper = floor(gInt / 8.0);\n"
" float2 gLower = gInt - gUpper * 8.0;\n");
code.Write(" float2 gUpper = floor(gInt / 8.0);\n"
" float2 gLower = gInt - gUpper * 8.0;\n");
WriteToBitDepth(code, 5, "texRs", "ocol0.br");
code.WriteFmt(" ocol0.br = ocol0.br * 8.0 + gUpper;\n");
code.Write(" ocol0.br = ocol0.br * 8.0 + gUpper;\n");
WriteToBitDepth(code, 5, "texBs", "ocol0.ga");
code.WriteFmt(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
code.WriteFmt(" ocol0 = ocol0 / 255.0;\n");
code.Write(" ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(code);
}
@ -447,63 +447,63 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy
{
WriteSwizzler(code, params, EFBCopyFormat::RGB5A3, api_type);
code.WriteFmt(" float4 texSample;\n"
" float color0;\n"
" float gUpper;\n"
" float gLower;\n");
code.Write(" float4 texSample;\n"
" float color0;\n"
" float gUpper;\n"
" float gLower;\n");
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
code.WriteFmt("if(texSample.a > 0.878f) {{\n");
code.Write("if(texSample.a > 0.878f) {{\n");
WriteToBitDepth(code, 5, "texSample.g", "color0");
code.WriteFmt(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
code.Write(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(code, 5, "texSample.r", "ocol0.b");
code.WriteFmt(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n");
code.Write(" ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(code, 5, "texSample.b", "ocol0.g");
code.WriteFmt(" ocol0.g = ocol0.g + gLower * 32.0;\n");
code.Write(" ocol0.g = ocol0.g + gLower * 32.0;\n");
code.WriteFmt("}} else {{\n");
code.Write("}} else {{\n");
WriteToBitDepth(code, 4, "texSample.r", "ocol0.b");
WriteToBitDepth(code, 4, "texSample.b", "ocol0.g");
WriteToBitDepth(code, 3, "texSample.a", "color0");
code.WriteFmt("ocol0.b = ocol0.b + color0 * 16.0;\n");
code.Write("ocol0.b = ocol0.b + color0 * 16.0;\n");
WriteToBitDepth(code, 4, "texSample.g", "color0");
code.WriteFmt("ocol0.g = ocol0.g + color0 * 16.0;\n");
code.Write("ocol0.g = ocol0.g + color0 * 16.0;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt("if(texSample.a > 0.878f) {{\n");
code.Write("if(texSample.a > 0.878f) {{\n");
WriteToBitDepth(code, 5, "texSample.g", "color0");
code.WriteFmt(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
code.Write(" gUpper = floor(color0 / 8.0);\n"
" gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(code, 5, "texSample.r", "ocol0.r");
code.WriteFmt(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n");
code.Write(" ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(code, 5, "texSample.b", "ocol0.a");
code.WriteFmt(" ocol0.a = ocol0.a + gLower * 32.0;\n");
code.Write(" ocol0.a = ocol0.a + gLower * 32.0;\n");
code.WriteFmt("}} else {{\n");
code.Write("}} else {{\n");
WriteToBitDepth(code, 4, "texSample.r", "ocol0.r");
WriteToBitDepth(code, 4, "texSample.b", "ocol0.a");
WriteToBitDepth(code, 3, "texSample.a", "color0");
code.WriteFmt("ocol0.r = ocol0.r + color0 * 16.0;\n");
code.Write("ocol0.r = ocol0.r + color0 * 16.0;\n");
WriteToBitDepth(code, 4, "texSample.g", "color0");
code.WriteFmt("ocol0.a = ocol0.a + color0 * 16.0;\n");
code.Write("ocol0.a = ocol0.a + color0 * 16.0;\n");
code.WriteFmt("}}\n");
code.Write("}}\n");
code.WriteFmt(" ocol0 = ocol0 / 255.0;\n");
code.Write(" ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(code);
}
@ -511,23 +511,23 @@ static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyP
{
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
code.WriteFmt(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
code.Write(" float4 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.a;\n"
" color0.g = texSample.r;\n"
" color1.b = texSample.g;\n"
" color1.g = texSample.b;\n");
code.Write(" color0.b = texSample.a;\n"
" color0.g = texSample.r;\n"
" color1.b = texSample.g;\n"
" color1.g = texSample.b;\n");
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
code.WriteFmt(" color0.r = texSample.a;\n"
" color0.a = texSample.r;\n"
" color1.r = texSample.g;\n"
" color1.a = texSample.b;\n");
code.Write(" color0.r = texSample.a;\n"
" color0.a = texSample.r;\n"
" color1.r = texSample.g;\n"
" color1.a = texSample.b;\n");
code.WriteFmt(" ocol0 = first ? color0 : color1;\n");
code.Write(" ocol0 = first ? color0 : color1;\n");
WriteEncoderEnd(code);
}
@ -536,8 +536,8 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_
const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
code.WriteFmt(" float4 color0;\n"
" float4 color1;\n");
code.Write(" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, comp, "color0.b", 0, api_type, params);
WriteSampleColor(code, comp, "color1.b", 1, api_type, params);
@ -551,7 +551,7 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
@ -572,30 +572,30 @@ static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api
const EFBCopyParams& params)
{
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
code.WriteFmt(" float2 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
code.Write(" float2 texSample;\n"
" float4 color0;\n"
" float4 color1;\n");
WriteSampleColor(code, comp, "texSample", 0, api_type, params);
code.WriteFmt(" color0.b = texSample.x;\n"
" color1.b = texSample.y;\n");
code.Write(" color0.b = texSample.x;\n"
" color1.b = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 1, api_type, params);
code.WriteFmt(" color0.g = texSample.x;\n"
" color1.g = texSample.y;\n");
code.Write(" color0.g = texSample.x;\n"
" color1.g = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 2, api_type, params);
code.WriteFmt(" color0.r = texSample.x;\n"
" color1.r = texSample.y;\n");
code.Write(" color0.r = texSample.x;\n"
" color1.r = texSample.y;\n");
WriteSampleColor(code, comp, "texSample", 3, api_type, params);
code.WriteFmt(" color0.a = texSample.x;\n"
" color1.a = texSample.y;\n");
code.Write(" color0.a = texSample.x;\n"
" color1.a = texSample.y;\n");
WriteToBitDepth(code, 4, "color0", "color0");
WriteToBitDepth(code, 4, "color1", "color1");
code.WriteFmt(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(code);
}
@ -615,19 +615,19 @@ static void WriteZ8Encoder(ShaderCode& code, std::string_view multiplier, APITyp
{
WriteSwizzler(code, params, EFBCopyFormat::G8, api_type);
code.WriteFmt(" float depth;\n");
code.Write(" float depth;\n");
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt("ocol0.b = frac(depth * {});\n", multiplier);
code.Write("ocol0.b = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt("ocol0.g = frac(depth * {});\n", multiplier);
code.Write("ocol0.g = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 2, api_type, params);
code.WriteFmt("ocol0.r = frac(depth * {});\n", multiplier);
code.Write("ocol0.r = frac(depth * {});\n", multiplier);
WriteSampleColor(code, "r", "depth", 3, api_type, params);
code.WriteFmt("ocol0.a = frac(depth * {});\n", multiplier);
code.Write("ocol0.a = frac(depth * {});\n", multiplier);
WriteEncoderEnd(code);
}
@ -636,30 +636,30 @@ static void WriteZ16Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
code.WriteFmt(" float depth;\n"
" float3 expanded;\n");
code.Write(" float depth;\n"
" float3 expanded;\n");
// Byte order is reversed
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.WriteFmt(" ocol0.b = expanded.g / 255.0;\n"
" ocol0.g = expanded.r / 255.0;\n");
code.Write(" ocol0.b = expanded.g / 255.0;\n"
" ocol0.g = expanded.r / 255.0;\n");
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n");
code.WriteFmt(" ocol0.r = expanded.g / 255.0;\n"
" ocol0.a = expanded.r / 255.0;\n");
code.Write(" ocol0.r = expanded.g / 255.0;\n"
" ocol0.a = expanded.r / 255.0;\n");
WriteEncoderEnd(code);
}
@ -668,34 +668,34 @@ static void WriteZ16LEncoder(ShaderCode& code, APIType api_type, const EFBCopyPa
{
WriteSwizzler(code, params, EFBCopyFormat::GB8, api_type);
code.WriteFmt(" float depth;\n"
" float3 expanded;\n");
code.Write(" float depth;\n"
" float3 expanded;\n");
// Byte order is reversed
WriteSampleColor(code, "r", "depth", 0, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.WriteFmt(" ocol0.b = expanded.b / 255.0;\n"
" ocol0.g = expanded.g / 255.0;\n");
code.Write(" ocol0.b = expanded.b / 255.0;\n"
" ocol0.g = expanded.g / 255.0;\n");
WriteSampleColor(code, "r", "depth", 1, api_type, params);
code.WriteFmt(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.Write(" depth *= 16777216.0;\n"
" expanded.r = floor(depth / (256.0 * 256.0));\n"
" depth -= expanded.r * 256.0 * 256.0;\n"
" expanded.g = floor(depth / 256.0);\n"
" depth -= expanded.g * 256.0;\n"
" expanded.b = depth;\n");
code.WriteFmt(" ocol0.r = expanded.b / 255.0;\n"
" ocol0.a = expanded.g / 255.0;\n");
code.Write(" ocol0.r = expanded.b / 255.0;\n"
" ocol0.a = expanded.g / 255.0;\n");
WriteEncoderEnd(code);
}
@ -704,38 +704,38 @@ static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
code.WriteFmt(" float depth0;\n"
" float depth1;\n"
" float3 expanded0;\n"
" float3 expanded1;\n");
code.Write(" float depth0;\n"
" float depth1;\n"
" float3 expanded0;\n"
" float3 expanded1;\n");
WriteSampleColor(code, "r", "depth0", 0, api_type, params);
WriteSampleColor(code, "r", "depth1", 1, api_type, params);
for (int i = 0; i < 2; i++)
{
code.WriteFmt(" depth{} *= 16777216.0;\n", i);
code.Write(" depth{} *= 16777216.0;\n", i);
code.WriteFmt(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i);
code.WriteFmt(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i);
code.WriteFmt(" expanded{}.g = floor(depth{} / 256.0);\n", i, i);
code.WriteFmt(" depth{} -= expanded{}.g * 256.0;\n", i, i);
code.WriteFmt(" expanded{}.b = depth{};\n", i, i);
code.Write(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i);
code.Write(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i);
code.Write(" expanded{}.g = floor(depth{} / 256.0);\n", i, i);
code.Write(" depth{} -= expanded{}.g * 256.0;\n", i, i);
code.Write(" expanded{}.b = depth{};\n", i, i);
}
code.WriteFmt(" if (!first) {{\n");
code.Write(" if (!first) {{\n");
// Upper 16
code.WriteFmt(" ocol0.b = expanded0.g / 255.0;\n"
" ocol0.g = expanded0.b / 255.0;\n"
" ocol0.r = expanded1.g / 255.0;\n"
" ocol0.a = expanded1.b / 255.0;\n"
" }} else {{\n");
code.Write(" ocol0.b = expanded0.g / 255.0;\n"
" ocol0.g = expanded0.b / 255.0;\n"
" ocol0.r = expanded1.g / 255.0;\n"
" ocol0.a = expanded1.b / 255.0;\n"
" }} else {{\n");
// Lower 8
code.WriteFmt(" ocol0.b = 1.0;\n"
" ocol0.g = expanded0.r / 255.0;\n"
" ocol0.r = 1.0;\n"
" ocol0.a = expanded1.r / 255.0;\n"
" }}\n");
code.Write(" ocol0.b = 1.0;\n"
" ocol0.g = expanded0.r / 255.0;\n"
" ocol0.r = 1.0;\n"
" ocol0.a = expanded1.r / 255.0;\n"
" }}\n");
WriteEncoderEnd(code);
}
@ -744,23 +744,23 @@ static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyPar
{
WriteSwizzler(code, params, EFBCopyFormat::XFB, api_type);
code.WriteFmt("float3 color0, color1;\n");
code.Write("float3 color0, color1;\n");
WriteSampleColor(code, "rgb", "color0", 0, api_type, params);
WriteSampleColor(code, "rgb", "color1", 1, api_type, params);
// Gamma is only applied to XFB copies.
code.WriteFmt(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"
" color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
code.Write(" color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"
" color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
// Convert to YUV.
code.WriteFmt(" const float3 y_const = float3(0.257, 0.504, 0.098);\n"
" const float3 u_const = float3(-0.148, -0.291, 0.439);\n"
" const float3 v_const = float3(0.439, -0.368, -0.071);\n"
" float3 average = (color0 + color1) * 0.5;\n"
" ocol0.b = dot(color0, y_const) + 0.0625;\n"
" ocol0.g = dot(average, u_const) + 0.5;\n"
" ocol0.r = dot(color1, y_const) + 0.0625;\n"
" ocol0.a = dot(average, v_const) + 0.5;\n");
code.Write(" const float3 y_const = float3(0.257, 0.504, 0.098);\n"
" const float3 u_const = float3(-0.148, -0.291, 0.439);\n"
" const float3 v_const = float3(0.439, -0.368, -0.071);\n"
" float3 average = (color0 + color1) * 0.5;\n"
" ocol0.b = dot(color0, y_const) + 0.0625;\n"
" ocol0.g = dot(average, u_const) + 0.5;\n"
" ocol0.r = dot(color1, y_const) + 0.0625;\n"
" ocol0.a = dot(average, v_const) + 0.5;\n");
WriteEncoderEnd(code);
}

View File

@ -32,23 +32,23 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
{
if (api_type == APIType::D3D)
{
out.WriteFmt("cbuffer PSBlock : register(b0) {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n\n");
out.Write("cbuffer PSBlock : register(b0) {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n");
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n");
}
}
@ -59,35 +59,35 @@ ShaderCode GenerateVertexShader(APIType api_type)
if (api_type == APIType::D3D)
{
out.WriteFmt("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
" out float4 opos : SV_Position) {{\n");
out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
" out float4 opos : SV_Position) {{\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.WriteFmt("VARYING_LOCATION(0) out float3 v_tex0;\n");
out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n");
}
out.WriteFmt("#define id gl_VertexID\n"
"#define opos gl_Position\n"
"void main() {{\n");
out.Write("#define id gl_VertexID\n"
"#define opos gl_Position\n"
"void main() {{\n");
}
out.WriteFmt(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
out.WriteFmt(
out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
out.Write(
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
out.WriteFmt(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n");
// NDC space is flipped in Vulkan
if (api_type == APIType::Vulkan)
out.WriteFmt(" opos.y = -opos.y;\n");
out.Write(" opos.y = -opos.y;\n");
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -101,52 +101,52 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
if (api_type == APIType::D3D)
{
out.WriteFmt("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n\n",
mono_depth ? "0.0" : "uv.z");
out.WriteFmt("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n\n",
mono_depth ? "0.0" : "uv.z");
out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.WriteFmt("float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n",
mono_depth ? "0.0" : "uv.z");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.WriteFmt("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.WriteFmt("VARYING_LOCATION(0) in vec3 v_tex0;\n");
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
}
out.WriteFmt("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
"void main()\n{{\n");
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;"
"void main()\n{{\n");
}
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
if (uid_data->copy_filter)
{
out.WriteFmt(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
{
out.WriteFmt(
out.Write(
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
@ -155,62 +155,62 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
if (uid_data->is_depth_copy)
{
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
out.WriteFmt("texcol.x = 1.0 - texcol.x;\n");
out.Write("texcol.x = 1.0 - texcol.x;\n");
out.WriteFmt(" int depth = int(texcol.x * 16777216.0);\n"
out.Write(" int depth = int(texcol.x * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n");
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n");
switch (uid_data->dst_format)
{
case EFBCopyFormat::R4: // Z4
out.WriteFmt(" ocol0 = texcol.aaaa;\n");
out.Write(" ocol0 = texcol.aaaa;\n");
break;
case EFBCopyFormat::R8_0x1: // Z8
case EFBCopyFormat::R8: // Z8H
out.WriteFmt(" ocol0 = texcol.rrrr;\n");
out.Write(" ocol0 = texcol.rrrr;\n");
break;
case EFBCopyFormat::RA8: // Z16
out.WriteFmt(" ocol0 = texcol.gggr;\n");
out.Write(" ocol0 = texcol.gggr;\n");
break;
case EFBCopyFormat::RG8: // Z16 (reverse order)
out.WriteFmt(" ocol0 = texcol.rrrg;\n");
out.Write(" ocol0 = texcol.rrrg;\n");
break;
case EFBCopyFormat::RGBA8: // Z24X8
out.WriteFmt(" ocol0 = float4(texcol.rgb, 1.0);\n");
out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n");
break;
case EFBCopyFormat::G8: // Z8M
out.WriteFmt(" ocol0 = texcol.gggg;\n");
out.Write(" ocol0 = texcol.gggg;\n");
break;
case EFBCopyFormat::B8: // Z8L
out.WriteFmt(" ocol0 = texcol.bbbb;\n");
out.Write(" ocol0 = texcol.bbbb;\n");
break;
case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
// stored as alpha)
// Used e.g. in Zelda: Skyward Sword
out.WriteFmt(" ocol0 = texcol.gggb;\n");
out.Write(" ocol0 = texcol.gggb;\n");
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = float4(texcol.bgr, 0.0);\n");
out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n");
break;
}
}
@ -229,99 +229,99 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
case EFBCopyFormat::RA4: // IA4
case EFBCopyFormat::RA8: // IA8
if (has_four_bits)
out.WriteFmt(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
// TODO - verify these coefficients
out.WriteFmt(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
" float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
" ocol0 = float4(intensity, intensity, intensity, {});\n",
has_alpha ? "texcol.a" : "intensity");
out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
" float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
" ocol0 = float4(intensity, intensity, intensity, {});\n",
has_alpha ? "texcol.a" : "intensity");
break;
default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X",
static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
}
}
else
{
if (!uid_data->efb_has_alpha)
out.WriteFmt(" texcol.a = 1.0;\n");
out.Write(" texcol.a = 1.0;\n");
switch (uid_data->dst_format)
{
case EFBCopyFormat::R4: // R4
out.WriteFmt(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = float4(red, red, red, red);\n");
out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = float4(red, red, red, red);\n");
break;
case EFBCopyFormat::R8_0x1: // R8
case EFBCopyFormat::R8: // R8
out.WriteFmt(" ocol0 = texcol.rrrr;\n");
out.Write(" ocol0 = texcol.rrrr;\n");
break;
case EFBCopyFormat::RA4: // RA4
out.WriteFmt(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = red_alpha.rrrg;\n");
out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = red_alpha.rrrg;\n");
break;
case EFBCopyFormat::RA8: // RA8
out.WriteFmt(" ocol0 = texcol.rrra;\n");
out.Write(" ocol0 = texcol.rrra;\n");
break;
case EFBCopyFormat::A8: // A8
out.WriteFmt(" ocol0 = texcol.aaaa;\n");
out.Write(" ocol0 = texcol.aaaa;\n");
break;
case EFBCopyFormat::G8: // G8
out.WriteFmt(" ocol0 = texcol.gggg;\n");
out.Write(" ocol0 = texcol.gggg;\n");
break;
case EFBCopyFormat::B8: // B8
out.WriteFmt(" ocol0 = texcol.bbbb;\n");
out.Write(" ocol0 = texcol.bbbb;\n");
break;
case EFBCopyFormat::RG8: // RG8
out.WriteFmt(" ocol0 = texcol.rrrg;\n");
out.Write(" ocol0 = texcol.rrrg;\n");
break;
case EFBCopyFormat::GB8: // GB8
out.WriteFmt(" ocol0 = texcol.gggb;\n");
out.Write(" ocol0 = texcol.gggb;\n");
break;
case EFBCopyFormat::RGB565: // RGB565
out.WriteFmt(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
" ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
" ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
break;
case EFBCopyFormat::RGB5A3: // RGB5A3
// TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
// will need to be implemented once we move away from floats.
out.WriteFmt(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
" ocol0 = float4(color, alpha);\n");
out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
" ocol0 = float4(color, alpha);\n");
break;
case EFBCopyFormat::RGBA8: // RGBA8
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
case EFBCopyFormat::XFB:
out.WriteFmt(
out.Write(
" ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n");
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(uid_data->dst_format));
out.WriteFmt(" ocol0 = texcol;\n");
out.Write(" ocol0 = texcol;\n");
break;
}
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}

View File

@ -18,14 +18,13 @@ void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
// ==============================================
if (!host_config.backend_bitfield)
{
out.WriteFmt(
"uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" uint mask = uint((1 << size) - 1);\n"
" return uint(val >> off) & mask;\n"
"}}\n\n");
out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
" // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
" // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
"instruction.\n"
" uint mask = uint((1 << size) - 1);\n"
" return uint(val >> off) & mask;\n"
"}}\n\n");
}
}
@ -34,61 +33,60 @@ void WriteLightingFunction(ShaderCode& out)
// ==============================================
// Lighting channel calculation helper
// ==============================================
out.WriteFmt("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
"float3 normal) {{\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {{\n");
out.WriteFmt(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.WriteFmt(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = normal;\n"
" break;\n\n");
out.WriteFmt(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.WriteFmt(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
out.WriteFmt(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.WriteFmt(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
out.WriteFmt(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.WriteFmt(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS
"[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
out.WriteFmt(" default:\n"
" attn = 1.0;\n"
" ldir = normal;\n"
" break;\n"
" }}\n"
"\n"
" switch (diffusefunc) {{\n");
out.WriteFmt(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.WriteFmt(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.WriteFmt(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.WriteFmt(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.WriteFmt(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.WriteFmt(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.WriteFmt(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }}\n"
"}}\n\n");
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
"float3 normal) {{\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {{\n");
out.Write(" case {}u: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.Write(" case {}u: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = normal;\n"
" break;\n\n");
out.Write(" case {}u: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
out.Write(" if (diffusefunc == {}u) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
out.Write(" case {}u: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
out.Write(" default:\n"
" attn = 1.0;\n"
" ldir = normal;\n"
" break;\n"
" }}\n"
"\n"
" switch (diffusefunc) {{\n");
out.Write(" case {}u: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.Write(" case {}u: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.Write(" case {}u: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.Write(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }}\n"
"}}\n\n");
}
void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view world_pos_var,
@ -96,105 +94,102 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view wor
std::string_view in_color_1_var, std::string_view out_color_0_var,
std::string_view out_color_1_var)
{
out.WriteFmt("// Lighting\n");
out.WriteFmt("{}for (uint chan = 0u; chan < {}u; chan++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS);
out.WriteFmt(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
out.Write("// Lighting\n");
out.Write("{}for (uint chan = 0u; chan < {}u; chan++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS);
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }}\n"
"\n");
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().matsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
" mat.w = 255;\n"
" }} else {{\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }}\n"
"\n");
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().matsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.w = 255;\n"
" }} else {{\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting));
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" }} else {{\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }}\n"
"\n");
out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3),
BitfieldExtract("colorreg", LitChannel().lightMask4_7));
out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc));
out.WriteFmt(" uint diffusefunc = {};\n",
BitfieldExtract("colorreg", LitChannel().diffusefunc));
out.WriteFmt(
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().enablelighting));
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("colorreg", LitChannel().ambsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(((chan == 0u) ? {}.xyz : {}.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round({}.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" }} else {{\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }}\n"
"\n");
out.Write(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3),
BitfieldExtract("colorreg", LitChannel().lightMask4_7));
out.Write(" uint attnfunc = {};\n", BitfieldExtract("colorreg", LitChannel().attnfunc));
out.Write(" uint diffusefunc = {};\n", BitfieldExtract("colorreg", LitChannel().diffusefunc));
out.Write(
" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n"
" if ((light_mask & (1u << light_index)) != 0u)\n"
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).xyz;\n",
world_pos_var, normal_var);
out.WriteFmt(" }}\n"
" }}\n"
"\n");
out.Write(" }}\n"
" }}\n"
"\n");
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting));
out.WriteFmt(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource));
out.WriteFmt(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.WriteFmt(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.WriteFmt(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.WriteFmt(" else\n"
" lacc.w = 255;\n"
" }} else {{\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }}\n"
"\n");
out.WriteFmt(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3),
BitfieldExtract("alphareg", LitChannel().lightMask4_7));
out.WriteFmt(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc));
out.WriteFmt(" uint diffusefunc = {};\n",
BitfieldExtract("alphareg", LitChannel().diffusefunc));
out.WriteFmt(
" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).w;\n",
world_pos_var, normal_var);
out.WriteFmt(" }}\n"
" }}\n"
"\n");
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().enablelighting));
out.Write(" if ({} != 0u) {{\n", BitfieldExtract("alphareg", LitChannel().ambsource));
out.Write(" if ((components & ({}u << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(((chan == 0u) ? {}.w : {}.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & {}u) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round({}.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.w = 255;\n"
" }} else {{\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }}\n"
"\n");
out.Write(" uint light_mask = {} | ({} << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3),
BitfieldExtract("alphareg", LitChannel().lightMask4_7));
out.Write(" uint attnfunc = {};\n", BitfieldExtract("alphareg", LitChannel().attnfunc));
out.Write(" uint diffusefunc = {};\n", BitfieldExtract("alphareg", LitChannel().diffusefunc));
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {{\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, {}, {}).w;\n",
world_pos_var, normal_var);
out.Write(" }}\n"
" }}\n"
"\n");
out.WriteFmt(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {{\n"
" case 0u: {} = lit_color; break;\n",
out_color_0_var);
out.WriteFmt(" case 1u: {} = lit_color; break;\n", out_color_1_var);
out.WriteFmt(" }}\n"
"}}\n"
"\n");
out.Write(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {{\n"
" case 0u: {} = lit_color; break;\n",
out_color_0_var);
out.Write(" case 1u: {} = lit_color; break;\n", out_color_1_var);
out.Write(" }}\n"
"}}\n"
"\n");
}
} // namespace UberShader

File diff suppressed because it is too large Load Diff

View File

@ -35,145 +35,145 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
const u32 num_texgen = uid_data->num_texgens;
ShaderCode out;
out.WriteFmt("// Vertex UberShader\n\n");
out.WriteFmt("{}", s_lighting_struct);
out.Write("// Vertex UberShader\n\n");
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.WriteFmt("cbuffer VSBlock {{\n");
out.WriteFmt("{}", s_shader_uniforms);
out.WriteFmt("}};\n");
out.Write("cbuffer VSBlock {{\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
out.WriteFmt("}};\n\n");
out.Write("}};\n\n");
WriteUberShaderCommonHeader(out, api_type, host_config);
WriteLightingFunction(out);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.WriteFmt("}} vs;\n");
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < num_texgen; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("VS_OUTPUT main(\n");
out.Write("VS_OUTPUT main(\n");
// inputs
out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n"
" float3 rawnorm1 : NORMAL1,\n"
" float3 rawnorm2 : NORMAL2,\n"
" float4 rawcolor0 : COLOR0,\n"
" float4 rawcolor1 : COLOR1,\n");
out.Write(" float3 rawnorm0 : NORMAL0,\n"
" float3 rawnorm1 : NORMAL1,\n"
" float3 rawnorm2 : NORMAL2,\n"
" float4 rawcolor0 : COLOR0,\n"
" float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
out.WriteFmt(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.WriteFmt(" uint posmtx : BLENDINDICES,\n");
out.WriteFmt(" float4 rawpos : POSITION) {{\n");
out.Write(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.Write(" uint posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
}
out.WriteFmt("VS_OUTPUT o;\n"
"\n");
out.Write("VS_OUTPUT o;\n"
"\n");
// Transforms
out.WriteFmt("// Position matrix\n"
"float4 P0;\n"
"float4 P1;\n"
"float4 P2;\n"
"\n"
"// Normal matrix\n"
"float3 N0;\n"
"float3 N1;\n"
"float3 N2;\n"
"\n"
"if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
out.WriteFmt(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
" P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
"\n"
" int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
" N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
" N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
" N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
"}} else {{\n"
" // One shared matrix\n"
" P0 = " I_POSNORMALMATRIX "[0];\n"
" P1 = " I_POSNORMALMATRIX "[1];\n"
" P2 = " I_POSNORMALMATRIX "[2];\n"
" N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
" N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
" N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
"}}\n"
"\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n"
"// Only the first normal gets normalized (TODO: why?)\n"
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM0\n",
VB_HAS_NRM0);
out.WriteFmt(
out.Write("// Position matrix\n"
"float4 P0;\n"
"float4 P1;\n"
"float4 P2;\n"
"\n"
"// Normal matrix\n"
"float3 N0;\n"
"float3 N1;\n"
"float3 N2;\n"
"\n"
"if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
out.Write(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
" P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
"\n"
" int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
" N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
" N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
" N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
"}} else {{\n"
" // One shared matrix\n"
" P0 = " I_POSNORMALMATRIX "[0];\n"
" P1 = " I_POSNORMALMATRIX "[1];\n"
" P2 = " I_POSNORMALMATRIX "[2];\n"
" N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
" N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
" N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
"}}\n"
"\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n"
"// Only the first normal gets normalized (TODO: why?)\n"
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM0\n",
VB_HAS_NRM0);
out.Write(
" _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
"\n"
"float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM1\n",
VB_HAS_NRM1);
out.WriteFmt(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
"\n"
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM2\n",
VB_HAS_NRM2);
out.WriteFmt(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
"\n");
out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
"\n"
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NRM2\n",
VB_HAS_NRM2);
out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
"\n");
// Hardware Lighting
WriteVertexLighting(out, api_type, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
@ -183,37 +183,37 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
if (num_texgen > 0)
GenVertexShaderTexGens(api_type, num_texgen, out);
out.WriteFmt("if (xfmem_numColorChans == 0u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor0;\n"
" else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL0);
out.WriteFmt("if (xfmem_numColorChans < 2u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor1;\n"
" else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL1);
out.Write("if (xfmem_numColorChans == 0u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor0;\n"
" else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL0);
out.Write("if (xfmem_numColorChans < 2u) {{\n"
" if ((components & {}u) != 0u)\n"
" o.colors_0 = rawcolor1;\n"
" else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"}}\n",
VB_HAS_COL1);
if (!host_config.fast_depth_calc)
{
// clipPos/w needs to be done in pixel shader, not here
out.WriteFmt("o.clipPos = o.pos;\n");
out.Write("o.clipPos = o.pos;\n");
}
if (per_pixel_lighting)
{
out.WriteFmt("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL0\n"
" o.colors_0 = rawcolor0;\n",
VB_HAS_COL0);
out.WriteFmt("if ((components & {}u) != 0u) // VB_HAS_COL1\n"
" o.colors_1 = rawcolor1;\n",
VB_HAS_COL1);
out.Write("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
out.Write("if ((components & {}u) != 0u) // VB_HAS_COL0\n"
" o.colors_0 = rawcolor0;\n",
VB_HAS_COL0);
out.Write("if ((components & {}u) != 0u) // VB_HAS_COL1\n"
" o.colors_1 = rawcolor1;\n",
VB_HAS_COL1);
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
@ -225,13 +225,13 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
out.Write("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
}
}
@ -246,20 +246,20 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer.
out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control)
{
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error.
out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
}
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend.
out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
@ -267,7 +267,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding)
{
@ -275,18 +275,18 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// cause an additional pixel offset. Due to a higher pixel density we need to correct this
// by converting our clip-space position into the Wii's screen-space.
// Acquire the right pixel and then convert it back.
out.WriteFmt("if (o.pos.w == 1.0f)\n"
"{{\n");
out.Write("if (o.pos.w == 1.0f)\n"
"{{\n");
out.WriteFmt("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
"\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
"\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
out.WriteFmt("\tss_pixel_x = round(ss_pixel_x);\n"
"\tss_pixel_y = round(ss_pixel_y);\n");
out.Write("\tss_pixel_x = round(ss_pixel_x);\n"
"\tss_pixel_y = round(ss_pixel_y);\n");
out.WriteFmt("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
"\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
"}}\n");
out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
"\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
"}}\n");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
@ -300,35 +300,35 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < num_texgen; ++i)
out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i);
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.WriteFmt("clipPos = o.clipPos;\n");
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.WriteFmt("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.WriteFmt("gl_Position = o.pos;\n");
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.WriteFmt("return o;\n");
out.Write("return o;\n");
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}
@ -338,160 +338,158 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying
// to dynamically index them.
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
out.Write("o.tex{} = float3(0.0, 0.0, 0.0);\n", i);
out.WriteFmt("// Texture coordinate generation\n");
out.Write("// Texture coordinate generation\n");
if (num_texgen == 1)
{
out.WriteFmt("{{ const uint texgen = 0u;\n");
out.Write("{{ const uint texgen = 0u;\n");
}
else
{
out.WriteFmt("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", num_texgen);
out.Write("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", num_texgen);
}
out.WriteFmt(" // Texcoord transforms\n");
out.WriteFmt(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.WriteFmt(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.WriteFmt(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.WriteFmt(" coord.xyz = rawpos.xyz;\n");
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.WriteFmt(
out.Write(" // Texcoord transforms\n");
out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.Write(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.Write(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.Write(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
VB_HAS_NRM0);
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.WriteFmt(
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
VB_HAS_NRM1);
out.WriteFmt(" break;\n\n");
out.WriteFmt(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.WriteFmt(
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
VB_HAS_NRM2);
out.WriteFmt(" break;\n\n");
out.Write(" break;\n\n");
for (u32 i = 0; i < 8; i++)
{
out.WriteFmt(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.WriteFmt(
out.Write(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.Write(
" coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, "
"1.0, 1.0) : coord;\n",
VB_HAS_UV0 << i, i, i, i);
out.WriteFmt(" break;\n\n");
out.Write(" break;\n\n");
}
out.WriteFmt(" }}\n"
"\n");
out.Write(" }}\n"
"\n");
out.WriteFmt(" // Input form of AB11 sets z element to 1.0\n");
out.WriteFmt(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11);
out.WriteFmt(" coord.z = 1.0f;\n"
"\n");
out.Write(" // Input form of AB11 sets z element to 1.0\n");
out.Write(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11);
out.Write(" coord.z = 1.0f;\n"
"\n");
out.WriteFmt(" // first transformation\n");
out.WriteFmt(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype));
out.WriteFmt(" float3 output_tex;\n"
" switch (texgentype)\n"
" {{\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.WriteFmt(" {{\n");
out.WriteFmt(" uint light = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift));
out.WriteFmt(" uint source = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift));
out.WriteFmt(" switch (source) {{\n");
out.Write(" // first transformation\n");
out.Write(" uint texgentype = {};\n", BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype));
out.Write(" float3 output_tex;\n"
" switch (texgentype)\n"
" {{\n");
out.Write(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.Write(" {{\n");
out.Write(" uint light = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift));
out.Write(" uint source = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift));
out.Write(" switch (source) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.WriteFmt(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
" }}\n");
out.WriteFmt(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n",
VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
out.WriteFmt(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
" }}\n"
" }}\n"
" break;\n\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.WriteFmt(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
" break;\n\n");
out.WriteFmt(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.WriteFmt(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
" break;\n\n");
out.WriteFmt(" default: // Also XF_TEXGEN_REGULAR\n"
" {{\n");
out.WriteFmt(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
out.WriteFmt(
" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {{\n");
out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
" }}\n");
out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n",
VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
" }}\n"
" }}\n"
" break;\n\n");
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
" break;\n\n");
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
" break;\n\n");
out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
" {{\n");
out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.WriteFmt(" }}\n"
"\n");
out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.WriteFmt(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
" }} else {{\n"
" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" 1.0);\n"
" }}\n"
" }} else {{\n");
out.WriteFmt(" if ({} == {}u) {{\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection), XF_TEXPROJ_STQ);
out.WriteFmt(
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
" }} else {{\n"
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" 1.0);\n"
" }}\n"
" }}\n"
" }}\n"
" break;\n\n"
" }}\n"
"\n");
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }}\n"
"\n");
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
" }} else {{\n"
" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" 1.0);\n"
" }}\n"
" }} else {{\n");
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
" }} else {{\n"
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" 1.0);\n"
" }}\n"
" }}\n"
" }}\n"
" break;\n\n"
" }}\n"
"\n");
out.WriteFmt(" if (xfmem_dualTexInfo != 0u) {{\n");
out.WriteFmt(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.WriteFmt(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index));
out.WriteFmt(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
" float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
" float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
"\n");
out.WriteFmt(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize));
out.WriteFmt(" output_tex.xyz = normalize(output_tex.xyz);\n"
"\n"
" // multiply by postmatrix\n"
" output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
" dot(P1.xyz, output_tex.xyz) + P1.w,\n"
" dot(P2.xyz, output_tex.xyz) + P2.w);\n"
" }}\n\n");
out.Write(" if (xfmem_dualTexInfo != 0u) {{\n");
out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.Write(" uint base_index = {};\n", BitfieldExtract("postMtxInfo", PostMtxInfo().index));
out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
" float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
" float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
"\n");
out.Write(" if ({} != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize));
out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
"\n"
" // multiply by postmatrix\n"
" output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
" dot(P1.xyz, output_tex.xyz) + P1.w,\n"
" dot(P2.xyz, output_tex.xyz) + P2.w);\n"
" }}\n\n");
// When q is 0, the GameCube appears to have a special case
// This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
out.WriteFmt(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR);
out.WriteFmt(
out.Write(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR);
out.Write(
" output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
"\n");
out.WriteFmt(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.WriteFmt(" switch (texgen) {{\n");
out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.Write(" switch (texgen) {{\n");
for (u32 i = 0; i < num_texgen; i++)
out.WriteFmt(" case {}u: o.tex{} = output_tex; break;\n", i, i);
out.WriteFmt(" }}\n"
"}}\n");
out.Write(" case {}u: o.tex{} = output_tex; break;\n", i, i);
out.Write(" }}\n"
"}}\n");
}
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)

View File

@ -83,37 +83,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
const bool ssaa = host_config.ssaa;
const bool vertex_rounding = host_config.vertex_rounding;
out.WriteFmt("{}", s_lighting_struct);
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.WriteFmt("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.WriteFmt("cbuffer VSBlock {{\n");
out.Write("cbuffer VSBlock {{\n");
out.WriteFmt("{}", s_shader_uniforms);
out.WriteFmt("}};\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.WriteFmt("struct VS_OUTPUT {{\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "");
out.WriteFmt("}};\n");
out.Write("}};\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
{
@ -121,161 +121,161 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
{
out.WriteFmt("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
}
}
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("VARYING_LOCATION(0) out VertexData {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.WriteFmt("}} vs;\n");
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.WriteFmt("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.WriteFmt("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.WriteFmt("void main()\n{{\n");
out.Write("void main()\n{{\n");
}
else // D3D
{
out.WriteFmt("VS_OUTPUT main(\n");
out.Write("VS_OUTPUT main(\n");
// inputs
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.WriteFmt(" float3 rawnorm0 : NORMAL0,\n");
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.WriteFmt(" float3 rawnorm1 : NORMAL1,\n");
out.Write(" float3 rawnorm1 : NORMAL1,\n");
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.WriteFmt(" float3 rawnorm2 : NORMAL2,\n");
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt(" float4 rawcolor0 : COLOR0,\n");
out.Write(" float4 rawcolor0 : COLOR0,\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt(" float4 rawcolor1 : COLOR1,\n");
out.Write(" float4 rawcolor1 : COLOR1,\n");
for (u32 i = 0; i < 8; ++i)
{
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
out.WriteFmt(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i);
out.Write(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i);
}
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.WriteFmt(" uint4 posmtx : BLENDINDICES,\n");
out.WriteFmt(" float4 rawpos : POSITION) {{\n");
out.Write(" uint4 posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
}
out.WriteFmt("VS_OUTPUT o;\n");
out.Write("VS_OUTPUT o;\n");
// transforms
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
{
out.WriteFmt("int posidx = int(posmtx.r);\n"
"float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posidx], rawpos), dot(" I_TRANSFORMMATRICES
"[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n");
out.Write("int posidx = int(posmtx.r);\n"
"float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posidx], rawpos), dot(" I_TRANSFORMMATRICES
"[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n");
if ((uid_data->components & VB_HAS_NRMALL) != 0)
{
out.WriteFmt("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
out.Write("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
}
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n");
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt(
out.Write(
"float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt(
out.Write(
"float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
}
}
else
{
out.WriteFmt("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
}
}
if ((uid_data->components & VB_HAS_NRM0) == 0)
out.WriteFmt("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.WriteFmt("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
out.WriteFmt("int4 lacc;\n"
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
out.Write("int4 lacc;\n"
"float3 ldir, h, cosAttn, distAttn;\n"
"float dist, dist2, attn;\n");
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "rawcolor",
"o.colors_");
// transform texcoords
out.WriteFmt("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
auto& texinfo = uid_data->texMtxInfo[i];
out.WriteFmt("{{\n");
out.WriteFmt("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
out.Write("{{\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
out.WriteFmt("coord.xyz = rawpos.xyz;\n");
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case XF_SRCNORMAL_INROW:
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.WriteFmt("coord.xyz = rawnorm0.xyz;\n");
out.Write("coord.xyz = rawnorm0.xyz;\n");
}
break;
case XF_SRCCOLORS_INROW:
@ -285,28 +285,28 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
case XF_SRCBINORMAL_T_INROW:
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.WriteFmt("coord.xyz = rawnorm1.xyz;\n");
out.Write("coord.xyz = rawnorm1.xyz;\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.WriteFmt("coord.xyz = rawnorm2.xyz;\n");
out.Write("coord.xyz = rawnorm2.xyz;\n");
}
break;
default:
ASSERT(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if ((uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) != 0)
{
out.WriteFmt("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
}
break;
}
// Input form of AB11 sets z element to 1.0
if (texinfo.inputform == XF_TEXINPUT_AB11)
out.WriteFmt("coord.z = 1.0;\n");
out.Write("coord.z = 1.0;\n");
// first transformation
switch (texinfo.texgentype)
@ -316,9 +316,9 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
{
// transform the light dir into tangent space
out.WriteFmt("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.WriteFmt(
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
texinfo.embosssourceshift);
}
@ -327,49 +327,49 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
// ASSERT(0); // should have normals
out.WriteFmt("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
out.WriteFmt("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
out.WriteFmt("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
default:
if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
{
out.WriteFmt("int tmp = int(rawtex{}.z);\n", i);
out.Write("int tmp = int(rawtex{}.z);\n", i);
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
i);
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES "[tmp+2]));\n",
i);
}
else
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
i);
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES "[tmp+1]), 1);\n",
i);
}
}
else
{
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
"[{}]));\n",
i, 3 * i, 3 * i + 1, 3 * i + 2);
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
"[{}]));\n",
i, 3 * i, 3 * i + 1, 3 * i + 2);
}
else
{
out.WriteFmt("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n",
i, 3 * i, 3 * i + 1);
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), 1);\n",
i, 3 * i, 3 * i + 1);
}
}
break;
@ -380,16 +380,16 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
auto& postInfo = uid_data->postMtxInfo[i];
out.WriteFmt("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES "[{}];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES "[{}];\n",
postInfo.index & 0x3f, (postInfo.index + 1) & 0x3f, (postInfo.index + 2) & 0x3f);
if (postInfo.normalize)
out.WriteFmt("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
out.Write("o.tex{}.xyz = normalize(o.tex{}.xyz);\n", i, i);
// multiply by postmatrix
out.WriteFmt(
out.Write(
"o.tex{0}.xyz = float3(dot(P0.xyz, o.tex{0}.xyz) + P0.w, dot(P1.xyz, o.tex{0}.xyz) + "
"P1.w, dot(P2.xyz, o.tex{0}.xyz) + P2.w);\n",
i);
@ -401,44 +401,44 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// TODO: check if this only affects XF_TEXGEN_REGULAR
if (texinfo.texgentype == XF_TEXGEN_REGULAR)
{
out.WriteFmt(
out.Write(
"if(o.tex{0}.z == 0.0f)\n"
"\to.tex{0}.xy = clamp(o.tex{0}.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n",
i);
}
out.WriteFmt("}}\n");
out.Write("}}\n");
}
if (uid_data->numColorChans == 0)
{
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("o.colors_0 = rawcolor0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
else
out.WriteFmt("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
if (uid_data->numColorChans < 2)
{
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("o.colors_1 = rawcolor1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
else
out.WriteFmt("o.colors_1 = o.colors_0;\n");
out.Write("o.colors_1 = o.colors_0;\n");
}
// clipPos/w needs to be done in pixel shader, not here
if (!host_config.fast_depth_calc)
out.WriteFmt("o.clipPos = o.pos;\n");
out.Write("o.clipPos = o.pos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
out.Write("o.Normal = _norm0;\n"
"o.WorldPos = pos.xyz;\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.WriteFmt("o.colors_0 = rawcolor0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
out.WriteFmt("o.colors_1 = rawcolor1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
@ -450,14 +450,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.WriteFmt("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"
"float clipDist0 = clipDepth + o.pos.w;\n" // Near: z < -w
"float clipDist1 = -clipDepth;\n"); // Far: z > 0
if (host_config.backend_geometry_shaders)
{
out.WriteFmt("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
out.Write("o.clipDist0 = clipDist0;\n"
"o.clipDist1 = clipDist1;\n");
}
}
@ -472,20 +472,20 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer.
out.WriteFmt("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control)
{
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error.
out.WriteFmt("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
}
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend.
out.WriteFmt("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
@ -493,7 +493,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.WriteFmt("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding)
{
@ -504,18 +504,18 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// we need to correct this by converting our
// clip-space position into the Wii's screen-space
// acquire the right pixel and then convert it back
out.WriteFmt("if (o.pos.w == 1.0f)\n"
"{{\n"
out.Write("if (o.pos.w == 1.0f)\n"
"{{\n"
"\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
"\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"
"\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n"
"\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n"
"\tss_pixel_x = round(ss_pixel_x);\n"
"\tss_pixel_y = round(ss_pixel_y);\n"
"\tss_pixel_x = round(ss_pixel_x);\n"
"\tss_pixel_y = round(ss_pixel_y);\n"
"\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
"\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
"}}\n");
"\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n"
"\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n"
"}}\n");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
@ -529,35 +529,35 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < uid_data->numTexGens; ++i)
out.WriteFmt("tex{}.xyz = o.tex{};\n", i, i);
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.WriteFmt("clipPos = o.clipPos;\n");
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.WriteFmt("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.WriteFmt("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.WriteFmt("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.WriteFmt("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.WriteFmt("gl_Position = o.pos;\n");
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.WriteFmt("return o;\n");
out.Write("return o;\n");
}
out.WriteFmt("}}\n");
out.Write("}}\n");
return out;
}