diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 69dedb4d4b..bb1aa04911 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -593,11 +593,8 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader() { - if (g_ActiveConfig.CanUseUberShaders() && - (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders)) - { + if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders) return SetUberShader(); - } PixelShaderUid uid = GetPixelShaderUid(); if (last_entry && uid == last_uid) diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index a90de7c105..0c56deeb4a 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -249,11 +249,8 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format) { - if (g_ActiveConfig.CanUseUberShaders() && - (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders)) - { + if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders) return SetUberShader(vertex_format); - } VertexShaderUid uid = GetVertexShaderUid(); if (last_entry && uid == last_uid) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index e2d201e9b7..57d9705a7c 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -223,7 +223,7 @@ void ProgramShaderCache::UploadConstants() SHADER* ProgramShaderCache::SetShader(u32 primitive_type, const GLVertexFormat* vertex_format) { - if (g_ActiveConfig.bDisableSpecializedShaders && g_ActiveConfig.CanUseUberShaders()) + if (g_ActiveConfig.bDisableSpecializedShaders) return SetUberShader(primitive_type, vertex_format); SHADERUID uid; diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index 7d7ed0d956..2ab6af50ff 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -389,13 +389,6 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type) bool uber_vertex_shader = use_ubershaders || g_ActiveConfig.bForceVertexUberShaders; bool uber_pixel_shader = use_ubershaders || g_ActiveConfig.bForcePixelUberShaders; bool using_ubershaders = uber_vertex_shader || uber_pixel_shader; - if (!g_ActiveConfig.CanUseUberShaders()) - { - // Per-pixel lighting disables ubershaders. - uber_vertex_shader = false; - uber_pixel_shader = false; - using_ubershaders = false; - } // Switching to/from ubershaders? Have to adjust the vertex format and pipeline layout. if (using_ubershaders != m_using_ubershaders) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 43dba06263..b385776f96 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -333,7 +333,8 @@ PixelShaderUid GetPixelShaderUid() return out; } -void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool bounding_box) +void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens, + bool per_pixel_lighting, bool bounding_box) { // dot product for integer vectors out.Write("int idot(int3 x, int3 y)\n" @@ -404,6 +405,19 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool boundin "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n" "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n"); + if (per_pixel_lighting) + { + out.Write("%s", s_lighting_struct); + + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); + else + out.Write("cbuffer VSBlock : register(b1) {\n"); + + out.Write(s_shader_uniforms); + out.Write("};\n"); + } + if (bounding_box) { if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) @@ -417,6 +431,10 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool boundin out.Write("globallycoherent RWBuffer bbox_data : register(u2);\n"); } } + + out.Write("struct VS_OUTPUT {\n"); + GenerateVSOutputMembers(out, ApiType, num_texgens, per_pixel_lighting, ""); + out.Write("};\n"); } static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, @@ -447,24 +465,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host uid_data->genMode_numindstages); // Stuff that is shared between ubershaders and pixelgen. - WritePixelShaderCommonHeader(out, ApiType, uid_data->bounding_box); - - if (per_pixel_lighting) - { - out.Write("%s", s_lighting_struct); - - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); - else - out.Write("cbuffer VSBlock : register(b1) {\n"); - - out.Write(s_shader_uniforms); - out.Write("};\n"); - } - - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting, ""); - out.Write("};\n"); + WritePixelShaderCommonHeader(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting, + uid_data->bounding_box); if (uid_data->forced_early_z) { diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 17d2353a36..ee422bee8d 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -159,6 +159,7 @@ typedef ShaderUid PixelShaderUid; ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config, const pixel_shader_uid_data* uid_data); -void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool bounding_box); +void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens, + bool per_pixel_lighting, bool bounding_box); ShaderCode GeneratePixelShaderCode(APIType ApiType, const pixel_shader_uid_data* uid_data); PixelShaderUid GetPixelShaderUid(); diff --git a/Source/Core/VideoCommon/UberShaderCommon.cpp b/Source/Core/VideoCommon/UberShaderCommon.cpp index c6f5167b3c..58e33e5942 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.cpp +++ b/Source/Core/VideoCommon/UberShaderCommon.cpp @@ -3,7 +3,9 @@ // Refer to the license.txt file included. #include "VideoCommon/UberShaderCommon.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VideoConfig.h" +#include "VideoCommon/XFMemory.h" namespace UberShader { @@ -24,4 +26,178 @@ void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type, "}\n\n"); } } + +void WriteLightingFunction(ShaderCode& out) +{ + // ============================================== + // Lighting channel calculation helper + // ============================================== + out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, " + "float3 normal) {\n" + " float3 ldir, h, cosAttn, distAttn;\n" + " float dist, dist2, attn;\n" + "\n" + " switch (attnfunc) {\n"); + out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE); + out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR); + out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" + " attn = 1.0;\n" + " if (length(ldir) == 0.0)\n" + " ldir = normal;\n" + " break;\n\n"); + out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC); + out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" + " attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS + "[index].dir.xyz)) : 0.0;\n" + " cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n"); + out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE); + out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n" + " else\n" + " distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n" + " attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " + "float3(1.0, attn, attn*attn));\n" + " break;\n\n"); + out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT); + out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n" + " dist2 = dot(ldir, ldir);\n" + " dist = sqrt(dist2);\n" + " ldir = ldir / dist;\n" + " attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n" + " attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS + "[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS + "[index].distatt.xyz, float3(1.0, dist, dist2));\n" + " break;\n\n"); + out.Write(" default:\n" + " attn = 1.0;\n" + " ldir = normal;\n" + " break;\n" + " }\n" + "\n" + " switch (diffusefunc) {\n"); + out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE); + out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n"); + out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN); + out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS + "[index].color)));\n\n"); + out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP); + out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS + "[index].color)));\n\n"); + out.Write(" default:\n" + " return int4(0, 0, 0, 0);\n" + " }\n" + "}\n\n"); +} + +void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var, + const char* normal_var, const char* in_color_0_var, + const char* in_color_1_var, const char* out_color_0_var, + const char* out_color_1_var) +{ + out.Write("// Lighting\n"); + out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n", + api_type == APIType::D3D ? "[loop] " : ""); + out.Write(" uint colorreg = xfmem_color(chan);\n" + " uint alphareg = xfmem_alpha(chan);\n" + " int4 mat = " I_MATERIALS "[chan + 2u]; \n" + " int4 lacc = int4(255, 255, 255, 255);\n" + "\n"); + + out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str()); + out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" mat.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n", + in_color_0_var, in_color_1_var); + out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" mat.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " mat.xyz = int3(255, 255, 255);\n" + " }\n" + "\n"); + + out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str()); + out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" mat.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var, + in_color_1_var); + out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" mat.w = int(round(%s.w * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " mat.w = 255;\n" + " } else {\n" + " mat.w = " I_MATERIALS " [chan + 2u].w;\n" + " }\n" + "\n"); + + out.Write(" if (%s != 0u) {\n", + BitfieldExtract("colorreg", LitChannel().enablelighting).c_str()); + out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str()); + out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" lacc.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n", + in_color_0_var, in_color_1_var); + out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" lacc.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " lacc.xyz = int3(255, 255, 255);\n" + " } else {\n" + " lacc.xyz = " I_MATERIALS " [chan].xyz;\n" + " }\n" + "\n"); + out.Write(" uint light_mask = %s | (%s << 4u);\n", + BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(), + BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str()); + out.Write(" uint attnfunc = %s;\n", + BitfieldExtract("colorreg", LitChannel().attnfunc).c_str()); + out.Write(" uint diffusefunc = %s;\n", + BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str()); + out.Write( + " for (uint light_index = 0u; light_index < 8u; light_index++) {\n" + " if ((light_mask & (1u << light_index)) != 0u)\n" + " lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).xyz;\n", + world_pos_var, normal_var); + out.Write(" }\n" + " }\n" + "\n"); + + out.Write(" if (%s != 0u) {\n", + BitfieldExtract("alphareg", LitChannel().enablelighting).c_str()); + out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str()); + out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" lacc.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var, + in_color_1_var); + out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); + out.Write(" lacc.w = int(round(%s.w * 255.0));\n", in_color_0_var); + out.Write(" else\n" + " lacc.w = 255;\n" + " } else {\n" + " lacc.w = " I_MATERIALS " [chan].w;\n" + " }\n" + "\n"); + out.Write(" uint light_mask = %s | (%s << 4u);\n", + BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(), + BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str()); + out.Write(" uint attnfunc = %s;\n", + BitfieldExtract("alphareg", LitChannel().attnfunc).c_str()); + out.Write(" uint diffusefunc = %s;\n", + BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str()); + out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n" + " if ((light_mask & (1u << light_index)) != 0u)\n\n" + " lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).w;\n", + world_pos_var, normal_var); + out.Write(" }\n" + " }\n" + "\n"); + + out.Write(" lacc = clamp(lacc, 0, 255);\n" + "\n" + " // Hopefully GPUs that can support dynamic indexing will optimize this.\n" + " float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n" + " switch (chan) {\n" + " case 0u: %s = lit_color; break;\n", + out_color_0_var); + out.Write(" case 1u: %s = lit_color; break;\n", out_color_1_var); + out.Write(" }\n" + "}\n" + "\n"); + + out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1); + out.Write(" %s = %s;\n\n", out_color_1_var, out_color_0_var); +} } diff --git a/Source/Core/VideoCommon/UberShaderCommon.h b/Source/Core/VideoCommon/UberShaderCommon.h index d6edaf1f73..a623e9d58d 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.h +++ b/Source/Core/VideoCommon/UberShaderCommon.h @@ -13,6 +13,13 @@ namespace UberShader void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type, const ShaderHostConfig& host_config); +// Vertex lighting +void WriteLightingFunction(ShaderCode& out); +void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var, + const char* normal_var, const char* in_color_0_var, + const char* in_color_1_var, const char* out_color_0_var, + const char* out_color_1_var); + // bitfieldExtract generator for BitField types template std::string BitfieldExtract(const std::string& source, T type) diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 82e3382e2d..23d6725b3b 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -5,6 +5,7 @@ #include "VideoCommon/UberShaderPixel.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/DriverDetails.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/UberShaderCommon.h" #include "VideoCommon/XFMemory.h" @@ -30,8 +31,6 @@ PixelShaderUid GetPixelShaderUid() ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, const pixel_ubershader_uid_data* uid_data) { - // TODO: Support per-pixel lighting. - // This can be based on the vertex ubershaders, at the cost of a more expensive pixel shader. const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool msaa = host_config.msaa; const bool ssaa = host_config.ssaa; @@ -46,12 +45,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write("// Pixel UberShader for %u texgens%s%s\n", numTexgen, early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : ""); - WritePixelShaderCommonHeader(out, ApiType, bounding_box); + WritePixelShaderCommonHeader(out, ApiType, numTexgen, per_pixel_lighting, bounding_box); WriteUberShaderCommonHeader(out, ApiType, host_config); - - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, ""); - out.Write("};\n"); + if (per_pixel_lighting) + WriteLightingFunction(out); // Shader inputs/outputs in GLSL (HLSL is in main). if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) @@ -133,8 +130,6 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "}\n\n"); } - // TODO: Per pixel lighting (not really needed) - // ===================== // Texture Sampling // ===================== @@ -346,23 +341,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " uint cc;\n" " uint ac;\n"); - // For D3D, we need to store colors in the struct, since we access it from outside - // the main function, where they are declared. Hopefully the compiler can propagate - // these through when it inlines the function. - if (ApiType == APIType::D3D) - { - for (u32 i = 0; i < numTexgen; i++) - out.Write(" float3 tex%d;\n", i); - out.Write(" float4 colors_0;\n" - " float4 colors_1;\n"); - } - out.Write("};\n" "\n" - "int4 getRasColor(State s, StageState ss);\n" + "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);\n" "int4 getKonstColor(State s, StageState ss);\n" "\n" - "int3 selectColorInput(State s, StageState ss, uint index) {\n" + "int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint " + "index) {\n" " switch (index) {\n" " case 0u: // prev.rgb\n" " return s.Reg[0].rgb;\n" @@ -385,9 +370,9 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " case 9u:\n" " return s.TexColor.aaa;\n" " case 10u:\n" - " return getRasColor(s, ss).rgb;\n" + " return getRasColor(s, ss, colors_0, colors_1).rgb;\n" " case 11u:\n" - " return getRasColor(s, ss).aaa;\n" + " return getRasColor(s, ss, colors_0, colors_1).aaa;\n" " case 12u: // One\n" " return int3(255, 255, 255);\n" " case 13u: // Half\n" @@ -399,7 +384,8 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " }\n" "}\n" "\n" - "int selectAlphaInput(State s, StageState ss, uint index) {\n" + "int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint " + "index) {\n" " switch (index) {\n" " case 0u: // prev.a\n" " return s.Reg[0].a;\n" @@ -412,7 +398,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " case 4u:\n" " return s.TexColor.a;\n" " case 5u:\n" - " return getRasColor(s, ss).a;\n" + " return getRasColor(s, ss, colors_0, colors_1).a;\n" " case 6u:\n" " return getKonstColor(s, ss).a;\n" " case 7u: // Zero\n" @@ -538,6 +524,18 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, for (int i = 0; i < 4; i++) out.Write(" s.Reg[%d] = " I_COLORS "[%d];\n", i, i); + const char* color_input_prefix = ""; + if (per_pixel_lighting) + { + out.Write(" float4 lit_colors_0 = colors_0;\n"); + out.Write(" float4 lit_colors_1 = colors_1;\n"); + out.Write(" float3 lit_normal = normalize(Normal.xyz);\n"); + out.Write(" float3 lit_pos = WorldPos.xyz;\n"); + WriteVertexLighting(out, ApiType, "lit_pos", "lit_normal", "colors_0", "colors_1", + "lit_colors_0", "lit_colors_1"); + color_input_prefix = "lit_"; + } + out.Write(" uint num_stages = %s;\n\n", BitfieldExtract("bpmem_genmode", bpmem.genMode.numtevstages).c_str()); @@ -559,12 +557,6 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " ss.order = ss.order >> %d;\n\n", int(TwoTevStageOrders().enable1.StartBit() - TwoTevStageOrders().enable0.StartBit())); - if (ApiType == APIType::D3D) - { - out.Write(" ss.colors_0 = colors_0;\n" - " ss.colors_1 = colors_1;\n"); - } - // Disable texturing when there are no texgens (for now) if (numTexgen != 0) { @@ -715,16 +707,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(" uint color_dest = %s;\n", BitfieldExtract("ss.cc", TevStageCombiner().colorC.dest).c_str()); + out.Write(" uint color_compare_op = color_shift << 1 | uint(color_op);\n" + "\n" + " int3 color_A = selectColorInput(s, ss, %scolors_0, %scolors_1, color_a) & " + "int3(255, 255, 255);\n" + " int3 color_B = selectColorInput(s, ss, %scolors_0, %scolors_1, color_b) & " + "int3(255, 255, 255);\n" + " int3 color_C = selectColorInput(s, ss, %scolors_0, %scolors_1, color_c) & " + "int3(255, 255, 255);\n" + " int3 color_D = selectColorInput(s, ss, %scolors_0, %scolors_1, color_d); // 10 " + "bits + sign\n" + "\n", // TODO: do we need to sign extend? + color_input_prefix, + color_input_prefix, color_input_prefix, color_input_prefix, color_input_prefix, + color_input_prefix, color_input_prefix, color_input_prefix); out.Write( - " uint color_compare_op = color_shift << 1 | uint(color_op);\n" - "\n" - " int3 color_A = selectColorInput(s, ss, color_a) & int3(255, 255, 255);\n" - " int3 color_B = selectColorInput(s, ss, color_b) & int3(255, 255, 255);\n" - " int3 color_C = selectColorInput(s, ss, color_c) & int3(255, 255, 255);\n" - " int3 color_D = selectColorInput(s, ss, color_d); // 10 bits + sign\n" // TODO: do we - // need to sign - // extend? - "\n" " int3 color;\n" " if(color_bias != 3u) { // Normal mode\n" " color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, " @@ -788,41 +785,44 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, " int alpha_B;\n" " if (alpha_bias != 3u || alpha_compare_op > 5u) {\n" " // Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5\n" - " alpha_A = selectAlphaInput(s, ss, alpha_a) & 255;\n" - " alpha_B = selectAlphaInput(s, ss, alpha_b) & 255;\n" + " alpha_A = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_a) & 255;\n" + " alpha_B = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_b) & 255;\n" " };\n" - " int alpha_C = selectAlphaInput(s, ss, alpha_c) & 255;\n" - " int alpha_D = selectAlphaInput(s, ss, alpha_d); // 10 bits + sign\n" // TODO: do we - // need to sign - // extend? - "\n" - " int alpha;\n" - " if(alpha_bias != 3u) { // Normal mode\n" - " alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, " - "true, alpha_shift);\n" - " } else { // Compare mode\n" - " if (alpha_compare_op == 6u) {\n" - " // TEVCMP_A8_GT\n" - " alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n" - " } else if (alpha_compare_op == 7u) {\n" - " // TEVCMP_A8_EQ\n" - " alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n" - " } else {\n" - " // All remaining alpha compare ops actually compare the color channels\n" - " alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n" - " }\n" - " alpha = alpha_D + alpha;\n" - " }\n" - "\n" - " // Clamp result\n" - " if (alpha_clamp)\n" - " alpha = clamp(alpha, 0, 255);\n" - " else\n" - " alpha = clamp(alpha, -1024, 1023);\n" - "\n" - " // Write result to the correct input register of the next stage\n" - " setRegAlpha(s, alpha_dest, alpha);\n" - " }\n"); + " int alpha_C = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_c) & 255;\n" + " int alpha_D = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_d); // 10 bits + " + "sign\n" + "\n", // TODO: do we need to sign extend? + color_input_prefix, + color_input_prefix, color_input_prefix, color_input_prefix, color_input_prefix, + color_input_prefix, color_input_prefix, color_input_prefix); + out.Write("\n" + " int alpha;\n" + " if(alpha_bias != 3u) { // Normal mode\n" + " alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, " + "true, alpha_shift);\n" + " } else { // Compare mode\n" + " if (alpha_compare_op == 6u) {\n" + " // TEVCMP_A8_GT\n" + " alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n" + " } else if (alpha_compare_op == 7u) {\n" + " // TEVCMP_A8_EQ\n" + " alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n" + " } else {\n" + " // All remaining alpha compare ops actually compare the color channels\n" + " alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n" + " }\n" + " alpha = alpha_D + alpha;\n" + " }\n" + "\n" + " // Clamp result\n" + " if (alpha_clamp)\n" + " alpha = clamp(alpha, 0, 255);\n" + " else\n" + " alpha = clamp(alpha, -1024, 1023);\n" + "\n" + " // Write result to the correct input register of the next stage\n" + " setRegAlpha(s, alpha_dest, alpha);\n" + " }\n"); out.Write(" } // Main tev loop\n" "\n"); @@ -1036,14 +1036,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write("}\n" "\n" - "int4 getRasColor(State s, StageState ss) {\n" + "int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n" " // Select Ras for stage\n" " uint ras = %s;\n", BitfieldExtract("ss.order", TwoTevStageOrders().colorchan0).c_str()); out.Write(" if (ras < 2u) { // Lighting Channel 0 or 1\n" - " int4 color = iround(((ras == 0u) ? %scolors_0 : %scolors_1) * 255.0);\n", - (ApiType == APIType::D3D) ? "ss." : "", (ApiType == APIType::D3D) ? "ss." : ""); - out.Write(" uint swap = %s;\n", + " int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n" + " uint swap = %s;\n", BitfieldExtract("ss.ac", TevStageCombiner().alphaC.rswap).c_str()); out.Write(" return Swizzle(swap, color);\n"); out.Write(" } else if (ras == 5u) { // Alpha Bumb\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index fbc2815035..ebc9c80f0e 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -21,7 +21,6 @@ VertexShaderUid GetVertexShaderUid() return out; } -static void GenVertexShaderLighting(APIType ApiType, ShaderCode& out); static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out); ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, @@ -35,8 +34,6 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, ShaderCode out; out.Write("// Vertex UberShader\n\n"); - WriteUberShaderCommonHeader(out, ApiType, host_config); - out.Write("%s", s_lighting_struct); // uniforms @@ -47,65 +44,13 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(s_shader_uniforms); out.Write("};\n"); - out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float4 pos, " - "float3 _norm0) {\n" - " float3 ldir, h, cosAttn, distAttn;\n" - " float dist, dist2, attn;\n" - "\n" - " switch (attnfunc) {\n"); - out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE); - out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR); - out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" - " attn = 1.0;\n" - " if (length(ldir) == 0.0)\n" - " ldir = _norm0;\n" - " break;\n\n"); - out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC); - out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n" - " attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " I_LIGHTS - "[index].dir.xyz)) : 0.0;\n" - " cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n"); - out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE); - out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n" - " else\n" - " distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n" - " attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, " - "float3(1.0, attn, attn*attn));\n" - " break;\n\n"); - out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT); - out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n" - " dist2 = dot(ldir, ldir);\n" - " dist = sqrt(dist2);\n" - " ldir = ldir / dist;\n" - " attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n" - " attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS - "[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS - "[index].distatt.xyz, float3(1.0, dist, dist2));\n" - " break;\n\n"); - out.Write(" default:\n" - " attn = 1.0;\n" - " ldir = _norm0;\n" - " break;\n" - " }\n" - "\n" - " switch (diffusefunc) {\n"); - out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE); - out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n"); - out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN); - out.Write(" return int4(round(attn * dot(ldir, _norm0) * float4(" I_LIGHTS - "[index].color)));\n\n"); - out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP); - out.Write(" return int4(round(attn * max(0.0, dot(ldir, _norm0)) * float4(" I_LIGHTS - "[index].color)));\n\n"); - out.Write(" default:\n" - " return int4(0, 0, 0, 0);\n" - " }\n" - "}\n\n"); - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, numTexgen, false, ""); + GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, ""); out.Write("};\n\n"); + WriteUberShaderCommonHeader(out, ApiType, host_config); + WriteLightingFunction(out); + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); @@ -219,7 +164,8 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, "\n"); // Hardware Lighting - GenVertexShaderLighting(ApiType, out); + WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0", + "o.colors_1"); // Texture Coordinates if (numTexgen > 0) @@ -228,6 +174,16 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, // clipPos/w needs to be done in pixel shader, not here out.Write("o.clipPos = o.pos;\n"); + if (per_pixel_lighting) + { + out.Write("o.Normal = _norm0;\n"); + out.Write("o.WorldPos = pos.xyz;\n"); + out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); + out.Write(" o.colors_0 = rawcolor0;\n"); + out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1); + out.Write(" o.colors_1 = rawcolor1;\n"); + } + // If we can disable the incorrect depth clipping planes using depth clamping, then we can do // our own depth clipping and calculate the depth range before the perspective divide if // necessary. @@ -300,7 +256,7 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, { if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan) { - AssignVSOutputMembers(out, "vs", "o", numTexgen, false); + AssignVSOutputMembers(out, "vs", "o", numTexgen, per_pixel_lighting); } else { @@ -309,6 +265,11 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, for (u32 i = 0; i < numTexgen; ++i) out.Write("tex%d.xyz = o.tex%d;\n", i, i); out.Write("clipPos = o.clipPos;\n"); + if (per_pixel_lighting) + { + out.Write("Normal = o.Normal;\n"); + out.Write("WorldPos = o.WorldPos;\n"); + } out.Write("colors_0 = o.colors_0;\n"); out.Write("colors_1 = o.colors_1;\n"); } @@ -334,123 +295,6 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config, return out; } -void GenVertexShaderLighting(APIType ApiType, ShaderCode& out) -{ - out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write(" o.colors_0 = rawcolor0;\n" - "else\n" - " o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n" - "\n"); - out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1); - out.Write(" o.colors_1 = rawcolor1;\n" - "else\n" - " o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n" - "\n"); - - out.Write("// Lighting\n"); - out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n", - ApiType == APIType::D3D ? "[loop] " : ""); - out.Write(" uint colorreg = xfmem_color(chan);\n" - " uint alphareg = xfmem_alpha(chan);\n" - " int4 mat = " I_MATERIALS "[chan + 2u]; \n" - " int4 lacc = int4(255, 255, 255, 255);\n" - "\n"); - - out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str()); - out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write( - " mat.xyz = int3(round(((chan == 0u) ? rawcolor0.xyz : rawcolor1.xyz) * 255.0));\n"); - out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.Write(" mat.xyz = int3(round(rawcolor0.xyz * 255.0));\n" - " else\n" - " mat.xyz = int3(255, 255, 255);\n" - " }\n" - "\n"); - - out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str()); - out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write(" mat.w = int(round(((chan == 0u) ? rawcolor0.w : rawcolor1.w) * 255.0));\n"); - out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.Write(" mat.w = int(round(rawcolor0.w * 255.0));\n" - " else\n" - " mat.w = 255;\n" - " } else {\n" - " mat.w = " I_MATERIALS " [chan + 2u].w;\n" - " }\n" - "\n"); - - out.Write(" if (%s != 0u) {\n", - BitfieldExtract("colorreg", LitChannel().enablelighting).c_str()); - out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str()); - out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write( - " lacc.xyz = int3(round(((chan == 0u) ? rawcolor0.xyz : rawcolor1.xyz) * 255.0));\n"); - out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.Write(" lacc.xyz = int3(round(rawcolor0.xyz * 255.0));\n" - " else\n" - " lacc.xyz = int3(255, 255, 255);\n" - " } else {\n" - " lacc.xyz = " I_MATERIALS " [chan].xyz;\n" - " }\n" - "\n"); - out.Write(" uint light_mask = %s | (%s << 4u);\n", - BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(), - BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str()); - out.Write(" uint attnfunc = %s;\n", - BitfieldExtract("colorreg", LitChannel().attnfunc).c_str()); - out.Write(" uint diffusefunc = %s;\n", - BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str()); - out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n" - " if ((light_mask & (1u << light_index)) != 0u)\n" - " lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, pos, " - "_norm0).xyz;\n" - " }\n" - " }\n" - "\n"); - - out.Write(" if (%s != 0u) {\n", - BitfieldExtract("alphareg", LitChannel().enablelighting).c_str()); - out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str()); - out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0); - out.Write(" lacc.w = int(round(((chan == 0u) ? rawcolor0.w : rawcolor1.w) * 255.0));\n"); - out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0); - out.Write(" lacc.w = int(round(rawcolor0.w * 255.0));\n" - " else\n" - " lacc.w = 255;\n" - " } else {\n" - " lacc.w = " I_MATERIALS " [chan].w;\n" - " }\n" - "\n"); - out.Write(" uint light_mask = %s | (%s << 4u);\n", - BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(), - BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str()); - out.Write(" uint attnfunc = %s;\n", - BitfieldExtract("alphareg", LitChannel().attnfunc).c_str()); - out.Write(" uint diffusefunc = %s;\n", - BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str()); - out.Write( - " for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n" - " if ((light_mask & (1u << light_index)) != 0u)\n\n" - " lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, pos, _norm0).w;\n" - " }\n" - " }\n" - "\n"); - - out.Write(" lacc = clamp(lacc, 0, 255);\n" - "\n" - " // Hopefully GPUs that can support dynamic indexing will optimize this.\n" - " float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n" - " switch (chan) {\n" - " case 0u: o.colors_0 = lit_color; break;\n" - " case 1u: o.colors_1 = lit_color; break;\n" - " }\n" - "}\n" - "\n"); - - out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1); - out.Write(" o.colors_1 = o.colors_0;\n\n"); -} - void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out) { // The HLSL compiler complains that the output texture coordinates are uninitialized when trying diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 2b0b169649..c277386f71 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -219,21 +219,14 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const return GetNumAutoShaderCompilerThreads(); } -bool VideoConfig::CanUseUberShaders() const -{ - // Ubershaders are currently incompatible with per-pixel lighting. - return !bEnablePixelLighting; -} - bool VideoConfig::CanPrecompileUberShaders() const { // We don't want to precompile ubershaders if they're never going to be used. - return bPrecompileUberShaders && (bBackgroundShaderCompiling || bDisableSpecializedShaders) && - CanUseUberShaders(); + return bPrecompileUberShaders && (bBackgroundShaderCompiling || bDisableSpecializedShaders); } bool VideoConfig::CanBackgroundCompileShaders() const { // We require precompiled ubershaders to background compile shaders. - return bBackgroundShaderCompiling && bPrecompileUberShaders && CanUseUberShaders(); + return bBackgroundShaderCompiling && bPrecompileUberShaders; } diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index fd327f2c89..f876e63802 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -258,7 +258,6 @@ struct VideoConfig final bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; } u32 GetShaderCompilerThreads() const; u32 GetShaderPrecompilerThreads() const; - bool CanUseUberShaders() const; bool CanPrecompileUberShaders() const; bool CanBackgroundCompileShaders() const; };