Ubershaders: Support per-pixel lighting

This commit is contained in:
Stenzek 2017-07-27 20:52:20 +10:00
parent c8f31656cb
commit e968c191ff
12 changed files with 311 additions and 303 deletions

View File

@ -593,11 +593,8 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader()
{
if (g_ActiveConfig.CanUseUberShaders() &&
(g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders))
{
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders)
return SetUberShader();
}
PixelShaderUid uid = GetPixelShaderUid();
if (last_entry && uid == last_uid)

View File

@ -249,11 +249,8 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format)
{
if (g_ActiveConfig.CanUseUberShaders() &&
(g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders))
{
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders)
return SetUberShader(vertex_format);
}
VertexShaderUid uid = GetVertexShaderUid();
if (last_entry && uid == last_uid)

View File

@ -223,7 +223,7 @@ void ProgramShaderCache::UploadConstants()
SHADER* ProgramShaderCache::SetShader(u32 primitive_type, const GLVertexFormat* vertex_format)
{
if (g_ActiveConfig.bDisableSpecializedShaders && g_ActiveConfig.CanUseUberShaders())
if (g_ActiveConfig.bDisableSpecializedShaders)
return SetUberShader(primitive_type, vertex_format);
SHADERUID uid;

View File

@ -389,13 +389,6 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type)
bool uber_vertex_shader = use_ubershaders || g_ActiveConfig.bForceVertexUberShaders;
bool uber_pixel_shader = use_ubershaders || g_ActiveConfig.bForcePixelUberShaders;
bool using_ubershaders = uber_vertex_shader || uber_pixel_shader;
if (!g_ActiveConfig.CanUseUberShaders())
{
// Per-pixel lighting disables ubershaders.
uber_vertex_shader = false;
uber_pixel_shader = false;
using_ubershaders = false;
}
// Switching to/from ubershaders? Have to adjust the vertex format and pipeline layout.
if (using_ubershaders != m_using_ubershaders)

View File

@ -333,7 +333,8 @@ PixelShaderUid GetPixelShaderUid()
return out;
}
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool bounding_box)
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
bool per_pixel_lighting, bool bounding_box)
{
// dot product for integer vectors
out.Write("int idot(int3 x, int3 y)\n"
@ -404,6 +405,19 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool boundin
"#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n"
"#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n");
if (per_pixel_lighting)
{
out.Write("%s", s_lighting_struct);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
else
out.Write("cbuffer VSBlock : register(b1) {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
}
if (bounding_box)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
@ -417,6 +431,10 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool boundin
out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
}
}
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, num_texgens, per_pixel_lighting, "");
out.Write("};\n");
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
@ -447,24 +465,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
uid_data->genMode_numindstages);
// Stuff that is shared between ubershaders and pixelgen.
WritePixelShaderCommonHeader(out, ApiType, uid_data->bounding_box);
if (per_pixel_lighting)
{
out.Write("%s", s_lighting_struct);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
else
out.Write("cbuffer VSBlock : register(b1) {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
}
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting, "");
out.Write("};\n");
WritePixelShaderCommonHeader(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting,
uid_data->bounding_box);
if (uid_data->forced_early_z)
{

View File

@ -159,6 +159,7 @@ typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data);
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool bounding_box);
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
bool per_pixel_lighting, bool bounding_box);
ShaderCode GeneratePixelShaderCode(APIType ApiType, const pixel_shader_uid_data* uid_data);
PixelShaderUid GetPixelShaderUid();

View File

@ -3,7 +3,9 @@
// Refer to the license.txt file included.
#include "VideoCommon/UberShaderCommon.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
namespace UberShader
{
@ -24,4 +26,178 @@ void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
"}\n\n");
}
}
// Emits the shader-side helper
//   int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, float3 normal)
// into `out`.
//
// The emitted function evaluates one light (I_LIGHTS[index]) in two steps:
//   1. a switch on `attnfunc` that produces the light direction `ldir` and the attenuation
//      factor `attn`;
//   2. a switch on `diffusefunc` that scales the light colour by `attn` (and, for the SIGN and
//      CLAMP modes, by dot(ldir, normal)) and returns it rounded to an int4.
// Case labels are written as the numeric values of the LIGHTATTN_* / LIGHTDIF_* constants via
// the %u format arguments. The emitted code refers to I_LIGHTS, so the lighting uniforms have to
// be declared earlier in the generated shader.
void WriteLightingFunction(ShaderCode& out)
{
// ==============================================
// Lighting channel calculation helper
// ==============================================
// Function header and locals. Note that `h` is declared in the emitted code but none of the
// statements emitted below reference it.
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
"float3 normal) {\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {\n");
// LIGHTATTN_NONE and LIGHTATTN_DIR share one body: two consecutive case labels are emitted
// before the statements. No attenuation (attn = 1.0); the normal is substituted when the
// computed direction has zero length.
out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = normal;\n"
" break;\n\n");
// LIGHTATTN_SPEC: attn starts as max(0, dot(normal, light.dir)) when the surface faces the
// light (0 otherwise) and is then fed through the cosatt / distatt polynomials.
out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
// distatt is used as-is when the diffuse function is LIGHTDIF_NONE and normalized otherwise.
out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
// LIGHTATTN_SPOT: angular term from cosatt (quadratic in dot(ldir, light.dir)) divided by a
// distance term from distatt (quadratic in the distance to the light).
out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
// Any other attnfunc value: no attenuation, light direction taken to be the normal. This also
// closes the first switch and opens the diffuse-function switch.
out.Write(" default:\n"
" attn = 1.0;\n"
" ldir = normal;\n"
" break;\n"
" }\n"
"\n"
" switch (diffusefunc) {\n");
// LIGHTDIF_NONE: light colour scaled by attenuation only.
out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
// LIGHTDIF_SIGN: additionally scaled by dot(ldir, normal), which may be negative.
out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
"[index].color)));\n\n");
// LIGHTDIF_CLAMP: as SIGN, but the dot product is clamped to be non-negative.
out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
"[index].color)));\n\n");
// Unknown diffuse function: the light contributes nothing. Also closes the emitted function.
out.Write(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }\n"
"}\n\n");
}
// Emits the per-channel hardware-lighting loop into `out`.
//
// All `const char*` parameters are fragments of shader source that get pasted into the
// emitted code:
//   world_pos_var, normal_var        - expressions passed as `pos` / `normal` to CalculateLighting
//   in_color_0_var, in_color_1_var   - expressions for the two input vertex colours; the emitted
//                                      code multiplies them by 255 and rounds them
//   out_color_0_var, out_color_1_var - variables that receive the lit colour of channel 0 / 1
// api_type only affects the loop header: D3D gets a "[loop] " attribute in front of the `for`.
//
// The emitted code relies on names that must already exist in the generated shader:
// `components`, `xfmem_numColorChans`, `xfmem_color()`, `xfmem_alpha()`, I_MATERIALS and
// CalculateLighting().
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
const char* normal_var, const char* in_color_0_var,
const char* in_color_1_var, const char* out_color_0_var,
const char* out_color_1_var)
{
out.Write("// Lighting\n");
out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n",
api_type == APIType::D3D ? "[loop] " : "");
// Per-channel setup: control registers for colour and alpha, the material colour from
// I_MATERIALS[chan + 2], and a light accumulator that starts at full intensity (255).
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
// Material RGB: when the colour register's matsource field is non-zero, take it from the vertex
// colour instead. `VB_HAS_COL0 << chan` tests the colour bit for this channel (presumably
// VB_HAS_COL1 == VB_HAS_COL0 << 1 - confirm against NativeVertexFormat.h); if that colour is
// absent, fall back to colour 0, then to white.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }\n"
"\n");
// Material alpha: same selection, driven by the alpha register. The emitted `else` branch
// re-assigns the value `mat.w` already holds from its initialization above.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(%s.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.w = 255;\n"
" } else {\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }\n"
"\n");
// Colour lighting, only when enablelighting is set in the colour register. The ambient term
// comes from the vertex colour (ambsource != 0, with the same fallbacks as above) or from
// I_MATERIALS[chan].
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("colorreg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" } else {\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }\n"
"\n");
// The 8-bit light mask is split across two register fields: bits 0-3 and bits 4-7.
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str());
// Accumulate the RGB contribution of every light enabled in the mask.
out.Write(
" for (uint light_index = 0u; light_index < 8u; light_index++) {\n"
" if ((light_mask & (1u << light_index)) != 0u)\n"
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).xyz;\n",
world_pos_var, normal_var);
out.Write(" }\n"
" }\n"
"\n");
// Alpha lighting: identical structure, but every field is read from the alpha register and
// only the .w component is accumulated. light_mask/attnfunc/diffusefunc are re-declared here
// inside the alpha `if` scope of the emitted code.
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("alphareg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(%s.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.w = 255;\n"
" } else {\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }\n"
"\n");
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str());
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).w;\n",
world_pos_var, normal_var);
out.Write(" }\n"
" }\n"
"\n");
// Combine material and light in integer arithmetic. `lacc + (lacc >> 7)` turns a full-intensity
// 255 into 256, so `(mat * 256) >> 8` leaves the material value unchanged. The result is then
// stored into the output variable for this channel via a switch rather than dynamic indexing.
out.Write(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {\n"
" case 0u: %s = lit_color; break;\n",
out_color_0_var);
out.Write(" case 1u: %s = lit_color; break;\n", out_color_1_var);
out.Write(" }\n"
"}\n"
"\n");
// With fewer than two colour channels and no second vertex colour, channel 1's output is a
// copy of channel 0's.
out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1);
out.Write(" %s = %s;\n\n", out_color_1_var, out_color_0_var);
}
}

View File

@ -13,6 +13,13 @@ namespace UberShader
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
// Vertex lighting
// WriteLightingFunction writes the shader helper CalculateLighting(), which evaluates a single
// light for a given attenuation/diffuse function, position and normal.
void WriteLightingFunction(ShaderCode& out);
// WriteVertexLighting writes the per-colour-channel lighting loop that calls CalculateLighting().
// The const char* arguments are shader-source expressions/variable names substituted into the
// generated code: the position and normal to light with, the two input vertex colours, and the
// two variables that receive the lit channel colours.
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
const char* normal_var, const char* in_color_0_var,
const char* in_color_1_var, const char* out_color_0_var,
const char* out_color_1_var);
// bitfieldExtract generator for BitField types
template <typename T>
std::string BitfieldExtract(const std::string& source, T type)

View File

@ -5,6 +5,7 @@
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/UberShaderCommon.h"
#include "VideoCommon/XFMemory.h"
@ -30,8 +31,6 @@ PixelShaderUid GetPixelShaderUid()
ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_ubershader_uid_data* uid_data)
{
// TODO: Support per-pixel lighting.
// This can be based on the vertex ubershaders, at the cost of a more expensive pixel shader.
const bool per_pixel_lighting = host_config.per_pixel_lighting;
const bool msaa = host_config.msaa;
const bool ssaa = host_config.ssaa;
@ -46,12 +45,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("// Pixel UberShader for %u texgens%s%s\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WritePixelShaderCommonHeader(out, ApiType, bounding_box);
WritePixelShaderCommonHeader(out, ApiType, numTexgen, per_pixel_lighting, bounding_box);
WriteUberShaderCommonHeader(out, ApiType, host_config);
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, "");
out.Write("};\n");
if (per_pixel_lighting)
WriteLightingFunction(out);
// Shader inputs/outputs in GLSL (HLSL is in main).
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
@ -133,8 +130,6 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"}\n\n");
}
// TODO: Per pixel lighting (not really needed)
// =====================
// Texture Sampling
// =====================
@ -346,23 +341,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" uint cc;\n"
" uint ac;\n");
// For D3D, we need to store colors in the struct, since we access it from outside
// the main function, where they are declared. Hopefully the compiler can propagate
// these through when it inlines the function.
if (ApiType == APIType::D3D)
{
for (u32 i = 0; i < numTexgen; i++)
out.Write(" float3 tex%d;\n", i);
out.Write(" float4 colors_0;\n"
" float4 colors_1;\n");
}
out.Write("};\n"
"\n"
"int4 getRasColor(State s, StageState ss);\n"
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);\n"
"int4 getKonstColor(State s, StageState ss);\n"
"\n"
"int3 selectColorInput(State s, StageState ss, uint index) {\n"
"int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint "
"index) {\n"
" switch (index) {\n"
" case 0u: // prev.rgb\n"
" return s.Reg[0].rgb;\n"
@ -385,9 +370,9 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" case 9u:\n"
" return s.TexColor.aaa;\n"
" case 10u:\n"
" return getRasColor(s, ss).rgb;\n"
" return getRasColor(s, ss, colors_0, colors_1).rgb;\n"
" case 11u:\n"
" return getRasColor(s, ss).aaa;\n"
" return getRasColor(s, ss, colors_0, colors_1).aaa;\n"
" case 12u: // One\n"
" return int3(255, 255, 255);\n"
" case 13u: // Half\n"
@ -399,7 +384,8 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" }\n"
"}\n"
"\n"
"int selectAlphaInput(State s, StageState ss, uint index) {\n"
"int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint "
"index) {\n"
" switch (index) {\n"
" case 0u: // prev.a\n"
" return s.Reg[0].a;\n"
@ -412,7 +398,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" case 4u:\n"
" return s.TexColor.a;\n"
" case 5u:\n"
" return getRasColor(s, ss).a;\n"
" return getRasColor(s, ss, colors_0, colors_1).a;\n"
" case 6u:\n"
" return getKonstColor(s, ss).a;\n"
" case 7u: // Zero\n"
@ -538,6 +524,18 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
for (int i = 0; i < 4; i++)
out.Write(" s.Reg[%d] = " I_COLORS "[%d];\n", i, i);
const char* color_input_prefix = "";
if (per_pixel_lighting)
{
out.Write(" float4 lit_colors_0 = colors_0;\n");
out.Write(" float4 lit_colors_1 = colors_1;\n");
out.Write(" float3 lit_normal = normalize(Normal.xyz);\n");
out.Write(" float3 lit_pos = WorldPos.xyz;\n");
WriteVertexLighting(out, ApiType, "lit_pos", "lit_normal", "colors_0", "colors_1",
"lit_colors_0", "lit_colors_1");
color_input_prefix = "lit_";
}
out.Write(" uint num_stages = %s;\n\n",
BitfieldExtract("bpmem_genmode", bpmem.genMode.numtevstages).c_str());
@ -559,12 +557,6 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" ss.order = ss.order >> %d;\n\n",
int(TwoTevStageOrders().enable1.StartBit() - TwoTevStageOrders().enable0.StartBit()));
if (ApiType == APIType::D3D)
{
out.Write(" ss.colors_0 = colors_0;\n"
" ss.colors_1 = colors_1;\n");
}
// Disable texturing when there are no texgens (for now)
if (numTexgen != 0)
{
@ -715,16 +707,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" uint color_dest = %s;\n",
BitfieldExtract("ss.cc", TevStageCombiner().colorC.dest).c_str());
out.Write(" uint color_compare_op = color_shift << 1 | uint(color_op);\n"
"\n"
" int3 color_A = selectColorInput(s, ss, %scolors_0, %scolors_1, color_a) & "
"int3(255, 255, 255);\n"
" int3 color_B = selectColorInput(s, ss, %scolors_0, %scolors_1, color_b) & "
"int3(255, 255, 255);\n"
" int3 color_C = selectColorInput(s, ss, %scolors_0, %scolors_1, color_c) & "
"int3(255, 255, 255);\n"
" int3 color_D = selectColorInput(s, ss, %scolors_0, %scolors_1, color_d); // 10 "
"bits + sign\n"
"\n", // TODO: do we need to sign extend?
color_input_prefix,
color_input_prefix, color_input_prefix, color_input_prefix, color_input_prefix,
color_input_prefix, color_input_prefix, color_input_prefix);
out.Write(
" uint color_compare_op = color_shift << 1 | uint(color_op);\n"
"\n"
" int3 color_A = selectColorInput(s, ss, color_a) & int3(255, 255, 255);\n"
" int3 color_B = selectColorInput(s, ss, color_b) & int3(255, 255, 255);\n"
" int3 color_C = selectColorInput(s, ss, color_c) & int3(255, 255, 255);\n"
" int3 color_D = selectColorInput(s, ss, color_d); // 10 bits + sign\n" // TODO: do we
// need to sign
// extend?
"\n"
" int3 color;\n"
" if(color_bias != 3u) { // Normal mode\n"
" color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, "
@ -788,41 +785,44 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
" int alpha_B;\n"
" if (alpha_bias != 3u || alpha_compare_op > 5u) {\n"
" // Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5\n"
" alpha_A = selectAlphaInput(s, ss, alpha_a) & 255;\n"
" alpha_B = selectAlphaInput(s, ss, alpha_b) & 255;\n"
" alpha_A = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_a) & 255;\n"
" alpha_B = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_b) & 255;\n"
" };\n"
" int alpha_C = selectAlphaInput(s, ss, alpha_c) & 255;\n"
" int alpha_D = selectAlphaInput(s, ss, alpha_d); // 10 bits + sign\n" // TODO: do we
// need to sign
// extend?
"\n"
" int alpha;\n"
" if(alpha_bias != 3u) { // Normal mode\n"
" alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, "
"true, alpha_shift);\n"
" } else { // Compare mode\n"
" if (alpha_compare_op == 6u) {\n"
" // TEVCMP_A8_GT\n"
" alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n"
" } else if (alpha_compare_op == 7u) {\n"
" // TEVCMP_A8_EQ\n"
" alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n"
" } else {\n"
" // All remaining alpha compare ops actually compare the color channels\n"
" alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n"
" }\n"
" alpha = alpha_D + alpha;\n"
" }\n"
"\n"
" // Clamp result\n"
" if (alpha_clamp)\n"
" alpha = clamp(alpha, 0, 255);\n"
" else\n"
" alpha = clamp(alpha, -1024, 1023);\n"
"\n"
" // Write result to the correct input register of the next stage\n"
" setRegAlpha(s, alpha_dest, alpha);\n"
" }\n");
" int alpha_C = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_c) & 255;\n"
" int alpha_D = selectAlphaInput(s, ss, %scolors_0, %scolors_1, alpha_d); // 10 bits + "
"sign\n"
"\n", // TODO: do we need to sign extend?
color_input_prefix,
color_input_prefix, color_input_prefix, color_input_prefix, color_input_prefix,
color_input_prefix, color_input_prefix, color_input_prefix);
out.Write("\n"
" int alpha;\n"
" if(alpha_bias != 3u) { // Normal mode\n"
" alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, "
"true, alpha_shift);\n"
" } else { // Compare mode\n"
" if (alpha_compare_op == 6u) {\n"
" // TEVCMP_A8_GT\n"
" alpha = (alpha_A > alpha_B) ? alpha_C : 0;\n"
" } else if (alpha_compare_op == 7u) {\n"
" // TEVCMP_A8_EQ\n"
" alpha = (alpha_A == alpha_B) ? alpha_C : 0;\n"
" } else {\n"
" // All remaining alpha compare ops actually compare the color channels\n"
" alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;\n"
" }\n"
" alpha = alpha_D + alpha;\n"
" }\n"
"\n"
" // Clamp result\n"
" if (alpha_clamp)\n"
" alpha = clamp(alpha, 0, 255);\n"
" else\n"
" alpha = clamp(alpha, -1024, 1023);\n"
"\n"
" // Write result to the correct input register of the next stage\n"
" setRegAlpha(s, alpha_dest, alpha);\n"
" }\n");
out.Write(" } // Main tev loop\n"
"\n");
@ -1036,14 +1036,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("}\n"
"\n"
"int4 getRasColor(State s, StageState ss) {\n"
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n"
" // Select Ras for stage\n"
" uint ras = %s;\n",
BitfieldExtract("ss.order", TwoTevStageOrders().colorchan0).c_str());
out.Write(" if (ras < 2u) { // Lighting Channel 0 or 1\n"
" int4 color = iround(((ras == 0u) ? %scolors_0 : %scolors_1) * 255.0);\n",
(ApiType == APIType::D3D) ? "ss." : "", (ApiType == APIType::D3D) ? "ss." : "");
out.Write(" uint swap = %s;\n",
" int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);\n"
" uint swap = %s;\n",
BitfieldExtract("ss.ac", TevStageCombiner().alphaC.rswap).c_str());
out.Write(" return Swizzle(swap, color);\n");
out.Write(" } else if (ras == 5u) { // Alpha Bumb\n"

View File

@ -21,7 +21,6 @@ VertexShaderUid GetVertexShaderUid()
return out;
}
static void GenVertexShaderLighting(APIType ApiType, ShaderCode& out);
static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out);
ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
@ -35,8 +34,6 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
ShaderCode out;
out.Write("// Vertex UberShader\n\n");
WriteUberShaderCommonHeader(out, ApiType, host_config);
out.Write("%s", s_lighting_struct);
// uniforms
@ -47,65 +44,13 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(s_shader_uniforms);
out.Write("};\n");
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float4 pos, "
"float3 _norm0) {\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {\n");
out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = _norm0;\n"
" break;\n\n");
out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
out.Write(" default:\n"
" attn = 1.0;\n"
" ldir = _norm0;\n"
" break;\n"
" }\n"
"\n"
" switch (diffusefunc) {\n");
out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.Write(" return int4(round(attn * dot(ldir, _norm0) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.Write(" return int4(round(attn * max(0.0, dot(ldir, _norm0)) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.Write(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }\n"
"}\n\n");
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, false, "");
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, "");
out.Write("};\n\n");
WriteUberShaderCommonHeader(out, ApiType, host_config);
WriteLightingFunction(out);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
@ -219,7 +164,8 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n");
// Hardware Lighting
GenVertexShaderLighting(ApiType, out);
WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
"o.colors_1");
// Texture Coordinates
if (numTexgen > 0)
@ -228,6 +174,16 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
// clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n");
if (per_pixel_lighting)
{
out.Write("o.Normal = _norm0;\n");
out.Write("o.WorldPos = pos.xyz;\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" o.colors_0 = rawcolor0;\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1);
out.Write(" o.colors_1 = rawcolor1;\n");
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
// our own depth clipping and calculate the depth range before the perspective divide if
// necessary.
@ -300,7 +256,7 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
{
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
{
AssignVSOutputMembers(out, "vs", "o", numTexgen, false);
AssignVSOutputMembers(out, "vs", "o", numTexgen, per_pixel_lighting);
}
else
{
@ -309,6 +265,11 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
for (u32 i = 0; i < numTexgen; ++i)
out.Write("tex%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n");
out.Write("WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n");
out.Write("colors_1 = o.colors_1;\n");
}
@ -334,123 +295,6 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
return out;
}
// Emits the hardware-lighting section of the vertex ubershader into `out`.
//
// The generated code uses fixed shader names: it reads `rawcolor0` / `rawcolor1`, `pos`,
// `_norm0` and `components`, and writes `o.colors_0` / `o.colors_1`. ApiType only affects the
// loop header: D3D gets a "[loop] " attribute in front of the channel `for`.
void GenVertexShaderLighting(APIType ApiType, ShaderCode& out)
{
// Default both output colours to the raw vertex colour when present, otherwise to opaque white.
// The lighting loop below overwrites them for every enabled colour channel.
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" o.colors_0 = rawcolor0;\n"
"else\n"
" o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"
"\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1);
out.Write(" o.colors_1 = rawcolor1;\n"
"else\n"
" o.colors_1 = float4(1.0, 1.0, 1.0, 1.0);\n"
"\n");
out.Write("// Lighting\n");
out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n",
ApiType == APIType::D3D ? "[loop] " : "");
// Per-channel setup: control registers, material colour from I_MATERIALS[chan + 2], and a
// light accumulator that starts at full intensity (255).
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
// Material RGB from the vertex colour when matsource is set: this channel's colour if present,
// else colour 0, else white.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(
" mat.xyz = int3(round(((chan == 0u) ? rawcolor0.xyz : rawcolor1.xyz) * 255.0));\n");
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(rawcolor0.xyz * 255.0));\n"
" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }\n"
"\n");
// Material alpha: same selection, driven by the alpha register.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(((chan == 0u) ? rawcolor0.w : rawcolor1.w) * 255.0));\n");
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(rawcolor0.w * 255.0));\n"
" else\n"
" mat.w = 255;\n"
" } else {\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }\n"
"\n");
// Colour lighting (only when enablelighting is set): pick the ambient term, then add the
// contribution of every light enabled in the 8-bit mask (split over two register fields).
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("colorreg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(
" lacc.xyz = int3(round(((chan == 0u) ? rawcolor0.xyz : rawcolor1.xyz) * 255.0));\n");
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(rawcolor0.xyz * 255.0));\n"
" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" } else {\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }\n"
"\n");
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str());
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n"
" if ((light_mask & (1u << light_index)) != 0u)\n"
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, pos, "
"_norm0).xyz;\n"
" }\n"
" }\n"
"\n");
// Alpha lighting: identical structure, reading every field from the alpha register and
// accumulating only the .w component.
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("alphareg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(((chan == 0u) ? rawcolor0.w : rawcolor1.w) * 255.0));\n");
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(rawcolor0.w * 255.0));\n"
" else\n"
" lacc.w = 255;\n"
" } else {\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }\n"
"\n");
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str());
out.Write(
" for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, pos, _norm0).w;\n"
" }\n"
" }\n"
"\n");
// Combine material and light in integer arithmetic. `lacc + (lacc >> 7)` turns a full-intensity
// 255 into 256, so `(mat * 256) >> 8` leaves the material value unchanged. The result is stored
// into o.colors_0 or o.colors_1 via a switch on the channel index.
out.Write(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {\n"
" case 0u: o.colors_0 = lit_color; break;\n"
" case 1u: o.colors_1 = lit_color; break;\n"
" }\n"
"}\n"
"\n");
// With fewer than two colour channels and no second vertex colour, o.colors_1 mirrors
// o.colors_0.
out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1);
out.Write(" o.colors_1 = o.colors_0;\n\n");
}
void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out)
{
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying

View File

@ -219,21 +219,14 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const
return GetNumAutoShaderCompilerThreads();
}
// Reports whether ubershaders may be used with the current settings.
// Returns false exactly when per-pixel lighting (bEnablePixelLighting) is enabled.
bool VideoConfig::CanUseUberShaders() const
{
  // Ubershaders are currently incompatible with per-pixel lighting.
  if (bEnablePixelLighting)
    return false;

  return true;
}
bool VideoConfig::CanPrecompileUberShaders() const
{
// We don't want to precompile ubershaders if they're never going to be used.
return bPrecompileUberShaders && (bBackgroundShaderCompiling || bDisableSpecializedShaders) &&
CanUseUberShaders();
return bPrecompileUberShaders && (bBackgroundShaderCompiling || bDisableSpecializedShaders);
}
bool VideoConfig::CanBackgroundCompileShaders() const
{
// We require precompiled ubershaders to background compile shaders.
return bBackgroundShaderCompiling && bPrecompileUberShaders && CanUseUberShaders();
return bBackgroundShaderCompiling && bPrecompileUberShaders;
}

View File

@ -258,7 +258,6 @@ struct VideoConfig final
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; }
u32 GetShaderCompilerThreads() const;
u32 GetShaderPrecompilerThreads() const;
bool CanUseUberShaders() const;
bool CanPrecompileUberShaders() const;
bool CanBackgroundCompileShaders() const;
};