From 4c629c2beef8b3bae6ee13bbeae484315adbf67c Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 18 Jun 2022 01:09:35 -0500 Subject: [PATCH] VideoCommon: Add dynamic vertex loader to ubershaders --- Source/Core/Core/State.cpp | 2 +- Source/Core/VideoBackends/D3D/D3DMain.cpp | 1 + .../Core/VideoBackends/D3D12/VideoBackend.cpp | 1 + Source/Core/VideoBackends/Metal/MTLUtil.mm | 1 + .../Core/VideoBackends/Null/NullBackend.cpp | 1 + Source/Core/VideoBackends/OGL/OGLMain.cpp | 2 + Source/Core/VideoBackends/Software/SWmain.cpp | 1 + .../VideoBackends/Vulkan/VulkanContext.cpp | 1 + Source/Core/VideoCommon/ConstantManager.h | 7 + Source/Core/VideoCommon/ShaderCache.cpp | 2 + Source/Core/VideoCommon/ShaderGenCommon.cpp | 1 + Source/Core/VideoCommon/ShaderGenCommon.h | 10 + Source/Core/VideoCommon/UberShaderVertex.cpp | 241 +++++++++++++----- .../Core/VideoCommon/VertexLoaderManager.cpp | 3 +- .../Core/VideoCommon/VertexShaderManager.cpp | 41 ++- Source/Core/VideoCommon/VertexShaderManager.h | 3 +- Source/Core/VideoCommon/VideoConfig.h | 1 + 17 files changed, 253 insertions(+), 66 deletions(-) diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index a6467f2b2c..f97e166f19 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -74,7 +74,7 @@ static std::recursive_mutex g_save_thread_mutex; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 148; // Last changed in PR 10768 +constexpr u32 STATE_VERSION = 149; // Last changed in PR 10781 // Maps savestate versions to Dolphin versions. 
// Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index b8d7fb5871..755f0cf590 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -112,6 +112,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter); g_Config.backend_info.bSupportsSettingObjectNames = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index b0611a8690..99cf3955c9 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -87,6 +87,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsSettingObjectNames = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // We can only check texture support once we have a device. 
if (g_dx_context) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 14c83b5c17..ca99ac0997 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -74,6 +74,7 @@ void Metal::Util::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsSettingObjectNames = true; // Metal requires multisample resolve to be done on a render pass config->backend_info.bSupportsPartialMultisampleResolve = false; + config->backend_info.bSupportsDynamicVertexLoader = false; } void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index b68c9cfd94..7cc8919ad0 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -60,6 +60,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index 24a65f746d..f6a84240e0 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -94,6 +94,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + // Unnecessary since OGL doesn't use pipelines + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics // options will show the option when it
is not supported. The only way around this would be diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 80dc4603f2..5076106089 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -89,6 +89,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index cad326c83a..ca2d793910 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -295,6 +295,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS. config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features. config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support. 
+ config->backend_info.bSupportsDynamicVertexLoader = false; // Not yet supported } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 9fd4c060cd..b9ca0264ef 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -93,6 +93,13 @@ struct VertexShaderConstants float4 cached_tangent; float4 cached_binormal; + // For UberShader vertex loader + u32 vertex_stride; + u32 vertex_offset_normals[3]; + u32 vertex_offset_position; + u32 vertex_offset_posmtx; + u32 vertex_offset_colors[2]; + u32 vertex_offset_texcoords[8]; }; struct GeometryShaderConstants diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 6049fd29a7..cea1ac89d1 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -743,6 +743,8 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in) { GXUberPipelineUid out; memcpy(&out, &in, sizeof(out)); // Copy padding + if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + out.vertex_format = nullptr; if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch) { // Always blend in shader diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index b9965421cc..7b407f9aaf 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -43,6 +43,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.manual_texture_sampling_custom_texture_sizes = g_ActiveConfig.ManualTextureSamplingWithHiResTextures(); bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler; + bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h 
b/Source/Core/VideoCommon/ShaderGenCommon.h index 1cbff2bfcf..73fa68af03 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -177,6 +177,7 @@ union ShaderHostConfig BitField<24, 1, bool, u32> manual_texture_sampling; BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes; BitField<26, 1, bool, u32> backend_sampler_lod_bias; + BitField<27, 1, bool, u32> backend_dynamic_vertex_loader; static ShaderHostConfig GetCurrent(); }; @@ -302,6 +303,15 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\tuint4 xfmem_pack1[8];\n" "\tfloat4 " I_CACHED_TANGENT ";\n" "\tfloat4 " I_CACHED_BINORMAL ";\n" + "\tuint vertex_stride;\n" + "\tuint vertex_offset_rawnormal;\n" + "\tuint vertex_offset_rawtangent;\n" + "\tuint vertex_offset_rawbinormal;\n" + "\tuint vertex_offset_rawpos;\n" + "\tuint vertex_offset_posmtx;\n" + "\tuint vertex_offset_rawcolor0;\n" + "\tuint vertex_offset_rawcolor1;\n" + "\tuint4 vertex_offset_rawtex[2];\n" // std140 is pain "\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n" "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n" "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 67dd84bb7f..b49a5d8954 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -22,7 +22,11 @@ VertexShaderUid GetVertexShaderUid() return out; } -static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out); +static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config, + u32 num_texgen, ShaderCode& out); +static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent, + std::string_view name, std::string_view shader_type, + std::string_view stored_type, std::string_view offset_name = {}); ShaderCode GenVertexShader(APIType api_type, const 
ShaderHostConfig& host_config, const vertex_ubershader_uid_data* uid_data) @@ -50,15 +54,76 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - for (int i = 0; i < 8; ++i) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + if (host_config.backend_dynamic_vertex_loader) + { + out.Write(R"( +SSBO_BINDING(1) readonly restrict buffer Vertices {{ + uint vertex_buffer[]; +}}; + +uint GetVertexBaseOffset() {{ + return gl_VertexID * vertex_stride; +}} + +uint4 load_input_uint4_ubyte4(uint vtx_offset, uint attr_offset) {{ + uint value = vertex_buffer[vtx_offset + attr_offset]; + return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); +}} + +float4 load_input_float4_ubyte4(uint vtx_offset, uint attr_offset) {{ + return float4(load_input_uint4_ubyte4(vtx_offset, attr_offset)) / 255.0f; +}} + +float3 load_input_float3_float3(uint vtx_offset, uint attr_offset) {{ + uint offset = vtx_offset + attr_offset; + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2])); +}} + +float4 load_input_float4_rawpos(uint vtx_offset, uint attr_offset) {{ + uint components = attr_offset >> 16; + uint offset = vtx_offset + (attr_offset & 
0xffff); + if (components < 3) + return float4(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + 0.0f, 1.0f); + else + return float4(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2]), + 1.0f); +}} + +float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ + uint components = attr_offset >> 16; + uint offset = vtx_offset + (attr_offset & 0xffff); + if (components < 2) + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), 0.0f, 0.0f); + else if (components < 3) + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + 0.0f); + else + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2])); +}} + +)"); + } + else + { + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + for (int i = 0; i < 8; ++i) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + } if (host_config.backend_geometry_shaders) { @@ -99,7 +164,12 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("VS_OUTPUT o;\n" "\n"); - + if (host_config.backend_dynamic_vertex_loader) + { + out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n"); + } + // rawpos is always needed + LoadVertexAttribute(out, 
host_config, 0, "rawpos", "float4", "rawpos"); // Transforms out.Write("// Position matrix\n" "float4 P0;\n" @@ -113,6 +183,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "\n" "if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", VB_HAS_POSMTXIDX); + LoadVertexAttribute(out, host_config, 2, "posmtx", "uint4", "ubyte4"); out.Write(" // Vertex format has a per-vertex matrix\n" " int posidx = int(posmtx.r);\n" " P0 = " I_TRANSFORMMATRICES "[posidx];\n" @@ -144,27 +215,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "// by lighting calculations and needs to be unit length), the same transform matrix\n" "// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n" "float3 _normal = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n", + "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n" + "{{\n", VB_HAS_NORMAL); + LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3"); out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, " "rawnormal)));\n" + "}}\n" "\n" "float3 _tangent = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n", + "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n" + "{{\n", VB_HAS_TANGENT); + LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3"); out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n" + "}}\n" "else\n" + "{{\n" " _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT ".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n" + "}}\n" "\n" "float3 _binormal = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n", + "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n" + "{{\n", VB_HAS_BINORMAL); + LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3"); out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, 
rawbinormal), dot(N2, " "rawbinormal));\n" + "}}\n" "else\n" + "{{\n" " _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL ".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n" + "}}\n" "\n"); // Hardware Lighting @@ -178,34 +262,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "bool use_color_1 = ((components & {0}u) == {0}u); // VB_HAS_COL0 | VB_HAS_COL1\n", VB_HAS_COL0 | VB_HAS_COL1); - out.Write("for (uint color = 0u; color < {}u; color++) {{\n", NUM_XF_COLOR_CHANNELS); - out.Write(" if ((color == 0u || use_color_1) && (components & ({}u << color)) != 0u) {{\n", - VB_HAS_COL0); - out.Write(" // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are " - "present.\n" - " if (color == 0u)\n" - " vertex_color_0 = rawcolor0;\n" - " else\n" - " vertex_color_1 = rawcolor1;\n" - " }} else if (color == 0u && (components & {}u) != 0u) {{\n", - VB_HAS_COL1); - out.Write(" // Use color1 for channel 0 if color0 is not present.\n" - " vertex_color_0 = rawcolor1;\n" - " }} else {{\n" - " if (color == 0u)\n" - " vertex_color_0 = missing_color_value;\n" - " else\n" - " vertex_color_1 = missing_color_value;\n" - " }}\n" + out.Write("if ((components & {0}u) == {0}u) // VB_HAS_COL0 | VB_HAS_COL1\n" + "{{\n", + VB_HAS_COL0 | VB_HAS_COL1); + LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4"); + LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4"); + out.Write(" vertex_color_0 = rawcolor0;\n" + " vertex_color_1 = rawcolor1;\n" "}}\n" - "\n"); + "else if ((components & {}u) != 0u) // VB_HAS_COL0\n" + "{{\n", + VB_HAS_COL0); + LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4"); + out.Write(" vertex_color_0 = rawcolor0;\n" + " vertex_color_1 = rawcolor0;\n" + "}}\n" + "else if ((components & {}u) != 0u) // VB_HAS_COL1\n" + "{{\n", + VB_HAS_COL1); + LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4"); + out.Write(" 
vertex_color_0 = rawcolor1;\n" + " vertex_color_1 = rawcolor1;\n" + "}}\n" + "else\n" + "{{\n" + " vertex_color_0 = missing_color_value;\n" + " vertex_color_1 = missing_color_value;\n" + "}}\n"); WriteVertexLighting(out, api_type, "pos.xyz", "_normal", "vertex_color_0", "vertex_color_1", "o.colors_0", "o.colors_1"); // Texture Coordinates if (num_texgen > 0) - GenVertexShaderTexGens(api_type, num_texgen, out); + GenVertexShaderTexGens(api_type, host_config, num_texgen, out); if (per_pixel_lighting) { @@ -352,7 +442,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config return out; } -static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out) +static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config, + u32 num_texgen, ShaderCode& out) { // The HLSL compiler complains that the output texture coordinates are uninitialized when trying // to dynamically index them. @@ -377,27 +468,40 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out.Write(" coord.xyz = rawpos.xyz;\n"); out.Write(" break;\n\n"); out.Write(" case {:s}:\n", SourceRow::Normal); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_NORMAL */) != 0u) ? rawnormal.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_NORMAL\n" + " {{\n", VB_HAS_NORMAL); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawnormal", "float3", "float3"); + out.Write(" coord.xyz = rawnormal.xyz;\n" + " }}\n" + " break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalT); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_TANGENT */) != 0u) ? 
rawtangent.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_TANGENT\n" + " {{\n", VB_HAS_TANGENT); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawtangent", "float3", "float3"); + out.Write(" coord.xyz = rawtangent.xyz;\n" + " }}\n" + " break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalB); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_BINORMAL */) != 0u) ? rawbinormal.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n" + " {{\n", VB_HAS_BINORMAL); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawbinormal", "float3", "float3"); + out.Write(" coord.xyz = rawbinormal.xyz;\n" + " }}\n" + " break;\n\n"); for (u32 i = 0; i < 8; i++) { out.Write(" case {:s}:\n", static_cast<SourceRow>(static_cast<u32>(SourceRow::Tex0) + i)); - out.Write( - " coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, " - "1.0, 1.0) : coord;\n", - VB_HAS_UV0 << i, i, i, i); + out.Write(" if ((components & {}u) != 0u) // VB_HAS_UV{}\n" + " {{\n", + VB_HAS_UV0 << i, i); + LoadVertexAttribute(out, host_config, 6, fmt::format("rawtex{}", i), "float3", "rawtex", + fmt::format("rawtex[{}][{}]", i / 4, i % 4)); + out.Write(" coord = float4(rawtex{}.x, rawtex{}.y, 1.0f, 1.0f);\n" + " }}\n", + i, i); out.Write(" break;\n\n"); } out.Write(" }}\n" @@ -447,14 +551,24 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& " {{\n"); out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", VB_HAS_TEXMTXIDX0); - out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n" - " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" - " int tmp = 0;\n" - " switch (texgen) {{\n"); - for (u32 i = 0; i < num_texgen; i++) - out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); - out.Write(" }}\n" - "\n"); + if
(host_config.backend_dynamic_vertex_loader) + { + out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, " + "vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n" + "\n"); + } + else + { + out.Write( + " // This is messy, due to dynamic indexing of the input texture coordinates.\n" + " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" + " int tmp = 0;\n" + " switch (texgen) {{\n"); + for (u32 i = 0; i < num_texgen; i++) + out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); + out.Write(" }}\n" + "\n"); + } out.Write(" if ({} == {:s}) {{\n", BitfieldExtract<&TexMtxInfo::projection>("texMtxInfo"), TexSize::STQ); out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" @@ -514,6 +628,19 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& "}}\n"); } +static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent, + std::string_view name, std::string_view shader_type, + std::string_view stored_type, std::string_view offset_name) +{ + if (host_config.backend_dynamic_vertex_loader) + { + code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent, + shader_type, name, shader_type, stored_type, + offset_name.empty() ? 
name : offset_name); + } + // else inputs are always available +} + void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback) { VertexShaderUid uid; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 607de66bee..bb843a6120 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -353,7 +353,8 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun } s_current_vtx_fmt = loader->m_native_vertex_format; g_current_components = loader->m_native_components; - VertexShaderManager::SetVertexFormat(loader->m_native_components); + VertexShaderManager::SetVertexFormat(loader->m_native_components, + loader->m_native_vertex_format->GetVertexDeclaration()); // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // They still need to go through vertex loading, because we need to calculate a zfreeze refrence diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index ce43235977..78bf54da66 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -606,13 +606,42 @@ void VertexShaderManager::SetMaterialColorChanged(int index) nMaterialsChanged[index] = true; } -void VertexShaderManager::SetVertexFormat(u32 components) +static void UpdateValue(bool* dirty, u32* old_value, u32 new_value) { - if (components != constants.components) - { - constants.components = components; - dirty = true; - } + if (*old_value == new_value) + return; + *old_value = new_value; + *dirty = true; +} + +static void UpdateOffset(bool* dirty, bool include_components, u32* old_value, + const AttributeFormat& attribute) +{ + if (!attribute.enable) + return; + u32 new_value = attribute.offset / 4; // GPU uses uint offsets + if (include_components) + new_value |= attribute.components << 16; + UpdateValue(dirty, old_value,
new_value); +} + +template <size_t N> +static void UpdateOffsets(bool* dirty, bool include_components, u32 (*old_value)[N], + const AttributeFormat (&attribute)[N]) +{ + for (size_t i = 0; i < N; i++) + UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]); +} + +void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format) +{ + UpdateValue(&dirty, &constants.components, components); + UpdateValue(&dirty, &constants.vertex_stride, format.stride / 4); + UpdateOffset(&dirty, true, &constants.vertex_offset_position, format.position); + UpdateOffset(&dirty, false, &constants.vertex_offset_posmtx, format.posmtx); + UpdateOffsets(&dirty, true, &constants.vertex_offset_texcoords, format.texcoords); + UpdateOffsets(&dirty, false, &constants.vertex_offset_colors, format.colors); + UpdateOffsets(&dirty, false, &constants.vertex_offset_normals, format.normals); } void VertexShaderManager::SetTexMatrixInfoChanged(int index) diff --git a/Source/Core/VideoCommon/VertexShaderManager.h b/Source/Core/VideoCommon/VertexShaderManager.h index 3bddf28fb0..2a8aa7b596 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.h +++ b/Source/Core/VideoCommon/VertexShaderManager.h @@ -10,6 +10,7 @@ #include "VideoCommon/ConstantManager.h" class PointerWrap; +struct PortableVertexDeclaration; // The non-API dependent parts.
class VertexShaderManager @@ -29,7 +30,7 @@ public: static void SetProjectionChanged(); static void SetMaterialColorChanged(int index); - static void SetVertexFormat(u32 components); + static void SetVertexFormat(u32 components, const PortableVertexDeclaration& format); static void SetTexMatrixInfoChanged(int index); static void SetLightingConfigChanged(); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 22817d1f61..105840bafa 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -232,6 +232,7 @@ struct VideoConfig final bool bSupportsLodBiasInSampler = false; bool bSupportsSettingObjectNames = false; bool bSupportsPartialMultisampleResolve = false; + bool bSupportsDynamicVertexLoader = false; } backend_info; // Utility