diff --git a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp index b7aeab512c..b4da7fc354 100644 --- a/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/D3DNativeVertexFormat.cpp @@ -115,11 +115,12 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) for (int i = 0; i < 3; i++) { + static constexpr std::array NAMES = {"NORMAL", "TANGENT", "BINORMAL"}; format = &vtx_decl.normals[i]; if (format->enable) { - m_elems[m_num_elems].SemanticName = "NORMAL"; - m_elems[m_num_elems].SemanticIndex = i; + m_elems[m_num_elems].SemanticName = NAMES[i]; + m_elems[m_num_elems].SemanticIndex = 0; m_elems[m_num_elems].AlignedByteOffset = format->offset; m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer); m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; diff --git a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp index bd818d1a66..2d1a3028e0 100644 --- a/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12VertexFormat.cpp @@ -92,7 +92,8 @@ void DXVertexFormat::MapAttributes() { if (m_decl.normals[i].enable) { - AddAttribute("NORMAL", i, 0, + static constexpr std::array NAMES = {"NORMAL", "TANGENT", "BINORMAL"}; + AddAttribute(NAMES[i], 0, 0, VarToDXGIFormat(m_decl.normals[i].type, m_decl.normals[i].components, m_decl.normals[i].integer), m_decl.normals[i].offset); diff --git a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp index d21a40ebc1..1913b6dcd3 100644 --- a/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/OGLNativeVertexFormat.cpp @@ -68,7 +68,7 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& vtx_decl) SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, 
vtx_decl.position); for (int i = 0; i < 3; i++) - SetPointer(SHADER_NORM0_ATTRIB + i, vertex_stride, vtx_decl.normals[i]); + SetPointer(SHADER_NORMAL_ATTRIB + i, vertex_stride, vtx_decl.normals[i]); for (int i = 0; i < 2; i++) SetPointer(SHADER_COLOR0_ATTRIB + i, vertex_stride, vtx_decl.colors[i]); diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 659882d015..817f894296 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -139,9 +139,9 @@ void SHADER::SetProgramBindings(bool is_compute) glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "rawcolor0"); glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "rawcolor1"); - glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0"); - glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1"); - glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2"); + glBindAttribLocation(glprogid, SHADER_NORMAL_ATTRIB, "rawnormal"); + glBindAttribLocation(glprogid, SHADER_TANGENT_ATTRIB, "rawtangent"); + glBindAttribLocation(glprogid, SHADER_BINORMAL_ATTRIB, "rawbinormal"); } for (int i = 0; i < 8; i++) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 287fcf4a4b..4b9825132e 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -25,6 +25,7 @@ #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" @@ -89,11 +90,8 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ OutputVertexData* outVertex = m_setup_unit.GetVertex(); TransformUnit::TransformPosition(&m_vertex, outVertex); outVertex->normal = {}; - if 
(VertexLoaderManager::g_current_components & VB_HAS_NRM0) - { - TransformUnit::TransformNormal( - &m_vertex, (VertexLoaderManager::g_current_components & VB_HAS_NRM2) != 0, outVertex); - } + if (VertexLoaderManager::g_current_components & VB_HAS_NORMAL) + TransformUnit::TransformNormal(&m_vertex, outVertex); TransformUnit::TransformColor(&m_vertex, outVertex); TransformUnit::TransformTexCoord(&m_vertex, outVertex); @@ -230,6 +228,18 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int inde { ReadVertexAttribute(&m_vertex.normal[i][0], src, vdec.normals[i], 0, 3, false); } + if (!vdec.normals[1].enable) + { + m_vertex.normal[1][0] = VertexShaderManager::constants.cached_tangent[0]; + m_vertex.normal[1][1] = VertexShaderManager::constants.cached_tangent[1]; + m_vertex.normal[1][2] = VertexShaderManager::constants.cached_tangent[2]; + } + if (!vdec.normals[2].enable) + { + m_vertex.normal[2][0] = VertexShaderManager::constants.cached_binormal[0]; + m_vertex.normal[2][1] = VertexShaderManager::constants.cached_binormal[1]; + m_vertex.normal[2][2] = VertexShaderManager::constants.cached_binormal[2]; + } ParseColorAttributes(&m_vertex, src, vdec); diff --git a/Source/Core/VideoBackends/Software/TransformUnit.cpp b/Source/Core/VideoBackends/Software/TransformUnit.cpp index bf56307025..6fcf42df9e 100644 --- a/Source/Core/VideoBackends/Software/TransformUnit.cpp +++ b/Source/Core/VideoBackends/Software/TransformUnit.cpp @@ -90,22 +90,19 @@ void TransformPosition(const InputVertexData* src, OutputVertexData* dst) } } -void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst) +void TransformNormal(const InputVertexData* src, OutputVertexData* dst) { const float* mat = &xfmem.normalMatrices[(src->posMtx & 31) * 3]; - if (nbt) - { - MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]); - MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]); - MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]); - 
dst->normal[0].Normalize(); - } - else - { - MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]); - dst->normal[0].Normalize(); - } + MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]); + MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]); + MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]); + // The scale of the transform matrix is used to control the size of the emboss map effect, by + // changing the scale of the transformed binormals (which only get used by emboss map texgens). + // By normalising the first transformed normal (which is used by lighting calculations and needs + // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, + // and not scaling for lighting. + dst->normal[0].Normalize(); } static void TransformTexCoordRegular(const TexMtxInfo& texinfo, int coordNum, diff --git a/Source/Core/VideoBackends/Software/TransformUnit.h b/Source/Core/VideoBackends/Software/TransformUnit.h index e764127ec4..59152250b0 100644 --- a/Source/Core/VideoBackends/Software/TransformUnit.h +++ b/Source/Core/VideoBackends/Software/TransformUnit.h @@ -9,7 +9,7 @@ struct OutputVertexData; namespace TransformUnit { void TransformPosition(const InputVertexData* src, OutputVertexData* dst); -void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst); +void TransformNormal(const InputVertexData* src, OutputVertexData* dst); void TransformColor(const InputVertexData* src, OutputVertexData* dst); void TransformTexCoord(const InputVertexData* src, OutputVertexData* dst); } // namespace TransformUnit diff --git a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp index 5f53547066..6aa5ea14f1 100644 --- a/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKVertexFormat.cpp @@ -73,7 +73,7 @@ void VertexFormat::MapAttributes() for (uint32_t i = 0; i < 3; i++) { if (m_decl.normals[i].enable) - 
AddAttribute(SHADER_NORM0_ATTRIB + i, 0, + AddAttribute(SHADER_NORMAL_ATTRIB + i, 0, VarToVkFormat(m_decl.normals[i].type, m_decl.normals[i].components, m_decl.normals[i].integer), m_decl.normals[i].offset); diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 5335af963a..9fd4c060cd 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -90,6 +90,9 @@ struct VertexShaderConstants // .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha std::array xfmem_pack1; + + float4 cached_tangent; + float4 cached_binormal; }; struct GeometryShaderConstants diff --git a/Source/Core/VideoCommon/LightingShaderGen.cpp b/Source/Core/VideoCommon/LightingShaderGen.cpp index a9ff3119bc..5b34e3c89f 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/LightingShaderGen.cpp @@ -27,11 +27,11 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d case AttenuationFunc::Dir: object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); object.Write("attn = 1.0;\n"); - object.Write("if (length(ldir) == 0.0)\n\t ldir = _norm0;\n"); + object.Write("if (length(ldir) == 0.0)\n\t ldir = _normal;\n"); break; case AttenuationFunc::Spec: object.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(index)); - object.Write("attn = (dot(_norm0, ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR + object.Write("attn = (dot(_normal, ldir) >= 0.0) ? 
max(0.0, dot(_normal, " LIGHT_DIR ".xyz)) : 0.0;\n", LIGHT_DIR_PARAMS(index)); object.Write("cosAttn = " LIGHT_COSATT ".xyz;\n", LIGHT_COSATT_PARAMS(index)); @@ -64,7 +64,8 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d break; case DiffuseFunc::Sign: case DiffuseFunc::Clamp: - object.Write("lacc.{} += int{}(round(attn * {}dot(ldir, _norm0)) * float{}(" LIGHT_COL ")));\n", + object.Write("lacc.{} += int{}(round(attn * {}dot(ldir, _normal)) * float{}(" LIGHT_COL + ")));\n", swizzle, swizzle_components, diffusefunc != DiffuseFunc::Sign ? "max(0.0," : "(", swizzle_components, LIGHT_COL_PARAMS(index, swizzle)); break; diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 7bbf0bd38c..94dc1a0fa2 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -25,10 +25,9 @@ enum VB_HAS_TEXMTXIDXALL = (0xff << 2), // VB_HAS_POS=0, // Implied, it always has pos! 
don't bother testing - VB_HAS_NRM0 = (1 << 10), - VB_HAS_NRM1 = (1 << 11), - VB_HAS_NRM2 = (1 << 12), - VB_HAS_NRMALL = (7 << 10), + VB_HAS_NORMAL = (1 << 10), + VB_HAS_TANGENT = (1 << 11), + VB_HAS_BINORMAL = (1 << 12), VB_COL_SHIFT = 13, VB_HAS_COL0 = (1 << 13), diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 837001c118..82de68a290 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -1132,7 +1132,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos if (per_pixel_lighting) { - out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n" + out.Write("\tfloat3 _normal = normalize(Normal.xyz);\n\n" "\tfloat3 pos = WorldPos;\n"); out.Write("\tint4 lacc;\n" diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index bec69fe7c0..c9f0246318 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -296,6 +296,8 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable, #define I_POSTTRANSFORMMATRICES "cpostmtx" #define I_PIXELCENTERCORRECTION "cpixelcenter" #define I_VIEWPORT_SIZE "cviewport" +#define I_CACHED_TANGENT "ctangent" +#define I_CACHED_BINORMAL "cbinormal" #define I_STEREOPARAMS "cstereo" #define I_LINEPTPARAMS "clinept" @@ -317,6 +319,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\tfloat4 " I_PIXELCENTERCORRECTION ";\n" "\tfloat2 " I_VIEWPORT_SIZE ";\n" "\tuint4 xfmem_pack1[8];\n" + "\tfloat4 " I_CACHED_TANGENT ";\n" + "\tfloat4 " I_CACHED_BINORMAL ";\n" "\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n" "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n" "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 68915351d1..40b4cd65e5 100644 --- 
a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -57,9 +57,9 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config { out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) @@ -106,9 +106,9 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("VS_OUTPUT main(\n"); // inputs - out.Write(" float3 rawnorm0 : NORMAL0,\n" - " float3 rawnorm1 : NORMAL1,\n" - " float3 rawnorm2 : NORMAL2,\n" + out.Write(" float3 rawnormal : NORMAL,\n" + " float3 rawtangent : TANGENT,\n" + " float3 rawbinormal : BINORMAL,\n" " float4 rawcolor0 : COLOR0,\n" " float4 rawcolor1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) @@ -131,7 +131,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "float3 N1;\n" "float3 N2;\n" "\n" - "if ((components & {}u) != 0u) {{// VB_HAS_POSMTXIDX\n", + "if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", VB_HAS_POSMTXIDX); out.Write(" // Vertex format has a per-vertex matrix\n" " int posidx = int(posmtx.r);\n" @@ -153,26 +153,38 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config " N2 = " 
I_POSNORMALMATRIX "[5].xyz;\n" "}}\n" "\n" + "// Multiply the position vector by the position matrix\n" "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" "o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n" "\n" - "// Only the first normal gets normalized (TODO: why?)\n" - "float3 _norm0 = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NRM0\n", - VB_HAS_NRM0); - out.Write( - " _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n" - "\n" - "float3 _norm1 = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NRM1\n", - VB_HAS_NRM1); - out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n" + "// The scale of the transform matrix is used to control the size of the emboss map\n" + "// effect by changing the scale of the transformed binormals (which only get used by\n" + "// emboss map texgens). 
By normalising the first transformed normal (which is used\n" + "// by lighting calculations and needs to be unit length), the same transform matrix\n" + "// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n" + "float3 _normal = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n", + VB_HAS_NORMAL); + out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, " + "rawnormal)));\n" "\n" - "float3 _norm2 = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NRM2\n", - VB_HAS_NRM2); - out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n" + "float3 _tangent = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n", + VB_HAS_TANGENT); + out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n" + "else\n" + " _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT + ".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n" + "\n" + "float3 _binormal = float3(0.0, 0.0, 0.0);\n" + "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n", + VB_HAS_BINORMAL); + out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, " + "rawbinormal));\n" + "else\n" + " _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL + ".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n" "\n"); // Hardware Lighting @@ -208,7 +220,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "}}\n" "\n"); - WriteVertexLighting(out, api_type, "pos.xyz", "_norm0", "vertex_color_0", "vertex_color_1", + WriteVertexLighting(out, api_type, "pos.xyz", "_normal", "vertex_color_0", "vertex_color_1", "o.colors_0", "o.colors_1"); // Texture Coordinates @@ -246,7 +258,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config if (per_pixel_lighting) { - out.Write("o.Normal = _norm0;\n" + out.Write("o.Normal = 
_normal;\n" "o.WorldPos = pos.xyz;\n"); } @@ -393,19 +405,19 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out.Write(" coord.xyz = rawpos.xyz;\n"); out.Write(" break;\n\n"); out.Write(" case {:s}:\n", SourceRow::Normal); - out.Write( - " coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;", - VB_HAS_NRM0); + out.Write(" coord.xyz = ((components & {}u /* VB_HAS_NORMAL */) != 0u) ? rawnormal.xyz : " + "coord.xyz;", + VB_HAS_NORMAL); out.Write(" break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalT); - out.Write( - " coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;", - VB_HAS_NRM1); + out.Write(" coord.xyz = ((components & {}u /* VB_HAS_TANGENT */) != 0u) ? rawtangent.xyz : " + "coord.xyz;", + VB_HAS_TANGENT); out.Write(" break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalB); - out.Write( - " coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;", - VB_HAS_NRM2); + out.Write(" coord.xyz = ((components & {}u /* VB_HAS_BINORMAL */) != 0u) ? 
rawbinormal.xyz : " + "coord.xyz;", + VB_HAS_BINORMAL); out.Write(" break;\n\n"); for (u32 i = 0; i < 8; i++) { @@ -447,12 +459,9 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& for (u32 i = 0; i < num_texgen; i++) out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i); out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n" - " }}\n"); - out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_NRM1 | VB_HAS_NRM2\n", - VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2 - out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" - " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n" " }}\n" + " float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n" + " output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n" " }}\n" " break;\n\n"); out.Write(" case {:s}:\n", TexGenType::Color0); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index b01ac961b9..751778dd24 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -22,8 +22,8 @@ u8* g_vertex_manager_write_ptr; static void PosMtx_ReadDirect_UByte(VertexLoader* loader) { u32 posmtx = DataRead() & 0x3f; - if (loader->m_counter < 3) - VertexLoaderManager::position_matrix_index[loader->m_counter + 1] = posmtx; + if (loader->m_remaining < 3) + VertexLoaderManager::position_matrix_index_cache[loader->m_remaining] = posmtx; DataWrite(posmtx); PRIM_LOG("posmtx: {}, ", posmtx); } @@ -257,7 +257,7 @@ int VertexLoader::RunVertices(DataReader src, DataReader dst, int count) m_numLoadedVertices += count; m_skippedVertices = 0; - for (m_counter = count - 1; m_counter >= 0; m_counter--) + for (m_remaining = count - 1; m_remaining >= 0; m_remaining--) { m_tcIndex = 0; m_colIndex = 0; diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 
d1f80f80c1..b3bb0b270a 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -35,7 +35,7 @@ public: int m_texmtxread; bool m_vertexSkip; int m_skippedVertices; - int m_counter; + int m_remaining; private: // Pipeline. diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 330deef548..eff8a29993 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -14,7 +14,7 @@ using namespace Arm64Gen; constexpr ARM64Reg src_reg = ARM64Reg::X0; constexpr ARM64Reg dst_reg = ARM64Reg::X1; -constexpr ARM64Reg count_reg = ARM64Reg::W2; +constexpr ARM64Reg remaining_reg = ARM64Reg::W2; constexpr ARM64Reg skipped_reg = ARM64Reg::W17; constexpr ARM64Reg scratch1_reg = ARM64Reg::W16; constexpr ARM64Reg scratch2_reg = ARM64Reg::W15; @@ -209,12 +209,24 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm // Z-Freeze if (native_format == &m_native_vtx_decl.position) { - CMP(count_reg, 3); - FixupBranch dont_store = B(CC_GT); - MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_cache); - ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg), EncodeRegTo64(count_reg), - ArithOption(EncodeRegTo64(count_reg), ShiftType::LSL, 4)); - m_float_emit.STUR(write_size, coords, EncodeRegTo64(scratch1_reg), -16); + CMP(remaining_reg, 3); + FixupBranch dont_store = B(CC_GE); + MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_cache.data()); + m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true)); + SetJumpTarget(dont_store); + } + else if (native_format == &m_native_vtx_decl.normals[1]) + { + FixupBranch dont_store = CBNZ(remaining_reg); + MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::tangent_cache.data()); + m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0); + SetJumpTarget(dont_store); + } + 
else if (native_format == &m_native_vtx_decl.normals[2]) + { + FixupBranch dont_store = CBNZ(remaining_reg); + MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::binormal_cache.data()); + m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0); SetJumpTarget(dont_store); } @@ -403,7 +415,7 @@ void VertexLoaderARM64::GenerateVertexLoader() AlignCode16(); if (IsIndexed(m_VtxDesc.low.Position)) MOV(skipped_reg, ARM64Reg::WZR); - MOV(saved_count, count_reg); + ADD(saved_count, remaining_reg, 1); MOVP2R(stride_reg, g_main_cp_state.array_strides.data()); MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data()); @@ -420,10 +432,10 @@ void VertexLoaderARM64::GenerateVertexLoader() STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs); // Z-Freeze - CMP(count_reg, 3); - FixupBranch dont_store = B(CC_GT); - MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_matrix_index); - STR(IndexType::Unsigned, scratch1_reg, EncodeRegTo64(scratch2_reg), 0); + CMP(remaining_reg, 3); + FixupBranch dont_store = B(CC_GE); + MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_matrix_index_cache.data()); + STR(scratch1_reg, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true)); SetJumpTarget(dont_store); m_native_vtx_decl.posmtx.components = 4; @@ -583,8 +595,8 @@ void VertexLoaderARM64::GenerateVertexLoader() const u8* cont = GetCodePtr(); ADD(src_reg, src_reg, m_src_ofs); - SUB(count_reg, count_reg, 1); - CBNZ(count_reg, loop_start); + SUBS(remaining_reg, remaining_reg, 1); + B(CCFlags::CC_GE, loop_start); if (IsIndexed(m_VtxDesc.low.Position)) { @@ -611,5 +623,5 @@ int VertexLoaderARM64::RunVertices(DataReader src, DataReader dst, int count) { m_numLoadedVertices += count; return ((int (*)(u8 * src, u8 * dst, int count)) region)(src.GetPointer(), dst.GetPointer(), - count); + count - 1); } diff --git a/Source/Core/VideoCommon/VertexLoaderBase.cpp b/Source/Core/VideoCommon/VertexLoaderBase.cpp index 
12f33bf038..2b6c3cdd21 100644 --- a/Source/Core/VideoCommon/VertexLoaderBase.cpp +++ b/Source/Core/VideoCommon/VertexLoaderBase.cpp @@ -151,9 +151,9 @@ u32 VertexLoaderBase::GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& v // Vertices always have positions; thus there is no VB_HAS_POS as it would always be set if (vtx_desc.low.Normal != VertexComponentFormat::NotPresent) { - components |= VB_HAS_NRM0; + components |= VB_HAS_NORMAL; if (vtx_attr.g0.NormalElements == NormalComponentCount::NBT) - components |= VB_HAS_NRM1 | VB_HAS_NRM2; + components |= VB_HAS_TANGENT | VB_HAS_BINORMAL; } for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++) { diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index b0922e5a6e..7710b7cc20 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -31,11 +31,12 @@ namespace VertexLoaderManager { -float position_cache[3][4]; - -// The counter added to the address of the array is 1, 2, or 3, but never zero. -// So only index 1 - 3 are used. 
-u32 position_matrix_index[4]; +// Used by zfreeze +std::array position_matrix_index_cache; +// 3 vertices, 4 floats each to allow SIMD overwrite +alignas(sizeof(std::array)) std::array, 3> position_cache; +alignas(sizeof(std::array)) std::array tangent_cache; +alignas(sizeof(std::array)) std::array binormal_cache; static NativeVertexFormatMap s_native_vertex_map; static NativeVertexFormat* s_current_vtx_fmt; @@ -251,8 +252,9 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, bool is_preprocess) { - if (!count) + if (count == 0) return 0; + ASSERT(count > 0); VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index d6bda13c00..c5f9ae5376 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -53,8 +53,12 @@ void UpdateVertexArrayPointers(); // Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite). // These arrays are in reverse order. -extern float position_cache[3][4]; -extern u32 position_matrix_index[4]; +extern std::array, 3> position_cache; +extern std::array position_matrix_index_cache; +// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format +// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites. +extern std::array tangent_cache; +extern std::array binormal_cache; // VB_HAS_X. Bitmask telling what vertex components are present. 
extern u32 g_current_components; diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index 7a4929361d..aebba7680d 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -26,7 +26,9 @@ static const X64Reg dst_reg = ABI_PARAM2; static const X64Reg scratch1 = RAX; static const X64Reg scratch2 = ABI_PARAM3; static const X64Reg scratch3 = ABI_PARAM4; -static const X64Reg count_reg = R10; +// The remaining number of vertices to be processed. Starts at count - 1, and the final loop has it +// at 0. +static const X64Reg remaining_reg = R10; static const X64Reg skipped_reg = R11; static const X64Reg base_reg = RBX; @@ -114,6 +116,35 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com X64Reg coords = XMM0; + const auto write_zfreeze = [&]() { // zfreeze + if (native_format == &m_native_vtx_decl.position) + { + CMP(32, R(remaining_reg), Imm8(3)); + FixupBranch dont_store = J_CC(CC_AE); + // The position cache is composed of 3 rows of 4 floats each; since each float is 4 bytes, + // we need to scale by 4 twice to cover the 4 floats. 
+ LEA(32, scratch3, MScaled(remaining_reg, SCALE_4, 0)); + MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords); + SetJumpTarget(dont_store); + } + else if (native_format == &m_native_vtx_decl.normals[1]) + { + TEST(32, R(remaining_reg), R(remaining_reg)); + FixupBranch dont_store = J_CC(CC_NZ); + // For similar reasons, the cached tangent and binormal are 4 floats each + MOVUPS(MPIC(VertexLoaderManager::tangent_cache.data()), coords); + SetJumpTarget(dont_store); + } + else if (native_format == &m_native_vtx_decl.normals[2]) + { + TEST(32, R(remaining_reg), R(remaining_reg)); + FixupBranch dont_store = J_CC(CC_NZ); + // For similar reasons, the cached tangent and binormal are 4 floats each + MOVUPS(MPIC(VertexLoaderManager::binormal_cache.data()), coords); + SetJumpTarget(dont_store); + } + }; + int elem_size = GetElementSize(format); int load_bytes = elem_size * count_in; OpArg dest = MDisp(dst_reg, m_dst_ofs); @@ -202,7 +233,9 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com dest.AddMemOffset(sizeof(float)); // zfreeze - if (native_format == &m_native_vtx_decl.position) + if (native_format == &m_native_vtx_decl.position || + native_format == &m_native_vtx_decl.normals[1] || + native_format == &m_native_vtx_decl.normals[2]) { if (cpu_info.bSSE4_1) { @@ -217,16 +250,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com } } - // zfreeze - if (native_format == &m_native_vtx_decl.position) - { - CMP(32, R(count_reg), Imm8(3)); - FixupBranch dont_store = J_CC(CC_A); - LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4)); - MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords); - SetJumpTarget(dont_store); - } - return load_bytes; + write_zfreeze(); } } @@ -251,15 +275,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com break; } - // zfreeze - if (native_format == &m_native_vtx_decl.position) - { - CMP(32, R(count_reg), 
Imm8(3)); - FixupBranch dont_store = J_CC(CC_A); - LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4)); - MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords); - SetJumpTarget(dont_store); - } + write_zfreeze(); return load_bytes; } @@ -385,8 +401,8 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, Col void VertexLoaderX64::GenerateVertexLoader() { - BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2, - scratch3, count_reg, skipped_reg, base_reg}; + BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2, + scratch3, remaining_reg, skipped_reg, base_reg}; regs &= ABI_ALL_CALLEE_SAVED; ABI_PushRegistersAndAdjustStack(regs, 0); @@ -394,7 +410,9 @@ void VertexLoaderX64::GenerateVertexLoader() PUSH(32, R(ABI_PARAM3)); // ABI_PARAM3 is one of the lower registers, so free it for scratch2. - MOV(32, R(count_reg), R(ABI_PARAM3)); + // We also have it end at a value of 0, to simplify indexing for zfreeze; + // this requires subtracting 1 at the start. + LEA(32, remaining_reg, MDisp(ABI_PARAM3, -1)); MOV(64, R(base_reg), R(ABI_PARAM4)); @@ -412,9 +430,10 @@ void VertexLoaderX64::GenerateVertexLoader() MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1)); // zfreeze - CMP(32, R(count_reg), Imm8(3)); - FixupBranch dont_store = J_CC(CC_A); - MOV(32, MPIC(VertexLoaderManager::position_matrix_index, count_reg, SCALE_4), R(scratch1)); + CMP(32, R(remaining_reg), Imm8(3)); + FixupBranch dont_store = J_CC(CC_AE); + MOV(32, MPIC(VertexLoaderManager::position_matrix_index_cache.data(), remaining_reg, SCALE_4), + R(scratch1)); SetJumpTarget(dont_store); m_native_vtx_decl.posmtx.components = 4; @@ -513,8 +532,8 @@ void VertexLoaderX64::GenerateVertexLoader() const u8* cont = GetCodePtr(); ADD(64, R(src_reg), Imm32(m_src_ofs)); - SUB(32, R(count_reg), Imm8(1)); - J_CC(CC_NZ, loop_start); + SUB(32, R(remaining_reg), Imm8(1)); + J_CC(CC_AE, loop_start); // Get the original count. 
POP(32, R(ABI_RETURN)); diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 254bcacff3..db955639d7 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -40,14 +40,22 @@ constexpr float FracAdjust(float val) } template -void ReadIndirect(const T* data) +void ReadIndirect(VertexLoader* loader, const T* data) { static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); DataReader dst(g_vertex_manager_write_ptr, nullptr); for (u32 i = 0; i < N; ++i) { - dst.Write(FracAdjust(Common::FromBigEndian(data[i]))); + const float value = FracAdjust(Common::FromBigEndian(data[i])); + if (loader->m_remaining == 0) + { + if (i >= 3 && i < 6) + VertexLoaderManager::tangent_cache[i - 3] = value; + else if (i >= 6 && i < 9) + VertexLoaderManager::binormal_cache[i - 6] = value; + } + dst.Write(value); } g_vertex_manager_write_ptr = dst.GetPointer(); @@ -57,10 +65,10 @@ void ReadIndirect(const T* data) template struct Normal_Direct { - static void function([[maybe_unused]] VertexLoader* loader) + static void function(VertexLoader* loader) { const auto source = reinterpret_cast(DataGetPosition()); - ReadIndirect(source); + ReadIndirect(loader, source); DataSkip(); } @@ -68,7 +76,7 @@ struct Normal_Direct }; template -void Normal_Index_Offset() +void Normal_Index_Offset(VertexLoader* loader) { static_assert(std::is_unsigned_v, "Only unsigned I is sane!"); @@ -76,24 +84,24 @@ void Normal_Index_Offset() const auto data = reinterpret_cast( VertexLoaderManager::cached_arraybases[CPArray::Normal] + (index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset); - ReadIndirect(data); + ReadIndirect(loader, data); } template struct Normal_Index { - static void function([[maybe_unused]] VertexLoader* loader) { Normal_Index_Offset(); } + static void function(VertexLoader* loader) { Normal_Index_Offset(loader); } static constexpr u32 size = 
sizeof(I); }; template struct Normal_Index_Indices3 { - static void function([[maybe_unused]] VertexLoader* loader) + static void function(VertexLoader* loader) { - Normal_Index_Offset(); - Normal_Index_Offset(); - Normal_Index_Offset(); + Normal_Index_Offset(loader); + Normal_Index_Offset(loader); + Normal_Index_Offset(loader); } static constexpr u32 size = sizeof(I) * 3; diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 0fe8e7ba72..15d2f6d94e 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -41,8 +41,8 @@ void Pos_ReadDirect(VertexLoader* loader) for (int i = 0; i < N; ++i) { const float value = PosScale(src.Read(), scale); - if (loader->m_counter < 3) - VertexLoaderManager::position_cache[loader->m_counter][i] = value; + if (loader->m_remaining < 3) + VertexLoaderManager::position_cache[loader->m_remaining][i] = value; dst.Write(value); } @@ -68,8 +68,8 @@ void Pos_ReadIndex(VertexLoader* loader) for (int i = 0; i < N; ++i) { const float value = PosScale(Common::FromBigEndian(data[i]), scale); - if (loader->m_counter < 3) - VertexLoaderManager::position_cache[loader->m_counter][i] = value; + if (loader->m_remaining < 3) + VertexLoaderManager::position_cache[loader->m_remaining][i] = value; dst.Write(value); } diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 5fa85b2761..bb23a468ea 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -453,6 +453,7 @@ void VertexManagerBase::Flush() } } + CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat()); // Calculate ZSlope for zfreeze VertexShaderManager::SetConstants(); if (!bpmem.genMode.zfreeze) @@ -558,7 +559,7 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format) { // If this vertex format has per-vertex position matrix IDs, 
look it up. if (vert_decl.posmtx.enable) - mtxIdx = VertexLoaderManager::position_matrix_index[3 - i]; + mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i]; if (vert_decl.position.components == 2) VertexLoaderManager::position_cache[2 - i][2] = 0; @@ -595,6 +596,31 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format) m_zslope.dirty = true; } +void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format) +{ + const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration(); + + // Only update the binormal/tangent vertex shader constants if the vertex format lacks binormals + // (VertexLoaderManager::binormal_cache gets updated by the vertex loader when binormals are + // present, though) + if (vert_decl.normals[1].enable) + return; + + VertexLoaderManager::tangent_cache[3] = 0; + VertexLoaderManager::binormal_cache[3] = 0; + + if (VertexShaderManager::constants.cached_tangent != VertexLoaderManager::tangent_cache) + { + VertexShaderManager::constants.cached_tangent = VertexLoaderManager::tangent_cache; + VertexShaderManager::dirty = true; + } + if (VertexShaderManager::constants.cached_binormal != VertexLoaderManager::binormal_cache) + { + VertexShaderManager::constants.cached_binormal = VertexLoaderManager::binormal_cache; + VertexShaderManager::dirty = true; + } +} + void VertexManagerBase::UpdatePipelineConfig() { NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat(); diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index c413889713..e4d029e9c7 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -172,6 +172,7 @@ protected: u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const; void CalculateZSlope(NativeVertexFormat* format); + void CalculateBinormals(NativeVertexFormat* format); void LoadTextures(); u8* m_cur_buffer_pointer = nullptr; diff --git 
a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 7f3e00609a..9fa2d6cd0e 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -39,7 +39,7 @@ VertexShaderUid GetVertexShaderUid() switch (texinfo.texgentype) { case TexGenType::EmbossMap: // calculate tex coords into bump map - if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0) + if ((uid_data->components & (VB_HAS_TANGENT | VB_HAS_BINORMAL)) != 0) { // transform the light dir into tangent space texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift; @@ -105,12 +105,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - if ((uid_data->components & VB_HAS_NRM0) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); - if ((uid_data->components & VB_HAS_NRM1) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB); - if ((uid_data->components & VB_HAS_NRM2) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); if ((uid_data->components & VB_HAS_COL0) != 0) out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); @@ -169,12 +169,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho 
out.Write("VS_OUTPUT main(\n"); // inputs - if ((uid_data->components & VB_HAS_NRM0) != 0) - out.Write(" float3 rawnorm0 : NORMAL0,\n"); - if ((uid_data->components & VB_HAS_NRM1) != 0) - out.Write(" float3 rawnorm1 : NORMAL1,\n"); - if ((uid_data->components & VB_HAS_NRM2) != 0) - out.Write(" float3 rawnorm2 : NORMAL2,\n"); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + out.Write(" float3 rawnormal : NORMAL,\n"); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + out.Write(" float3 rawtangent : TANGENT,\n"); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + out.Write(" float3 rawbinormal : BINORMAL,\n"); if ((uid_data->components & VB_HAS_COL0) != 0) out.Write(" float4 rawcolor0 : COLOR0,\n"); if ((uid_data->components & VB_HAS_COL1) != 0) @@ -222,60 +222,60 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho // transforms if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) { + // Vertex format has a per-vertex matrix out.Write("int posidx = int(posmtx.r);\n" - "float4 pos = float4(dot(" I_TRANSFORMMATRICES - "[posidx], rawpos), dot(" I_TRANSFORMMATRICES - "[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n"); - - if ((uid_data->components & VB_HAS_NRMALL) != 0) + "float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n" + "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n" + "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"); + if ((uid_data->components & VB_HAS_NORMAL) != 0) { out.Write("int normidx = posidx & 31;\n" - "float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES - "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); - } - - if ((uid_data->components & VB_HAS_NRM0) != 0) - { - out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, " - "rawnorm0)));\n"); - } - if ((uid_data->components & VB_HAS_NRM1) != 0) - { - out.Write( - "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); - } - if 
((uid_data->components & VB_HAS_NRM2) != 0) - { - out.Write( - "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); + "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" + "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); } } else { - out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX - "[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n"); - if ((uid_data->components & VB_HAS_NRM0) != 0) + // One shared matrix + out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n" + "float4 P1 = " I_POSNORMALMATRIX "[1];\n" + "float4 P2 = " I_POSNORMALMATRIX "[2];\n"); + if ((uid_data->components & VB_HAS_NORMAL) != 0) { - out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n"); - } - if ((uid_data->components & VB_HAS_NRM1) != 0) - { - out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n"); - } - if ((uid_data->components & VB_HAS_NRM2) != 0) - { - out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX - "[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX - "[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n"); + out.Write("float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" + "float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" + "float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); } } - if ((uid_data->components & VB_HAS_NRM0) == 0) - out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n"); + out.Write("// Multiply the position vector by the position matrix\n" + "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + { + if ((uid_data->components & VB_HAS_TANGENT) == 0) + out.Write("float3 rawtangent = " 
I_CACHED_TANGENT ".xyz;\n"); + if ((uid_data->components & VB_HAS_BINORMAL) == 0) + out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n"); + + // The scale of the transform matrix is used to control the size of the emboss map effect, by + // changing the scale of the transformed binormals (which only get used by emboss map texgens). + // By normalising the first transformed normal (which is used by lighting calculations and needs + // to be unit length), the same transform matrix can do double duty, scaling for emboss mapping, + // and not scaling for lighting. + out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, " + "rawnormal)));\n" + "float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, " + "rawtangent));\n" + "float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, " + "rawbinormal));\n"); + } + else + { + out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n"); + out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n"); + out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n"); + } out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION "[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"); @@ -300,24 +300,24 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("coord.xyz = rawpos.xyz;\n"); break; case SourceRow::Normal: - if ((uid_data->components & VB_HAS_NRM0) != 0) + if ((uid_data->components & VB_HAS_NORMAL) != 0) { - out.Write("coord.xyz = rawnorm0.xyz;\n"); + out.Write("coord.xyz = rawnormal.xyz;\n"); } break; case SourceRow::Colors: ASSERT(texinfo.texgentype == TexGenType::Color0 || texinfo.texgentype == TexGenType::Color1); break; case SourceRow::BinormalT: - if ((uid_data->components & VB_HAS_NRM1) != 0) + if ((uid_data->components & VB_HAS_TANGENT) != 0) { - out.Write("coord.xyz = rawnorm1.xyz;\n"); + out.Write("coord.xyz = rawtangent.xyz;\n"); } break; case 
SourceRow::BinormalB: - if ((uid_data->components & VB_HAS_NRM2) != 0) + if ((uid_data->components & VB_HAS_BINORMAL) != 0) { - out.Write("coord.xyz = rawnorm2.xyz;\n"); + out.Write("coord.xyz = rawbinormal.xyz;\n"); } break; default: @@ -346,22 +346,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho { case TexGenType::EmbossMap: // calculate tex coords into bump map - if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0) - { - // transform the light dir into tangent space - out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", - LIGHT_POS_PARAMS(texinfo.embosslightshift)); - out.Write( - "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, - texinfo.embosssourceshift); - } - else - { - // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue - // Squadron 2 - // ASSERT(0); // should have normals - out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift); - } + // transform the light dir into tangent space + out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n", + LIGHT_POS_PARAMS(texinfo.embosslightshift)); + out.Write( + "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n", + i, texinfo.embosssourceshift); break; case TexGenType::Color0: @@ -471,7 +461,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho if (per_pixel_lighting) { - out.Write("o.Normal = _norm0;\n" + out.Write("o.Normal = _normal;\n" "o.WorldPos = pos.xyz;\n"); } diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 5f2a73848c..028404c6e8 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -17,9 +17,9 @@ enum : int { SHADER_POSITION_ATTRIB = 0, SHADER_POSMTX_ATTRIB = 1, - SHADER_NORM0_ATTRIB = 2, - SHADER_NORM1_ATTRIB = 3, - SHADER_NORM2_ATTRIB = 4, + SHADER_NORMAL_ATTRIB = 2, + 
SHADER_TANGENT_ATTRIB = 3, + SHADER_BINORMAL_ATTRIB = 4, SHADER_COLOR0_ATTRIB = 5, SHADER_COLOR1_ATTRIB = 6,