VideoCommon/Software: handle emboss texgens when the vertex format has no tangent/binormal

What this patch does:
  * Adds two vertex shader constants, cached_tangent and cached_binormal
    (exposed to shaders as ctangent / cbinormal).
  * The vertex loaders (generic C++, x64 JIT, ARM64 JIT) copy the tangent and
    binormal they decode into VertexLoaderManager::tangent_cache /
    binormal_cache when the loader's remaining-vertex counter is zero.
  * VertexManagerBase::Flush() calls the new CalculateBinormals(), which copies
    those caches into the shader constants when the current vertex format has
    no normals[1] attribute.
  * VertexShaderGen.cpp, UberShaderVertex.cpp and the software renderer fall
    back to the cached vectors when the format lacks a tangent or binormal, and
    the emboss texgen offset is no longer guarded by a component check.

Review fix folded into this revision:
  * VertexLoaderX64.cpp, normals[2] branch: the guard used
    CMP(32, R(remaining_reg), R(remaining_reg)). Comparing a register with
    itself always sets ZF, so J_CC(CC_NZ) was never taken and the binormal
    cache was rewritten for every vertex. It now uses TEST, matching the
    normals[1] branch.

NOTE(review): several template argument lists in this text (for example
"std::array xfmem_pack1;", "alignas(sizeof(std::array))", and the bare
"template" heads in VertexLoader_Normal.cpp) appear to have lost their <...>
contents before the patch was captured, and indentation / blank context lines
were re-created from the hunk counts. Confirm against the real tree before
applying.

diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp
index 7d9b64f01d..4b9825132e 100644
--- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp
+++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp
@@ -25,6 +25,7 @@
 #include "VideoCommon/Statistics.h"
 #include "VideoCommon/VertexLoaderBase.h"
 #include "VideoCommon/VertexLoaderManager.h"
+#include "VideoCommon/VertexShaderManager.h"
 #include "VideoCommon/VideoConfig.h"
 #include "VideoCommon/XFMemory.h"
 
@@ -90,10 +91,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
     TransformUnit::TransformPosition(&m_vertex, outVertex);
     outVertex->normal = {};
     if (VertexLoaderManager::g_current_components & VB_HAS_NORMAL)
-    {
-      TransformUnit::TransformNormal(
-          &m_vertex, (VertexLoaderManager::g_current_components & VB_HAS_BINORMAL) != 0, outVertex);
-    }
+      TransformUnit::TransformNormal(&m_vertex, outVertex);
     TransformUnit::TransformColor(&m_vertex, outVertex);
     TransformUnit::TransformTexCoord(&m_vertex, outVertex);
 
@@ -230,6 +228,18 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int inde
   {
     ReadVertexAttribute(&m_vertex.normal[i][0], src, vdec.normals[i], 0, 3, false);
   }
+  if (!vdec.normals[1].enable)
+  {
+    m_vertex.normal[1][0] = VertexShaderManager::constants.cached_tangent[0];
+    m_vertex.normal[1][1] = VertexShaderManager::constants.cached_tangent[1];
+    m_vertex.normal[1][2] = VertexShaderManager::constants.cached_tangent[2];
+  }
+  if (!vdec.normals[2].enable)
+  {
+    m_vertex.normal[2][0] = VertexShaderManager::constants.cached_binormal[0];
+    m_vertex.normal[2][1] = VertexShaderManager::constants.cached_binormal[1];
+    m_vertex.normal[2][2] = VertexShaderManager::constants.cached_binormal[2];
+  }
 
   ParseColorAttributes(&m_vertex, src, vdec);
 
diff --git a/Source/Core/VideoBackends/Software/TransformUnit.cpp b/Source/Core/VideoBackends/Software/TransformUnit.cpp
index bf56307025..e1e6fd68a6 100644
--- a/Source/Core/VideoBackends/Software/TransformUnit.cpp
+++ b/Source/Core/VideoBackends/Software/TransformUnit.cpp
@@ -90,22 +90,14 @@ void TransformPosition(const InputVertexData* src, OutputVertexData* dst)
   }
 }
 
-void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst)
+void TransformNormal(const InputVertexData* src, OutputVertexData* dst)
 {
   const float* mat = &xfmem.normalMatrices[(src->posMtx & 31) * 3];
 
-  if (nbt)
-  {
-    MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
-    MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
-    MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
-    dst->normal[0].Normalize();
-  }
-  else
-  {
-    MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
-    dst->normal[0].Normalize();
-  }
+  MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
+  MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
+  MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
+  dst->normal[0].Normalize();
 }
 
 static void TransformTexCoordRegular(const TexMtxInfo& texinfo, int coordNum,
diff --git a/Source/Core/VideoBackends/Software/TransformUnit.h b/Source/Core/VideoBackends/Software/TransformUnit.h
index e764127ec4..59152250b0 100644
--- a/Source/Core/VideoBackends/Software/TransformUnit.h
+++ b/Source/Core/VideoBackends/Software/TransformUnit.h
@@ -9,7 +9,7 @@ struct OutputVertexData;
 namespace TransformUnit
 {
 void TransformPosition(const InputVertexData* src, OutputVertexData* dst);
-void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst);
+void TransformNormal(const InputVertexData* src, OutputVertexData* dst);
 void TransformColor(const InputVertexData* src, OutputVertexData* dst);
 void TransformTexCoord(const InputVertexData* src, OutputVertexData* dst);
 }  // namespace TransformUnit
diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h
index 5335af963a..9fd4c060cd 100644
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@@ -90,6 +90,9 @@ struct VertexShaderConstants
 
   // .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
   std::array xfmem_pack1;
+
+  float4 cached_tangent;
+  float4 cached_binormal;
 };
 
 struct GeometryShaderConstants
diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h
index bec69fe7c0..c9f0246318 100644
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@@ -296,6 +296,8 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
 #define I_POSTTRANSFORMMATRICES "cpostmtx"
 #define I_PIXELCENTERCORRECTION "cpixelcenter"
 #define I_VIEWPORT_SIZE "cviewport"
+#define I_CACHED_TANGENT "ctangent"
+#define I_CACHED_BINORMAL "cbinormal"
 
 #define I_STEREOPARAMS "cstereo"
 #define I_LINEPTPARAMS "clinept"
@@ -317,6 +319,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
                                         "\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
                                         "\tfloat2 " I_VIEWPORT_SIZE ";\n"
                                         "\tuint4 xfmem_pack1[8];\n"
+                                        "\tfloat4 " I_CACHED_TANGENT ";\n"
+                                        "\tfloat4 " I_CACHED_BINORMAL ";\n"
                                         "\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
                                         "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
                                         "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"
diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp
index 91568a4cec..c68c996b0a 100644
--- a/Source/Core/VideoCommon/UberShaderVertex.cpp
+++ b/Source/Core/VideoCommon/UberShaderVertex.cpp
@@ -169,12 +169,18 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
             "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n",
             VB_HAS_TANGENT);
   out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n"
+            "else\n"
+            " _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT
+            ".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n"
             "\n"
             "float3 _binormal = float3(0.0, 0.0, 0.0);\n"
             "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n",
             VB_HAS_BINORMAL);
   out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
             "rawbinormal));\n"
+            "else\n"
+            " _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL
+            ".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n"
             "\n");
 
   // Hardware Lighting
@@ -449,12 +455,9 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
   for (u32 i = 0; i < num_texgen; i++)
     out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
   out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
-            " }}\n");
-  out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_TANGENT | VB_HAS_BINORMAL\n",
-            VB_HAS_TANGENT | VB_HAS_BINORMAL);
-  out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
-            " output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n"
             " }}\n"
+            " float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
+            " output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n"
             " }}\n"
             " break;\n\n");
   out.Write(" case {:s}:\n", TexGenType::Color0);
diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
index ab9ced5d8a..eff8a29993 100644
--- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
@@ -215,6 +215,20 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
     m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
     SetJumpTarget(dont_store);
   }
+  else if (native_format == &m_native_vtx_decl.normals[1])
+  {
+    FixupBranch dont_store = CBNZ(remaining_reg);
+    MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::tangent_cache.data());
+    m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
+    SetJumpTarget(dont_store);
+  }
+  else if (native_format == &m_native_vtx_decl.normals[2])
+  {
+    FixupBranch dont_store = CBNZ(remaining_reg);
+    MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::binormal_cache.data());
+    m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
+    SetJumpTarget(dont_store);
+  }
 
   native_format->components = count_out;
   native_format->enable = true;
diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp
index f5c9578a70..7710b7cc20 100644
--- a/Source/Core/VideoCommon/VertexLoaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp
@@ -35,6 +35,8 @@ namespace VertexLoaderManager
 std::array position_matrix_index_cache;
 // 3 vertices, 4 floats each to allow SIMD overwrite
 alignas(sizeof(std::array)) std::array, 3> position_cache;
+alignas(sizeof(std::array)) std::array tangent_cache;
+alignas(sizeof(std::array)) std::array binormal_cache;
 
 static NativeVertexFormatMap s_native_vertex_map;
 static NativeVertexFormat* s_current_vtx_fmt;
diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h
index 5573e08103..c5f9ae5376 100644
--- a/Source/Core/VideoCommon/VertexLoaderManager.h
+++ b/Source/Core/VideoCommon/VertexLoaderManager.h
@@ -55,6 +55,10 @@ void UpdateVertexArrayPointers();
 // These arrays are in reverse order.
 extern std::array, 3> position_cache;
 extern std::array position_matrix_index_cache;
+// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format
+// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites.
+extern std::array tangent_cache;
+extern std::array binormal_cache;
 
 // VB_HAS_X. Bitmask telling what vertex components are present.
 extern u32 g_current_components;
diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp
index da52788d3c..aebba7680d 100644
--- a/Source/Core/VideoCommon/VertexLoaderX64.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp
@@ -127,6 +127,22 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
       MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
       SetJumpTarget(dont_store);
     }
+    else if (native_format == &m_native_vtx_decl.normals[1])
+    {
+      TEST(32, R(remaining_reg), R(remaining_reg));
+      FixupBranch dont_store = J_CC(CC_NZ);
+      // For similar reasons, the cached tangent and binormal are 4 floats each
+      MOVUPS(MPIC(VertexLoaderManager::tangent_cache.data()), coords);
+      SetJumpTarget(dont_store);
+    }
+    else if (native_format == &m_native_vtx_decl.normals[2])
+    {
+      TEST(32, R(remaining_reg), R(remaining_reg));
+      FixupBranch dont_store = J_CC(CC_NZ);
+      // For similar reasons, the cached tangent and binormal are 4 floats each
+      MOVUPS(MPIC(VertexLoaderManager::binormal_cache.data()), coords);
+      SetJumpTarget(dont_store);
+    }
   };
 
   int elem_size = GetElementSize(format);
@@ -217,7 +233,9 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
       dest.AddMemOffset(sizeof(float));
 
       // zfreeze
-      if (native_format == &m_native_vtx_decl.position)
+      if (native_format == &m_native_vtx_decl.position ||
+          native_format == &m_native_vtx_decl.normals[1] ||
+          native_format == &m_native_vtx_decl.normals[2])
       {
         if (cpu_info.bSSE4_1)
         {
diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp
index 254bcacff3..db955639d7 100644
--- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp
+++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp
@@ -40,14 +40,22 @@ constexpr float FracAdjust(float val)
 }
 
 template 
-void ReadIndirect(const T* data)
+void ReadIndirect(VertexLoader* loader, const T* data)
 {
   static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
   DataReader dst(g_vertex_manager_write_ptr, nullptr);
 
   for (u32 i = 0; i < N; ++i)
   {
-    dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
+    const float value = FracAdjust(Common::FromBigEndian(data[i]));
+    if (loader->m_remaining == 0)
+    {
+      if (i >= 3 && i < 6)
+        VertexLoaderManager::tangent_cache[i - 3] = value;
+      else if (i >= 6 && i < 9)
+        VertexLoaderManager::binormal_cache[i - 6] = value;
+    }
+    dst.Write(value);
   }
 
   g_vertex_manager_write_ptr = dst.GetPointer();
@@ -57,10 +65,10 @@ void ReadIndirect(const T* data)
 template 
 struct Normal_Direct
 {
-  static void function([[maybe_unused]] VertexLoader* loader)
+  static void function(VertexLoader* loader)
   {
     const auto source = reinterpret_cast(DataGetPosition());
-    ReadIndirect(source);
+    ReadIndirect(loader, source);
     DataSkip();
   }
 
@@ -68,7 +76,7 @@ struct Normal_Direct
 };
 
 template 
-void Normal_Index_Offset()
+void Normal_Index_Offset(VertexLoader* loader)
 {
   static_assert(std::is_unsigned_v, "Only unsigned I is sane!");
 
@@ -76,24 +84,24 @@ void Normal_Index_Offset()
   const auto data = reinterpret_cast(
       VertexLoaderManager::cached_arraybases[CPArray::Normal] +
       (index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
-  ReadIndirect(data);
+  ReadIndirect(loader, data);
 }
 
 template 
 struct Normal_Index
 {
-  static void function([[maybe_unused]] VertexLoader* loader) { Normal_Index_Offset(); }
+  static void function(VertexLoader* loader) { Normal_Index_Offset(loader); }
   static constexpr u32 size = sizeof(I);
 };
 
 template 
 struct Normal_Index_Indices3
 {
-  static void function([[maybe_unused]] VertexLoader* loader)
+  static void function(VertexLoader* loader)
   {
-    Normal_Index_Offset();
-    Normal_Index_Offset();
-    Normal_Index_Offset();
+    Normal_Index_Offset(loader);
+    Normal_Index_Offset(loader);
+    Normal_Index_Offset(loader);
   }
 
   static constexpr u32 size = sizeof(I) * 3;
diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp
index a93a9b34f1..bb23a468ea 100644
--- a/Source/Core/VideoCommon/VertexManagerBase.cpp
+++ b/Source/Core/VideoCommon/VertexManagerBase.cpp
@@ -453,6 +453,7 @@ void VertexManagerBase::Flush()
       }
     }
 
+    CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat());
     // Calculate ZSlope for zfreeze
     VertexShaderManager::SetConstants();
     if (!bpmem.genMode.zfreeze)
@@ -595,6 +596,31 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
   m_zslope.dirty = true;
 }
 
+void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
+{
+  const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
+
+  // Only update the binormal/tangent vertex shader constants if the vertex format lacks binormals
+  // (VertexLoaderManager::binormal_cache gets updated by the vertex loader when binormals are
+  // present, though)
+  if (vert_decl.normals[1].enable)
+    return;
+
+  VertexLoaderManager::tangent_cache[3] = 0;
+  VertexLoaderManager::binormal_cache[3] = 0;
+
+  if (VertexShaderManager::constants.cached_tangent != VertexLoaderManager::tangent_cache)
+  {
+    VertexShaderManager::constants.cached_tangent = VertexLoaderManager::tangent_cache;
+    VertexShaderManager::dirty = true;
+  }
+  if (VertexShaderManager::constants.cached_binormal != VertexLoaderManager::binormal_cache)
+  {
+    VertexShaderManager::constants.cached_binormal = VertexLoaderManager::binormal_cache;
+    VertexShaderManager::dirty = true;
+  }
+}
+
 void VertexManagerBase::UpdatePipelineConfig()
 {
   NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat();
diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h
index c413889713..e4d029e9c7 100644
--- a/Source/Core/VideoCommon/VertexManagerBase.h
+++ b/Source/Core/VideoCommon/VertexManagerBase.h
@@ -172,6 +172,7 @@ protected:
   u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
 
   void CalculateZSlope(NativeVertexFormat* format);
+  void CalculateBinormals(NativeVertexFormat* format);
   void LoadTextures();
 
   u8* m_cur_buffer_pointer = nullptr;
diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp
index 50ef13a9d7..0b176386e9 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@@ -253,23 +253,24 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
             "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
   if ((uid_data->components & VB_HAS_NORMAL) != 0)
   {
+    if ((uid_data->components & VB_HAS_TANGENT) == 0)
+      out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
+    if ((uid_data->components & VB_HAS_BINORMAL) == 0)
+      out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
+
+    // Only the first normal gets normalized (TODO: why?)
     out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
-              "rawnormal)));\n");
+              "rawnormal)));\n"
+              "float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
+              "rawtangent));\n"
+              "float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
+              "rawbinormal));\n");
   }
   else
   {
     out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n");
-  }
-  if ((uid_data->components & VB_HAS_TANGENT) != 0)
-  {
-    out.Write("float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
-              "rawtangent));\n");
-  }
-  if ((uid_data->components & VB_HAS_BINORMAL) != 0)
-  {
-    out.Write("float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
-              "rawbinormal));\n");
+    out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n");
+    out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n");
   }
 
   out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
@@ -341,22 +342,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
       {
       case TexGenType::EmbossMap:  // calculate tex coords into bump map
 
-        if ((uid_data->components & (VB_HAS_TANGENT | VB_HAS_BINORMAL)) != 0)
-        {
-          // transform the light dir into tangent space
-          out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
-                    LIGHT_POS_PARAMS(texinfo.embosslightshift));
-          out.Write(
-              "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
-              i, texinfo.embosssourceshift);
-        }
-        else
-        {
-          // The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
-          // Squadron 2
-          // ASSERT(0);  // should have normals
-          out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
-        }
+        // transform the light dir into tangent space
+        out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
+                  LIGHT_POS_PARAMS(texinfo.embosslightshift));
+        out.Write(
+            "o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
+            i, texinfo.embosssourceshift);
 
         break;
       case TexGenType::Color0: