From 4615a2d7b40c1126f70d83fcade79a268c6ec15d Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Mon, 16 Dec 2024 00:56:10 +0000 Subject: [PATCH] VertexCommon: cache the per-vertex position/normal matrix index --- Source/Core/VideoCommon/ConstantManager.h | 3 +- Source/Core/VideoCommon/ShaderGenCommon.h | 4 +-- Source/Core/VideoCommon/UberShaderVertex.cpp | 27 +++++++---------- Source/Core/VideoCommon/VertexManagerBase.cpp | 7 +++++ Source/Core/VideoCommon/VertexShaderGen.cpp | 30 ++++++++----------- .../Core/VideoCommon/VertexShaderManager.cpp | 15 +--------- Source/Core/VideoCommon/XFStateManager.cpp | 19 ------------ Source/Core/VideoCommon/XFStateManager.h | 4 --- 8 files changed, 36 insertions(+), 73 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 14c8732580..2bd43c2178 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -70,7 +70,6 @@ struct alignas(16) VertexShaderConstants u32 missing_color_hex; // .w, used for change detection but not directly by shaders float4 missing_color_value; - std::array posnormalmatrix; std::array projection; std::array materials; struct Light @@ -93,6 +92,8 @@ struct alignas(16) VertexShaderConstants // .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha std::array xfmem_pack1; + // TODO: only first element is currently used + std::array cached_posmtxidx; float4 cached_normal; float4 cached_tangent; float4 cached_binormal; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 4723cbfc79..ba71416197 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -273,7 +273,6 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable, #define I_ZSLOPE "czslope" #define I_EFBSCALE "cefbscale" -#define I_POSNORMALMATRIX "cpnmtx" #define I_PROJECTION "cproj" #define I_MATERIALS "cmtrl" #define I_LIGHTS "clights" @@ -283,6 +282,7 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable, #define I_POSTTRANSFORMMATRICES "cpostmtx" #define I_PIXELCENTERCORRECTION "cpixelcenter" #define I_VIEWPORT_SIZE "cviewport" +#define I_CACHED_POSMTXIDX "cposmtxidx" #define I_CACHED_NORMAL "cnormal" #define I_CACHED_TANGENT "ctangent" #define I_CACHED_BINORMAL "cbinormal" @@ -296,7 +296,6 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\tuint xfmem_numColorChans;\n" "\tuint missing_color_hex;\n" "\tfloat4 missing_color_value;\n" - "\tfloat4 " I_POSNORMALMATRIX "[6];\n" "\tfloat4 " I_PROJECTION "[4];\n" "\tint4 " I_MATERIALS "[4];\n" "\tLight " I_LIGHTS "[8];\n" @@ -307,6 +306,7 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\tfloat4 " I_PIXELCENTERCORRECTION ";\n" "\tfloat2 " I_VIEWPORT_SIZE ";\n" "\tuint4 xfmem_pack1[8];\n" + "\tuint4 " I_CACHED_POSMTXIDX ";\n" "\tfloat4 " I_CACHED_NORMAL ";\n" "\tfloat4 " I_CACHED_TANGENT ";\n" "\tfloat4 " I_CACHED_BINORMAL ";\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index a6c0502dfe..b8de92ffbc 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -223,28 +223,23 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ "float3 N1;\n" "float3 N2;\n" "\n" + "uint posidx;\n" "if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", Common::ToUnderlying(VB_HAS_POSMTXIDX)); LoadVertexAttribute(out, host_config, 2, "posmtx", "uint4", "ubyte4"); out.Write(" // Vertex format has a per-vertex matrix\n" - " int posidx = int(posmtx.r);\n" - " P0 = " I_TRANSFORMMATRICES "[posidx];\n" - " P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" - " P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" - "\n" - " int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" - " N0 = " I_NORMALMATRICES "[normidx].xyz;\n" - " N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" - " N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" + " posidx = int(posmtx.r);\n" "}} else {{\n" - " // One shared matrix\n" - " P0 = " I_POSNORMALMATRIX "[0];\n" - " P1 = " I_POSNORMALMATRIX "[1];\n" - " P2 = " I_POSNORMALMATRIX "[2];\n" - " N0 = " I_POSNORMALMATRIX "[3].xyz;\n" - " N1 = " I_POSNORMALMATRIX "[4].xyz;\n" - " N2 = " I_POSNORMALMATRIX "[5].xyz;\n" + " posidx = " I_CACHED_POSMTXIDX ".r;\n" "}}\n" + "P0 = " I_TRANSFORMMATRICES "[posidx];\n" + "P1 = " I_TRANSFORMMATRICES "[posidx+1];\n" + "P2 = " I_TRANSFORMMATRICES "[posidx+2];\n" + "\n" + "uint normidx = posidx >= 32 ? (posidx - 32) : posidx;\n" + "N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n" + "N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n" "\n" "// Multiply the position vector by the position matrix\n" "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n" diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index b53cb3d2ed..fbdd5a9120 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -558,6 +558,13 @@ void VertexManagerBase::Flush() pixel_shader_manager.constants.time_ms = seconds_elapsed * 1000; } + if (VertexLoaderManager::GetCurrentVertexFormat()->GetVertexDeclaration().posmtx.enable) + { + u32 posmtx = VertexLoaderManager::position_matrix_index_cache[0]; + u32 texmtx = xfmem.MatrixIndexA.Hex & 0xFFFF'FFC0; + xf_state_manager.SetTexMatrixChangedA(texmtx | posmtx); + xfmem.MatrixIndexA.PosNormalMtxIdx = posmtx; + } CalculateNormals(VertexLoaderManager::GetCurrentVertexFormat()); // Calculate ZSlope for zfreeze const auto used_textures = UsedTextures(); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 4a46834c14..0a72ad0c3d 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -306,29 +306,25 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho } // transforms - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + if (uid_data->components & VB_HAS_POSMTXIDX) { - // Vertex format has a per-vertex matrix - out.Write("int posidx = int(posmtx.r);\n" - "float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n" - "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n" - "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n" - "int normidx = posidx & 31;\n" - "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" - "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" - "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + // per-vertex position/normal matrix index + out.Write("uint posidx = int(posmtx.r);\n"); } else { - // One shared matrix - out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n" - "float4 P1 = " I_POSNORMALMATRIX "[1];\n" - "float4 P2 = " I_POSNORMALMATRIX "[2];\n" - "float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n" - "float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n" - "float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n"); + // shared position/normal matrix index + out.Write("uint posidx = " I_CACHED_POSMTXIDX ".r;\n"); } + out.Write("float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n" + "float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n" + "float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n" + "uint normidx = posidx & 31;\n" + "float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n" + "float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n" + "float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n"); + out.Write("// Multiply the position vector by the position matrix\n" "float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"); if ((uid_data->components & VB_HAS_NORMAL) == 0) diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 0415ac05dc..e0780c5c91 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -289,23 +289,10 @@ void VertexShaderManager::SetConstants(const std::vector& textures, } xf_state_manager.ResetMaterialChanges(); - if (xf_state_manager.DidPosNormalChange()) - { - xf_state_manager.ResetPosNormalChange(); - const float* pos = &xfmem.posMatrices[g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4]; - const float* norm = - &xfmem.normalMatrices[3 * (g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31)]; - - memcpy(constants.posnormalmatrix.data(), pos, 3 * sizeof(float4)); - memcpy(constants.posnormalmatrix[3].data(), norm, 3 * sizeof(float)); - memcpy(constants.posnormalmatrix[4].data(), norm + 3, 3 * sizeof(float)); - memcpy(constants.posnormalmatrix[5].data(), norm + 6, 3 * sizeof(float)); - dirty = true; - } - if (xf_state_manager.DidTexMatrixAChange()) { xf_state_manager.ResetTexMatrixAChange(); + constants.cached_posmtxidx[0] = g_main_cp_state.matrix_index_a.PosNormalMtxIdx; const std::array pos_matrix_ptrs{ &xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4], &xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4], diff --git a/Source/Core/VideoCommon/XFStateManager.cpp b/Source/Core/VideoCommon/XFStateManager.cpp index 98bf98482d..197e1d4e9c 100644 --- a/Source/Core/VideoCommon/XFStateManager.cpp +++ b/Source/Core/VideoCommon/XFStateManager.cpp @@ -14,7 +14,6 @@ void XFStateManager::Init() // Initialize state tracking variables ResetTexMatrixAChange(); ResetTexMatrixBChange(); - ResetPosNormalChange(); ResetProjection(); ResetViewportChange(); ResetTexMatrixInfoChange(); @@ -37,7 +36,6 @@ void XFStateManager::DoState(PointerWrap& p) p.Do(m_materials_changed); p.DoArray(m_tex_matrices_changed); - p.Do(m_pos_normal_matrix_changed); p.Do(m_projection_changed); p.Do(m_viewport_changed); p.Do(m_tex_mtx_info_changed); @@ -53,16 +51,6 @@ void XFStateManager::DoState(PointerWrap& p) void XFStateManager::InvalidateXFRange(int start, int end) { - if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 && - (u32)start < (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 + 12) || - ((u32)start >= - XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 && - (u32)start < XFMEM_NORMALMATRICES + - ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 + 9)) - { - m_pos_normal_matrix_changed = true; - } - if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4 + 12) || ((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4 && @@ -174,8 +162,6 @@ void XFStateManager::SetTexMatrixChangedA(u32 Value) if (g_main_cp_state.matrix_index_a.Hex != Value) { g_vertex_manager->Flush(); - if (g_main_cp_state.matrix_index_a.PosNormalMtxIdx != (Value & 0x3f)) - m_pos_normal_matrix_changed = true; m_tex_matrices_changed[0] = true; g_main_cp_state.matrix_index_a.Hex = Value; } @@ -201,11 +187,6 @@ void XFStateManager::ResetTexMatrixBChange() m_tex_matrices_changed[1] = false; } -void XFStateManager::ResetPosNormalChange() -{ - m_pos_normal_matrix_changed = false; -} - void XFStateManager::SetProjectionChanged() { m_projection_changed = true; diff --git a/Source/Core/VideoCommon/XFStateManager.h b/Source/Core/VideoCommon/XFStateManager.h index f2b430a58a..89c366a50d 100644 --- a/Source/Core/VideoCommon/XFStateManager.h +++ b/Source/Core/VideoCommon/XFStateManager.h @@ -27,9 +27,6 @@ public: bool DidTexMatrixBChange() const { return m_tex_matrices_changed[1]; } void ResetTexMatrixBChange(); - bool DidPosNormalChange() const { return m_pos_normal_matrix_changed; } - void ResetPosNormalChange(); - void SetProjectionChanged(); bool DidProjectionChange() const { return m_projection_changed; } void ResetProjection(); @@ -74,7 +71,6 @@ public: private: // track changes std::array m_tex_matrices_changed{}; - bool m_pos_normal_matrix_changed = false; bool m_projection_changed = false; bool m_viewport_changed = false; bool m_tex_mtx_info_changed = false;