From 7703fef3a4bbf3a17630b959c4a4b39cf7c416f1 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Sun, 19 Mar 2023 23:25:00 +0100 Subject: [PATCH 1/3] VideoCommon:VertexLoaderManager: Only update vertex format in shader manager if necessary. --- .../Core/VideoCommon/VertexLoaderManager.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 49f139c208..0236201001 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -258,11 +258,6 @@ VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group) static void CheckCPConfiguration(int vtx_attr_group) { - if (!g_needs_cp_xf_consistency_check) [[likely]] - return; - - g_needs_cp_xf_consistency_check = false; - // Validate that the XF input configuration matches the CP configuration u32 num_cp_colors = std::count_if( g_main_cp_state.vtx_desc.low.Color.begin(), g_main_cp_state.vtx_desc.low.Color.end(), @@ -359,20 +354,25 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun // Doing early return for the opposite case would be cleaner // but triggers a false unreachable code warning in MSVC debug builds. - CheckCPConfiguration(vtx_attr_group); + if (g_needs_cp_xf_consistency_check) [[unlikely]] + { + CheckCPConfiguration(vtx_attr_group); + g_needs_cp_xf_consistency_check = false; + } // If the native vertex format changed, force a flush. if (loader->m_native_vertex_format != s_current_vtx_fmt || loader->m_native_components != g_current_components) [[unlikely]] { g_vertex_manager->Flush(); + + s_current_vtx_fmt = loader->m_native_vertex_format; + g_current_components = loader->m_native_components; + auto& system = Core::System::GetInstance(); + auto& vertex_shader_manager = system.GetVertexShaderManager(); + vertex_shader_manager.SetVertexFormat(loader->m_native_components, + loader->m_native_vertex_format->GetVertexDeclaration()); } - s_current_vtx_fmt = loader->m_native_vertex_format; - g_current_components = loader->m_native_components; - auto& system = Core::System::GetInstance(); - auto& vertex_shader_manager = system.GetVertexShaderManager(); - vertex_shader_manager.SetVertexFormat(loader->m_native_components, - loader->m_native_vertex_format->GetVertexDeclaration()); // CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data // Therefore it's only useful to check if culling could remove a flush From 408b09da31986b11cc48c4ae91ede8ec66408222 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Sun, 19 Mar 2023 23:28:25 +0100 Subject: [PATCH 2/3] VideoCommon:VertexShaderManager: Inline SetVertexFormat & UpdateValue/Offset --- .../Core/VideoCommon/VertexShaderManager.cpp | 38 ----------------- Source/Core/VideoCommon/VertexShaderManager.h | 41 ++++++++++++++++++- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 2408281da6..276b400ab3 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -621,44 +621,6 @@ void VertexShaderManager::SetMaterialColorChanged(int index) m_materials_changed[index] = true; } -static void UpdateValue(bool* dirty, u32* old_value, u32 new_value) -{ - if (*old_value == new_value) - return; - *old_value = new_value; - *dirty = true; -} - -static void UpdateOffset(bool* dirty, bool include_components, u32* old_value, - const AttributeFormat& attribute) -{ - if (!attribute.enable) - return; - u32 new_value = attribute.offset / 4; // GPU uses uint offsets - if (include_components) - new_value |= attribute.components << 16; - UpdateValue(dirty, old_value, new_value); -} - -template -static void UpdateOffsets(bool* dirty, bool include_components, std::array* old_value, - const std::array& attribute) -{ - for (size_t i = 0; i < N; i++) - UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]); -} - -void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format) -{ - UpdateValue(&dirty, &constants.components, components); - UpdateValue(&dirty, &constants.vertex_stride, format.stride / 4); - UpdateOffset(&dirty, true, &constants.vertex_offset_position, format.position); - UpdateOffset(&dirty, false, &constants.vertex_offset_posmtx, format.posmtx); - UpdateOffsets(&dirty, true, &constants.vertex_offset_texcoords, format.texcoords); - UpdateOffsets(&dirty, false, &constants.vertex_offset_colors, format.colors); - UpdateOffsets(&dirty, false, &constants.vertex_offset_normals, format.normals); -} - void VertexShaderManager::SetTexMatrixInfoChanged(int index) { // TODO: Should we track this with more precision, like which indices changed? diff --git a/Source/Core/VideoCommon/VertexShaderManager.h b/Source/Core/VideoCommon/VertexShaderManager.h index 320e04985d..fdc8da5231 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.h +++ b/Source/Core/VideoCommon/VertexShaderManager.h @@ -11,6 +11,7 @@ #include "Common/CommonTypes.h" #include "Common/Matrix.h" #include "VideoCommon/ConstantManager.h" +#include "VideoCommon/NativeVertexFormat.h" class PointerWrap; struct PortableVertexDeclaration; @@ -34,7 +35,6 @@ public: void SetProjectionChanged(); void SetMaterialColorChanged(int index); - void SetVertexFormat(u32 components, const PortableVertexDeclaration& format); void SetTexMatrixInfoChanged(int index); void SetLightingConfigChanged(); @@ -49,6 +49,45 @@ public: VertexShaderConstants constants{}; bool dirty = false; + static DOLPHIN_FORCE_INLINE void UpdateValue(bool* dirty, u32* old_value, u32 new_value) + { + if (*old_value == new_value) + return; + *old_value = new_value; + *dirty = true; + } + + static DOLPHIN_FORCE_INLINE void UpdateOffset(bool* dirty, bool include_components, + u32* old_value, const AttributeFormat& attribute) + { + if (!attribute.enable) + return; + u32 new_value = attribute.offset / 4; // GPU uses uint offsets + if (include_components) + new_value |= attribute.components << 16; + UpdateValue(dirty, old_value, new_value); + } + + template + static DOLPHIN_FORCE_INLINE void UpdateOffsets(bool* dirty, bool include_components, + std::array* old_value, + const std::array& attribute) + { + for (size_t i = 0; i < N; i++) + UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]); + } + + DOLPHIN_FORCE_INLINE void SetVertexFormat(u32 components, const PortableVertexDeclaration& format) + { + UpdateValue(&dirty, &constants.components, components); + UpdateValue(&dirty, &constants.vertex_stride, format.stride / 4); + UpdateOffset(&dirty, true, &constants.vertex_offset_position, format.position); + UpdateOffset(&dirty, false, &constants.vertex_offset_posmtx, format.posmtx); + UpdateOffsets(&dirty, true, &constants.vertex_offset_texcoords, format.texcoords); + UpdateOffsets(&dirty, false, &constants.vertex_offset_colors, format.colors); + UpdateOffsets(&dirty, false, &constants.vertex_offset_normals, format.normals); + } + private: alignas(16) std::array m_projection_matrix; From 93fce0e4b6cfdc67fca441bb7e2c9029bd4afe15 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Sun, 19 Mar 2023 23:37:13 +0100 Subject: [PATCH 3/3] VideoCommon:VertexManagerBase: Only calculate remaining indices once Before, both of those were calculated 3 times due to the ASSERTs. --- Source/Core/VideoCommon/VertexManagerBase.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 9686809c88..768ac82fe7 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -140,7 +140,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? primitive_from_gx_pr[primitive] : primitive_from_gx[primitive]; - if (m_current_primitive_type != new_primitive_type) + if (m_current_primitive_type != new_primitive_type) [[unlikely]] { Flush(); @@ -149,9 +149,11 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive SetRasterizationStateChanged(); } + u32 remaining_indices = GetRemainingIndices(primitive); + u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive); + // Check for size in buffer, if the buffer gets full, call Flush() - if (!m_is_flushed && (count > m_index_generator.GetRemainingIndices(primitive) || - count > GetRemainingIndices(primitive) || + if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices || needed_vertex_bytes > GetRemainingSize())) [[unlikely]] { Flush(); @@ -160,7 +162,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive m_cull_all = cullall; // need to alloc new buffer - if (m_is_flushed) + if (m_is_flushed) [[unlikely]] { if (cullall) { @@ -174,6 +176,8 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive ResetBuffer(stride); } + remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive); + remaining_indices = GetRemainingIndices(primitive); m_is_flushed = false; } @@ -181,14 +185,14 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive // won't have enough space in a few rare cases, such as vertex shader line/point expansion with a // ton of lines in one draw command, in which case we will either need to add support for // splitting a single draw command into multiple draws or using bigger indices. - ASSERT_MSG(VIDEO, count <= m_index_generator.GetRemainingIndices(primitive), + ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices, "VertexManager: Too few remaining index values ({} > {}). " "32-bit indices or primitive breaking needed.", - count, m_index_generator.GetRemainingIndices(primitive)); - ASSERT_MSG(VIDEO, count <= GetRemainingIndices(primitive), + count, remaining_index_generator_indices); + ASSERT_MSG(VIDEO, count <= remaining_indices, "VertexManager: Buffer not large enough for all indices! ({} > {}) " "Increase MAXIBUFFERSIZE or we need primitive breaking after all.", - count, GetRemainingIndices(primitive)); + count, remaining_indices); ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(), "VertexManager: Buffer not large enough for all vertices! ({} > {}) " "Increase MAXVBUFFERSIZE or we need primitive breaking after all.",