diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index b93fcf980a..e6a3a6e224 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -55,6 +55,11 @@ public: OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + OPCODE_CALLBACK(u32 GetVertexSize(u8 vat)) + { + return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]); + } + bool m_start_of_primitives = false; bool m_end_of_primitives = false; bool m_efb_copy = false; diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp index a47877ef4f..4464615495 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp @@ -45,6 +45,11 @@ public: OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + OPCODE_CALLBACK(u32 GetVertexSize(u8 vat)) + { + return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]); + } + private: void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type, u32 component_offset, u32 vertex_size, u16 num_vertices, diff --git a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp index d593a7419d..bcf0276a02 100644 --- a/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp +++ b/Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp @@ -316,6 +316,11 @@ public: OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + OPCODE_CALLBACK(u32 GetVertexSize(u8 vat)) + { + return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]); + } + QString text; CPState m_cpmem; }; @@ -731,6 +736,11 @@ public: OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } + OPCODE_CALLBACK(u32 GetVertexSize(u8 vat)) + { + return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]); + } + QString text; CPState m_cpmem; }; diff --git 
a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index 3f90a2b6e3..bc19a1c2de 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -122,7 +122,7 @@ public: // HACK DataReader src{const_cast(vertex_data), const_cast(vertex_data) + size}; const u32 bytes = - VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess); + VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src); ASSERT(bytes == size); @@ -228,6 +228,12 @@ public: return g_main_cp_state; } + OPCODE_CALLBACK(u32 GetVertexSize(u8 vat)) + { + VertexLoaderBase* loader = VertexLoaderManager::RefreshLoader(vat); + return loader->m_vertex_size; + } + u32 m_cycles = 0; bool m_in_display_list = false; }; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index 1badf63196..2035f5c733 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -110,6 +110,8 @@ public: // Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands. 
virtual CPState& GetCPState() = 0; + + virtual u32 GetVertexSize(u8 vat) = 0; #endif }; @@ -229,8 +231,7 @@ static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& cal (cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT); const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK; - const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc, - callback.GetCPState().vtx_attr[vat]); + const u32 vertex_size = callback.GetVertexSize(vat); const u16 num_vertices = Common::swap16(&data[1]); if (available < 3 + num_vertices * vertex_size) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 102eb5f94e..607de66bee 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -54,7 +54,6 @@ Common::EnumMap cached_arraybases; BitSet8 g_main_vat_dirty; BitSet8 g_preprocess_vat_dirty; bool g_bases_dirty; // Main only -u8 g_current_vat; // Main only std::array g_main_vertex_loaders; std::array g_preprocess_vertex_loaders; @@ -78,7 +77,7 @@ void Clear() void UpdateVertexArrayPointers() { // Anything to update? - if (!g_bases_dirty) + if (!g_bases_dirty) [[likely]] return; // Some games such as Burnout 2 can put invalid addresses into @@ -198,59 +197,50 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl) return GetOrCreateMatchingFormat(new_decl); } -static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false) +namespace detail { - CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state; - BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; - auto& vertex_loaders = preprocess ? g_main_vertex_loaders : g_preprocess_vertex_loaders; - g_current_vat = vtx_attr_group; +template +VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group) +{ + constexpr CPState* state = IsPreprocess ? 
&g_preprocess_cp_state : &g_main_cp_state; + constexpr BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; + constexpr auto& vertex_loaders = + IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders; VertexLoaderBase* loader; - if (attr_dirty[vtx_attr_group]) - { - // We are not allowed to create a native vertex format on preprocessing as this is on the wrong - // thread - bool check_for_native_format = !preprocess; - VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]); - std::lock_guard lk(s_vertex_loader_map_lock); - VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid); - if (iter != s_vertex_loader_map.end()) - { - loader = iter->second.get(); - check_for_native_format &= !loader->m_native_vertex_format; - } - else - { - s_vertex_loader_map[uid] = - VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); - loader = s_vertex_loader_map[uid].get(); - INCSTAT(g_stats.num_vertex_loaders); - } - if (check_for_native_format) - { - // search for a cached native vertex format - const PortableVertexDeclaration& format = loader->m_native_vtx_decl; - std::unique_ptr& native = s_native_vertex_map[format]; - if (!native) - native = g_renderer->CreateNativeVertexFormat(format); - loader->m_native_vertex_format = native.get(); - } - vertex_loaders[vtx_attr_group] = loader; - attr_dirty[vtx_attr_group] = false; + // We are not allowed to create a native vertex format on preprocessing as this is on the wrong + // thread + bool check_for_native_format = !IsPreprocess; + + VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]); + std::lock_guard lk(s_vertex_loader_map_lock); + VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid); + if (iter != s_vertex_loader_map.end()) + { + loader = iter->second.get(); + check_for_native_format &= !loader->m_native_vertex_format; } else { - loader = vertex_loaders[vtx_attr_group]; + auto [it, added] = 
s_vertex_loader_map.try_emplace( + uid, + VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group])); + loader = it->second.get(); + INCSTAT(g_stats.num_vertex_loaders); } - - // Lookup pointers for any vertex arrays. - if (!preprocess) - UpdateVertexArrayPointers(); - + if (check_for_native_format) + { + // search for a cached native vertex format + loader->m_native_vertex_format = GetOrCreateMatchingFormat(loader->m_native_vtx_decl); + } + vertex_loaders[vtx_attr_group] = loader; + attr_dirty[vtx_attr_group] = false; return loader; } +} // namespace detail + static void CheckCPConfiguration(int vtx_attr_group) { // Validate that the XF input configuration matches the CP configuration @@ -335,53 +325,61 @@ static void CheckCPConfiguration(int vtx_attr_group) } } -int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, - bool is_preprocess) +template +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src) { if (count == 0) return 0; ASSERT(count > 0); - VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess); + VertexLoaderBase* loader = RefreshLoader(vtx_attr_group); int size = count * loader->m_vertex_size; if ((int)src.size() < size) return -1; - if (is_preprocess) - return size; - - CheckCPConfiguration(vtx_attr_group); - - // If the native vertex format changed, force a flush. - if (loader->m_native_vertex_format != s_current_vtx_fmt || - loader->m_native_components != g_current_components) + if constexpr (!IsPreprocess) { - g_vertex_manager->Flush(); + // Doing early return for the opposite case would be cleaner + // but triggers a false unreachable code warning in MSVC debug builds. + + CheckCPConfiguration(vtx_attr_group); + + // If the native vertex format changed, force a flush. 
+ if (loader->m_native_vertex_format != s_current_vtx_fmt || + loader->m_native_components != g_current_components) + { + g_vertex_manager->Flush(); + } + s_current_vtx_fmt = loader->m_native_vertex_format; + g_current_components = loader->m_native_components; + VertexShaderManager::SetVertexFormat(loader->m_native_components); + + // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. + // They still need to go through vertex loading, because we need to calculate a zfreeze reference + // slope. + bool cullall = (bpmem.genMode.cullmode == CullMode::All && + primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); + + DataReader dst = g_vertex_manager->PrepareForAdditionalData( + primitive, count, loader->m_native_vtx_decl.stride, cullall); + + count = loader->RunVertices(src, dst, count); + + g_vertex_manager->AddIndices(primitive, count); + g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride); + + ADDSTAT(g_stats.this_frame.num_prims, count); + INCSTAT(g_stats.this_frame.num_primitive_joins); } - s_current_vtx_fmt = loader->m_native_vertex_format; - g_current_components = loader->m_native_components; - VertexShaderManager::SetVertexFormat(loader->m_native_components); - - // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. - // They still need to go through vertex loading, because we need to calculate a zfreeze refrence - // slope. 
- bool cullall = (bpmem.genMode.cullmode == CullMode::All && - primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); - - DataReader dst = g_vertex_manager->PrepareForAdditionalData( - primitive, count, loader->m_native_vtx_decl.stride, cullall); - - count = loader->RunVertices(src, dst, count); - - g_vertex_manager->AddIndices(primitive, count); - g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride); - - ADDSTAT(g_stats.this_frame.num_prims, count); - INCSTAT(g_stats.this_frame.num_primitive_joins); return size; } +template int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, + DataReader src); +template int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, + DataReader src); + NativeVertexFormat* GetCurrentVertexFormat() { return s_current_vtx_fmt; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index c5f9ae5376..464f024697 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -42,8 +42,16 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl); // Returns -1 if buf_size is insufficient, else the amount of bytes consumed -int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, - bool is_preprocess); +template +int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src); + +namespace detail +{ +// This will look for an existing loader in the global hashmap or create a new one if there is none. +// It should not be used directly because RefreshLoader() has another cache for fast lookups. 
+template +VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group); +} // namespace detail NativeVertexFormat* GetCurrentVertexFormat(); @@ -66,7 +74,31 @@ extern u32 g_current_components; extern BitSet8 g_main_vat_dirty; extern BitSet8 g_preprocess_vat_dirty; extern bool g_bases_dirty; // Main only -extern u8 g_current_vat; // Main only extern std::array g_main_vertex_loaders; extern std::array g_preprocess_vertex_loaders; + +template +VertexLoaderBase* RefreshLoader(int vtx_attr_group) +{ + constexpr const BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; + constexpr const auto& vertex_loaders = + IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders; + + VertexLoaderBase* loader; + if (!attr_dirty[vtx_attr_group]) [[likely]] + { + loader = vertex_loaders[vtx_attr_group]; + } + else [[unlikely]] + { + loader = detail::GetOrCreateLoader(vtx_attr_group); + } + + // Lookup pointers for any vertex arrays. + if constexpr (!IsPreprocess) + UpdateVertexArrayPointers(); + + return loader; +} + } // namespace VertexLoaderManager