Merge pull request #11067 from K0bin/cache-vertex-size

OpcodeDecoding: Cache vertex sizes
This commit is contained in:
JMC47 2022-09-18 22:38:06 -04:00 committed by GitHub
commit 6f4f5b0b7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 139 additions and 82 deletions

View File

@ -55,6 +55,11 @@ public:
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Returns the vertex size for the vertex attribute table entry `vat`, derived
// from this callback's own CP state (its vertex descriptor plus the vtx_attr
// entry selected by `vat`).
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  const CPState& cp_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(cp_state.vtx_desc, cp_state.vtx_attr[vat]);
}
bool m_start_of_primitives = false; bool m_start_of_primitives = false;
bool m_end_of_primitives = false; bool m_end_of_primitives = false;
bool m_efb_copy = false; bool m_efb_copy = false;

View File

@ -45,6 +45,11 @@ public:
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Returns the vertex size for the vertex attribute table entry `vat`, derived
// from this callback's own CP state (its vertex descriptor plus the vtx_attr
// entry selected by `vat`).
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  const CPState& cp_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(cp_state.vtx_desc, cp_state.vtx_attr[vat]);
}
private: private:
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type, void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices, u32 component_offset, u32 vertex_size, u16 num_vertices,

View File

@ -316,6 +316,11 @@ public:
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Returns the vertex size for the vertex attribute table entry `vat`, derived
// from this callback's own CP state (its vertex descriptor plus the vtx_attr
// entry selected by `vat`).
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  const CPState& cp_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(cp_state.vtx_desc, cp_state.vtx_attr[vat]);
}
QString text; QString text;
CPState m_cpmem; CPState m_cpmem;
}; };
@ -731,6 +736,11 @@ public:
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; } OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
// Returns the vertex size for the vertex attribute table entry `vat`, derived
// from this callback's own CP state (its vertex descriptor plus the vtx_attr
// entry selected by `vat`).
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  const CPState& cp_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(cp_state.vtx_desc, cp_state.vtx_attr[vat]);
}
QString text; QString text;
CPState m_cpmem; CPState m_cpmem;
}; };

View File

@ -122,7 +122,7 @@ public:
// HACK // HACK
DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size}; DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size};
const u32 bytes = const u32 bytes =
VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess); VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, src);
ASSERT(bytes == size); ASSERT(bytes == size);
@ -228,6 +228,12 @@ public:
return g_main_cp_state; return g_main_cp_state;
} }
// Returns the vertex size for the vertex attribute table entry `vat` by reading
// m_vertex_size from the loader that VertexLoaderManager::RefreshLoader yields
// for that entry.
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  return VertexLoaderManager::RefreshLoader<is_preprocess>(vat)->m_vertex_size;
}
u32 m_cycles = 0; u32 m_cycles = 0;
bool m_in_display_list = false; bool m_in_display_list = false;
}; };

View File

@ -110,6 +110,8 @@ public:
// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands. // Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0; virtual CPState& GetCPState() = 0;
virtual u32 GetVertexSize(u8 vat) = 0;
#endif #endif
}; };
@ -229,8 +231,7 @@ static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& cal
(cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT); (cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK; const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;
const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc, const u32 vertex_size = callback.GetVertexSize(vat);
callback.GetCPState().vtx_attr[vat]);
const u16 num_vertices = Common::swap16(&data[1]); const u16 num_vertices = Common::swap16(&data[1]);
if (available < 3 + num_vertices * vertex_size) if (available < 3 + num_vertices * vertex_size)

View File

@ -54,7 +54,6 @@ Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
BitSet8 g_main_vat_dirty; BitSet8 g_main_vat_dirty;
BitSet8 g_preprocess_vat_dirty; BitSet8 g_preprocess_vat_dirty;
bool g_bases_dirty; // Main only bool g_bases_dirty; // Main only
u8 g_current_vat; // Main only
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders; std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders; std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
@ -78,7 +77,7 @@ void Clear()
void UpdateVertexArrayPointers() void UpdateVertexArrayPointers()
{ {
// Anything to update? // Anything to update?
if (!g_bases_dirty) if (!g_bases_dirty) [[likely]]
return; return;
// Some games such as Burnout 2 can put invalid addresses into // Some games such as Burnout 2 can put invalid addresses into
@ -198,19 +197,21 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
return GetOrCreateMatchingFormat(new_decl); return GetOrCreateMatchingFormat(new_decl);
} }
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false) namespace detail
{ {
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state; template <bool IsPreprocess>
BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty; VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)
auto& vertex_loaders = preprocess ? g_main_vertex_loaders : g_preprocess_vertex_loaders; {
g_current_vat = vtx_attr_group; constexpr CPState* state = IsPreprocess ? &g_preprocess_cp_state : &g_main_cp_state;
constexpr BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
constexpr auto& vertex_loaders =
IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
VertexLoaderBase* loader; VertexLoaderBase* loader;
if (attr_dirty[vtx_attr_group])
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong // We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread // thread
bool check_for_native_format = !preprocess; bool check_for_native_format = !IsPreprocess;
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]); VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock); std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
@ -222,35 +223,24 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
} }
else else
{ {
s_vertex_loader_map[uid] = auto [it, added] = s_vertex_loader_map.try_emplace(
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); uid,
loader = s_vertex_loader_map[uid].get(); VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]));
loader = it->second.get();
INCSTAT(g_stats.num_vertex_loaders); INCSTAT(g_stats.num_vertex_loaders);
} }
if (check_for_native_format) if (check_for_native_format)
{ {
// search for a cached native vertex format // search for a cached native vertex format
const PortableVertexDeclaration& format = loader->m_native_vtx_decl; loader->m_native_vertex_format = GetOrCreateMatchingFormat(loader->m_native_vtx_decl);
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
if (!native)
native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get();
} }
vertex_loaders[vtx_attr_group] = loader; vertex_loaders[vtx_attr_group] = loader;
attr_dirty[vtx_attr_group] = false; attr_dirty[vtx_attr_group] = false;
}
else
{
loader = vertex_loaders[vtx_attr_group];
}
// Lookup pointers for any vertex arrays.
if (!preprocess)
UpdateVertexArrayPointers();
return loader; return loader;
} }
} // namespace detail
static void CheckCPConfiguration(int vtx_attr_group) static void CheckCPConfiguration(int vtx_attr_group)
{ {
// Validate that the XF input configuration matches the CP configuration // Validate that the XF input configuration matches the CP configuration
@ -335,21 +325,23 @@ static void CheckCPConfiguration(int vtx_attr_group)
} }
} }
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, template <bool IsPreprocess>
bool is_preprocess) int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src)
{ {
if (count == 0) if (count == 0)
return 0; return 0;
ASSERT(count > 0); ASSERT(count > 0);
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess); VertexLoaderBase* loader = RefreshLoader<IsPreprocess>(vtx_attr_group);
int size = count * loader->m_vertex_size; int size = count * loader->m_vertex_size;
if ((int)src.size() < size) if ((int)src.size() < size)
return -1; return -1;
if (is_preprocess) if constexpr (!IsPreprocess)
return size; {
// Doing early return for the opposite case would be cleaner
// but triggers a false unreachable code warning in MSVC debug builds.
CheckCPConfiguration(vtx_attr_group); CheckCPConfiguration(vtx_attr_group);
@ -379,9 +371,15 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
ADDSTAT(g_stats.this_frame.num_prims, count); ADDSTAT(g_stats.this_frame.num_prims, count);
INCSTAT(g_stats.this_frame.num_primitive_joins); INCSTAT(g_stats.this_frame.num_primitive_joins);
}
return size; return size;
} }
template int RunVertices<false>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
DataReader src);
template int RunVertices<true>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
DataReader src);
NativeVertexFormat* GetCurrentVertexFormat() NativeVertexFormat* GetCurrentVertexFormat()
{ {
return s_current_vtx_fmt; return s_current_vtx_fmt;

View File

@ -42,8 +42,16 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl); NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
// Returns -1 if buf_size is insufficient, else the amount of bytes consumed // Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src, template <bool IsPreprocess = false>
bool is_preprocess); int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src);
namespace detail
{
// This will look for an existing loader in the global hashmap or create a new one if there is none.
// It should not be used directly because RefreshLoader() has another cache for fast lookups.
template <bool IsPreprocess = false>
VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group);
} // namespace detail
NativeVertexFormat* GetCurrentVertexFormat(); NativeVertexFormat* GetCurrentVertexFormat();
@ -66,7 +74,31 @@ extern u32 g_current_components;
extern BitSet8 g_main_vat_dirty; extern BitSet8 g_main_vat_dirty;
extern BitSet8 g_preprocess_vat_dirty; extern BitSet8 g_preprocess_vat_dirty;
extern bool g_bases_dirty; // Main only extern bool g_bases_dirty; // Main only
extern u8 g_current_vat; // Main only
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders; extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders; extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
// Returns the vertex loader for the given vertex attribute group.
//
// IsPreprocess selects which set of globals is consulted: the preprocess dirty
// bits and loader table when true, the main ones when false.
//
// If the group's dirty bit is clear, the loader stored in the per-group table
// is returned as-is. Otherwise the lookup is delegated to
// detail::GetOrCreateLoader(). For the main (non-preprocess) instantiation,
// UpdateVertexArrayPointers() is additionally called before returning.
template <bool IsPreprocess = false>
VertexLoaderBase* RefreshLoader(int vtx_attr_group)
{
  constexpr const BitSet8& dirty_groups =
      IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
  constexpr const auto& loader_table =
      IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;

  VertexLoaderBase* result;
  if (dirty_groups[vtx_attr_group]) [[unlikely]]
  {
    // Slow path: the table entry for this group cannot be used while it is marked dirty.
    result = detail::GetOrCreateLoader<IsPreprocess>(vtx_attr_group);
  }
  else [[likely]]
  {
    // Fast path: reuse the loader already stored for this group.
    result = loader_table[vtx_attr_group];
  }

  // Lookup pointers for any vertex arrays.
  if constexpr (!IsPreprocess)
  {
    UpdateVertexArrayPointers();
  }

  return result;
}
} // namespace VertexLoaderManager } // namespace VertexLoaderManager