Merge pull request #11067 from K0bin/cache-vertex-size
OpcodeDecoding: Cache vertex sizes
This commit is contained in:
commit
6f4f5b0b7b
|
@ -55,6 +55,11 @@ public:
|
|||
|
||||
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
|
||||
|
||||
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
||||
{
|
||||
return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]);
|
||||
}
|
||||
|
||||
bool m_start_of_primitives = false;
|
||||
bool m_end_of_primitives = false;
|
||||
bool m_efb_copy = false;
|
||||
|
|
|
@ -45,6 +45,11 @@ public:
|
|||
|
||||
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
|
||||
|
||||
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
||||
{
|
||||
return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]);
|
||||
}
|
||||
|
||||
private:
|
||||
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
|
||||
u32 component_offset, u32 vertex_size, u16 num_vertices,
|
||||
|
|
|
@ -316,6 +316,11 @@ public:
|
|||
|
||||
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
|
||||
|
||||
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
||||
{
|
||||
return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]);
|
||||
}
|
||||
|
||||
QString text;
|
||||
CPState m_cpmem;
|
||||
};
|
||||
|
@ -731,6 +736,11 @@ public:
|
|||
|
||||
OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }
|
||||
|
||||
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
||||
{
|
||||
return VertexLoaderBase::GetVertexSize(GetCPState().vtx_desc, GetCPState().vtx_attr[vat]);
|
||||
}
|
||||
|
||||
QString text;
|
||||
CPState m_cpmem;
|
||||
};
|
||||
|
|
|
@ -122,7 +122,7 @@ public:
|
|||
// HACK
|
||||
DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size};
|
||||
const u32 bytes =
|
||||
VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess);
|
||||
VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, src);
|
||||
|
||||
ASSERT(bytes == size);
|
||||
|
||||
|
@ -228,6 +228,12 @@ public:
|
|||
return g_main_cp_state;
|
||||
}
|
||||
|
||||
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
||||
{
|
||||
VertexLoaderBase* loader = VertexLoaderManager::RefreshLoader<is_preprocess>(vat);
|
||||
return loader->m_vertex_size;
|
||||
}
|
||||
|
||||
u32 m_cycles = 0;
|
||||
bool m_in_display_list = false;
|
||||
};
|
||||
|
|
|
@ -110,6 +110,8 @@ public:
|
|||
|
||||
// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
|
||||
virtual CPState& GetCPState() = 0;
|
||||
|
||||
virtual u32 GetVertexSize(u8 vat) = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -229,8 +231,7 @@ static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& cal
|
|||
(cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
|
||||
const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;
|
||||
|
||||
const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
|
||||
callback.GetCPState().vtx_attr[vat]);
|
||||
const u32 vertex_size = callback.GetVertexSize(vat);
|
||||
const u16 num_vertices = Common::swap16(&data[1]);
|
||||
|
||||
if (available < 3 + num_vertices * vertex_size)
|
||||
|
|
|
@ -54,7 +54,6 @@ Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
|
|||
BitSet8 g_main_vat_dirty;
|
||||
BitSet8 g_preprocess_vat_dirty;
|
||||
bool g_bases_dirty; // Main only
|
||||
u8 g_current_vat; // Main only
|
||||
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
|
||||
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
|
||||
|
||||
|
@ -78,7 +77,7 @@ void Clear()
|
|||
void UpdateVertexArrayPointers()
|
||||
{
|
||||
// Anything to update?
|
||||
if (!g_bases_dirty)
|
||||
if (!g_bases_dirty) [[likely]]
|
||||
return;
|
||||
|
||||
// Some games such as Burnout 2 can put invalid addresses into
|
||||
|
@ -198,59 +197,50 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
|
|||
return GetOrCreateMatchingFormat(new_decl);
|
||||
}
|
||||
|
||||
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
|
||||
namespace detail
|
||||
{
|
||||
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
|
||||
BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
|
||||
auto& vertex_loaders = preprocess ? g_main_vertex_loaders : g_preprocess_vertex_loaders;
|
||||
g_current_vat = vtx_attr_group;
|
||||
template <bool IsPreprocess>
|
||||
VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)
|
||||
{
|
||||
constexpr CPState* state = IsPreprocess ? &g_preprocess_cp_state : &g_main_cp_state;
|
||||
constexpr BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
|
||||
constexpr auto& vertex_loaders =
|
||||
IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
|
||||
|
||||
VertexLoaderBase* loader;
|
||||
if (attr_dirty[vtx_attr_group])
|
||||
{
|
||||
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
|
||||
// thread
|
||||
bool check_for_native_format = !preprocess;
|
||||
|
||||
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
|
||||
if (iter != s_vertex_loader_map.end())
|
||||
{
|
||||
loader = iter->second.get();
|
||||
check_for_native_format &= !loader->m_native_vertex_format;
|
||||
}
|
||||
else
|
||||
{
|
||||
s_vertex_loader_map[uid] =
|
||||
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||
loader = s_vertex_loader_map[uid].get();
|
||||
INCSTAT(g_stats.num_vertex_loaders);
|
||||
}
|
||||
if (check_for_native_format)
|
||||
{
|
||||
// search for a cached native vertex format
|
||||
const PortableVertexDeclaration& format = loader->m_native_vtx_decl;
|
||||
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
|
||||
if (!native)
|
||||
native = g_renderer->CreateNativeVertexFormat(format);
|
||||
loader->m_native_vertex_format = native.get();
|
||||
}
|
||||
vertex_loaders[vtx_attr_group] = loader;
|
||||
attr_dirty[vtx_attr_group] = false;
|
||||
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
|
||||
// thread
|
||||
bool check_for_native_format = !IsPreprocess;
|
||||
|
||||
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
|
||||
if (iter != s_vertex_loader_map.end())
|
||||
{
|
||||
loader = iter->second.get();
|
||||
check_for_native_format &= !loader->m_native_vertex_format;
|
||||
}
|
||||
else
|
||||
{
|
||||
loader = vertex_loaders[vtx_attr_group];
|
||||
auto [it, added] = s_vertex_loader_map.try_emplace(
|
||||
uid,
|
||||
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]));
|
||||
loader = it->second.get();
|
||||
INCSTAT(g_stats.num_vertex_loaders);
|
||||
}
|
||||
|
||||
// Lookup pointers for any vertex arrays.
|
||||
if (!preprocess)
|
||||
UpdateVertexArrayPointers();
|
||||
|
||||
if (check_for_native_format)
|
||||
{
|
||||
// search for a cached native vertex format
|
||||
loader->m_native_vertex_format = GetOrCreateMatchingFormat(loader->m_native_vtx_decl);
|
||||
}
|
||||
vertex_loaders[vtx_attr_group] = loader;
|
||||
attr_dirty[vtx_attr_group] = false;
|
||||
return loader;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
static void CheckCPConfiguration(int vtx_attr_group)
|
||||
{
|
||||
// Validate that the XF input configuration matches the CP configuration
|
||||
|
@ -335,53 +325,61 @@ static void CheckCPConfiguration(int vtx_attr_group)
|
|||
}
|
||||
}
|
||||
|
||||
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
|
||||
bool is_preprocess)
|
||||
template <bool IsPreprocess>
|
||||
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src)
|
||||
{
|
||||
if (count == 0)
|
||||
return 0;
|
||||
ASSERT(count > 0);
|
||||
|
||||
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);
|
||||
VertexLoaderBase* loader = RefreshLoader<IsPreprocess>(vtx_attr_group);
|
||||
|
||||
int size = count * loader->m_vertex_size;
|
||||
if ((int)src.size() < size)
|
||||
return -1;
|
||||
|
||||
if (is_preprocess)
|
||||
return size;
|
||||
|
||||
CheckCPConfiguration(vtx_attr_group);
|
||||
|
||||
// If the native vertex format changed, force a flush.
|
||||
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
|
||||
loader->m_native_components != g_current_components)
|
||||
if constexpr (!IsPreprocess)
|
||||
{
|
||||
g_vertex_manager->Flush();
|
||||
// Doing early return for the opposite case would be cleaner
|
||||
// but triggers a false unreachable code warning in MSVC debug builds.
|
||||
|
||||
CheckCPConfiguration(vtx_attr_group);
|
||||
|
||||
// If the native vertex format changed, force a flush.
|
||||
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
|
||||
loader->m_native_components != g_current_components)
|
||||
{
|
||||
g_vertex_manager->Flush();
|
||||
}
|
||||
s_current_vtx_fmt = loader->m_native_vertex_format;
|
||||
g_current_components = loader->m_native_components;
|
||||
VertexShaderManager::SetVertexFormat(loader->m_native_components);
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
|
||||
// slope.
|
||||
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
|
||||
primitive, count, loader->m_native_vtx_decl.stride, cullall);
|
||||
|
||||
count = loader->RunVertices(src, dst, count);
|
||||
|
||||
g_vertex_manager->AddIndices(primitive, count);
|
||||
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
||||
|
||||
ADDSTAT(g_stats.this_frame.num_prims, count);
|
||||
INCSTAT(g_stats.this_frame.num_primitive_joins);
|
||||
}
|
||||
s_current_vtx_fmt = loader->m_native_vertex_format;
|
||||
g_current_components = loader->m_native_components;
|
||||
VertexShaderManager::SetVertexFormat(loader->m_native_components);
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
|
||||
// slope.
|
||||
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
|
||||
primitive, count, loader->m_native_vtx_decl.stride, cullall);
|
||||
|
||||
count = loader->RunVertices(src, dst, count);
|
||||
|
||||
g_vertex_manager->AddIndices(primitive, count);
|
||||
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
||||
|
||||
ADDSTAT(g_stats.this_frame.num_prims, count);
|
||||
INCSTAT(g_stats.this_frame.num_primitive_joins);
|
||||
return size;
|
||||
}
|
||||
|
||||
template int RunVertices<false>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
|
||||
DataReader src);
|
||||
template int RunVertices<true>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
|
||||
DataReader src);
|
||||
|
||||
NativeVertexFormat* GetCurrentVertexFormat()
|
||||
{
|
||||
return s_current_vtx_fmt;
|
||||
|
|
|
@ -42,8 +42,16 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
|
|||
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
|
||||
|
||||
// Returns -1 if src is too small for count vertices, else the number of bytes consumed
|
||||
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
|
||||
bool is_preprocess);
|
||||
template <bool IsPreprocess = false>
|
||||
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src);
|
||||
|
||||
namespace detail
|
||||
{
|
||||
// This will look for an existing loader in the global hashmap or create a new one if there is none.
|
||||
// It should not be used directly because RefreshLoader() has another cache for fast lookups.
|
||||
template <bool IsPreprocess = false>
|
||||
VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group);
|
||||
} // namespace detail
|
||||
|
||||
NativeVertexFormat* GetCurrentVertexFormat();
|
||||
|
||||
|
@ -66,7 +74,31 @@ extern u32 g_current_components;
|
|||
extern BitSet8 g_main_vat_dirty;
|
||||
extern BitSet8 g_preprocess_vat_dirty;
|
||||
extern bool g_bases_dirty; // Main only
|
||||
extern u8 g_current_vat; // Main only
|
||||
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
|
||||
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
|
||||
|
||||
template <bool IsPreprocess = false>
|
||||
VertexLoaderBase* RefreshLoader(int vtx_attr_group)
|
||||
{
|
||||
constexpr const BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
|
||||
constexpr const auto& vertex_loaders =
|
||||
IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
|
||||
|
||||
VertexLoaderBase* loader;
|
||||
if (!attr_dirty[vtx_attr_group]) [[likely]]
|
||||
{
|
||||
loader = vertex_loaders[vtx_attr_group];
|
||||
}
|
||||
else [[unlikely]]
|
||||
{
|
||||
loader = detail::GetOrCreateLoader<IsPreprocess>(vtx_attr_group);
|
||||
}
|
||||
|
||||
// Lookup pointers for any vertex arrays.
|
||||
if constexpr (!IsPreprocess)
|
||||
UpdateVertexArrayPointers();
|
||||
|
||||
return loader;
|
||||
}
|
||||
|
||||
} // namespace VertexLoaderManager
|
||||
|
|
Loading…
Reference in New Issue