Merge pull request #11673 from K0bin/vertex-loader-micro-opt

Vertex Loader Microoptimization
This commit is contained in:
Markus Wick 2023-03-30 11:05:02 +02:00 committed by GitHub
commit 50a45bd614
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 64 additions and 59 deletions

View File

@ -258,11 +258,6 @@ VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)
static void CheckCPConfiguration(int vtx_attr_group)
{
if (!g_needs_cp_xf_consistency_check) [[likely]]
return;
g_needs_cp_xf_consistency_check = false;
// Validate that the XF input configuration matches the CP configuration
u32 num_cp_colors = std::count_if(
g_main_cp_state.vtx_desc.low.Color.begin(), g_main_cp_state.vtx_desc.low.Color.end(),
@ -359,20 +354,25 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
// Doing early return for the opposite case would be cleaner
// but triggers a false unreachable code warning in MSVC debug builds.
if (g_needs_cp_xf_consistency_check) [[unlikely]]
{
CheckCPConfiguration(vtx_attr_group);
g_needs_cp_xf_consistency_check = false;
}
// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components) [[unlikely]]
{
g_vertex_manager->Flush();
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
auto& system = Core::System::GetInstance();
auto& vertex_shader_manager = system.GetVertexShaderManager();
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());
}
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
// Therefore it's only useful to check if culling could remove a flush

View File

@ -140,7 +140,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
primitive_from_gx_pr[primitive] :
primitive_from_gx[primitive];
if (m_current_primitive_type != new_primitive_type)
if (m_current_primitive_type != new_primitive_type) [[unlikely]]
{
Flush();
@ -149,9 +149,11 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
SetRasterizationStateChanged();
}
u32 remaining_indices = GetRemainingIndices(primitive);
u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
// Check for size in buffer, if the buffer gets full, call Flush()
if (!m_is_flushed && (count > m_index_generator.GetRemainingIndices(primitive) ||
count > GetRemainingIndices(primitive) ||
if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices ||
needed_vertex_bytes > GetRemainingSize())) [[unlikely]]
{
Flush();
@ -160,7 +162,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
m_cull_all = cullall;
// need to alloc new buffer
if (m_is_flushed)
if (m_is_flushed) [[unlikely]]
{
if (cullall)
{
@ -174,6 +176,8 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
ResetBuffer(stride);
}
remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
remaining_indices = GetRemainingIndices(primitive);
m_is_flushed = false;
}
@ -181,14 +185,14 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
// won't have enough space in a few rare cases, such as vertex shader line/point expansion with a
// ton of lines in one draw command, in which case we will either need to add support for
// splitting a single draw command into multiple draws or using bigger indices.
ASSERT_MSG(VIDEO, count <= m_index_generator.GetRemainingIndices(primitive),
ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices,
"VertexManager: Too few remaining index values ({} > {}). "
"32-bit indices or primitive breaking needed.",
count, m_index_generator.GetRemainingIndices(primitive));
ASSERT_MSG(VIDEO, count <= GetRemainingIndices(primitive),
count, remaining_index_generator_indices);
ASSERT_MSG(VIDEO, count <= remaining_indices,
"VertexManager: Buffer not large enough for all indices! ({} > {}) "
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.",
count, GetRemainingIndices(primitive));
count, remaining_indices);
ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(),
"VertexManager: Buffer not large enough for all vertices! ({} > {}) "
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.",

View File

@ -621,44 +621,6 @@ void VertexShaderManager::SetMaterialColorChanged(int index)
m_materials_changed[index] = true;
}
// Stores new_value into *old_value and raises *dirty, but only if the stored value
// actually differs. When the values already match, neither output is touched.
static void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  if (*old_value != new_value)
  {
    *old_value = new_value;
    *dirty = true;
  }
}
static void UpdateOffset(bool* dirty, bool include_components, u32* old_value,
const AttributeFormat& attribute)
{
if (!attribute.enable)
return;
u32 new_value = attribute.offset / 4; // GPU uses uint offsets
if (include_components)
new_value |= attribute.components << 16;
UpdateValue(dirty, old_value, new_value);
}
// Array form of UpdateOffset: applies it to each of the N (stored value, attribute) pairs
// in index order, sharing the same dirty flag and include_components setting.
template <size_t N>
static void UpdateOffsets(bool* dirty, bool include_components, std::array<u32, N>* old_value,
                          const std::array<AttributeFormat, N>& attribute)
{
  std::array<u32, N>& stored = *old_value;
  for (size_t idx = 0; idx != N; ++idx)
  {
    UpdateOffset(dirty, include_components, &stored[idx], attribute[idx]);
  }
}
// Copies the vertex layout (component mask, stride and per-attribute offsets) into the
// shader constants. The dirty flag is raised if any stored value ends up changing.
void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const dirty_flag = &dirty;
  auto& vs_constants = constants;

  // Plain scalar values; the stride is stored divided by 4, like the offsets below.
  UpdateValue(dirty_flag, &vs_constants.components, components);
  UpdateValue(dirty_flag, &vs_constants.vertex_stride, format.stride / 4);

  // Position and texcoords also carry their component count in the packed value.
  UpdateOffset(dirty_flag, true, &vs_constants.vertex_offset_position, format.position);
  UpdateOffset(dirty_flag, false, &vs_constants.vertex_offset_posmtx, format.posmtx);
  UpdateOffsets(dirty_flag, true, &vs_constants.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(dirty_flag, false, &vs_constants.vertex_offset_colors, format.colors);
  UpdateOffsets(dirty_flag, false, &vs_constants.vertex_offset_normals, format.normals);
}
void VertexShaderManager::SetTexMatrixInfoChanged(int index)
{
// TODO: Should we track this with more precision, like which indices changed?

View File

@ -11,6 +11,7 @@
#include "Common/CommonTypes.h"
#include "Common/Matrix.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/NativeVertexFormat.h"
class PointerWrap;
struct PortableVertexDeclaration;
@ -34,7 +35,6 @@ public:
void SetProjectionChanged();
void SetMaterialColorChanged(int index);
void SetVertexFormat(u32 components, const PortableVertexDeclaration& format);
void SetTexMatrixInfoChanged(int index);
void SetLightingConfigChanged();
@ -49,6 +49,45 @@ public:
VertexShaderConstants constants{};
bool dirty = false;
// Stores new_value into *old_value and raises *dirty, but only if the stored value
// actually differs. When the values already match, neither output is touched.
static DOLPHIN_FORCE_INLINE void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  if (*old_value != new_value)
  {
    *old_value = new_value;
    *dirty = true;
  }
}
// Refreshes one packed attribute offset from the given attribute description.
// Attributes that are not enabled leave *old_value and *dirty untouched.
// When include_components is set, attribute.components is OR'ed in, shifted left by 16 bits.
static DOLPHIN_FORCE_INLINE void UpdateOffset(bool* dirty, bool include_components,
                                              u32* old_value, const AttributeFormat& attribute)
{
  if (attribute.enable)
  {
    // GPU uses uint offsets, so the attribute offset is divided by 4.
    u32 packed = attribute.offset / 4;
    if (include_components)
      packed |= attribute.components << 16;

    UpdateValue(dirty, old_value, packed);
  }
}
// Array form of UpdateOffset: applies it to each of the N (stored value, attribute) pairs
// in index order, sharing the same dirty flag and include_components setting.
template <size_t N>
static DOLPHIN_FORCE_INLINE void UpdateOffsets(bool* dirty, bool include_components,
                                               std::array<u32, N>* old_value,
                                               const std::array<AttributeFormat, N>& attribute)
{
  std::array<u32, N>& stored = *old_value;
  for (size_t idx = 0; idx != N; ++idx)
  {
    UpdateOffset(dirty, include_components, &stored[idx], attribute[idx]);
  }
}
// Copies the vertex layout (component mask, stride and per-attribute offsets) into the
// shader constants. The dirty flag is raised if any stored value ends up changing.
DOLPHIN_FORCE_INLINE void SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const dirty_flag = &dirty;
  auto& vs_constants = constants;

  // Plain scalar values; the stride is stored divided by 4, like the offsets below.
  UpdateValue(dirty_flag, &vs_constants.components, components);
  UpdateValue(dirty_flag, &vs_constants.vertex_stride, format.stride / 4);

  // Position and texcoords also carry their component count in the packed value.
  UpdateOffset(dirty_flag, true, &vs_constants.vertex_offset_position, format.position);
  UpdateOffset(dirty_flag, false, &vs_constants.vertex_offset_posmtx, format.posmtx);
  UpdateOffsets(dirty_flag, true, &vs_constants.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(dirty_flag, false, &vs_constants.vertex_offset_colors, format.colors);
  UpdateOffsets(dirty_flag, false, &vs_constants.vertex_offset_normals, format.normals);
}
private:
alignas(16) std::array<float, 16> m_projection_matrix;