Merge pull request #11673 from K0bin/vertex-loader-micro-opt
Vertex Loader Microoptimization
This commit is contained in:
commit
50a45bd614
|
@ -258,11 +258,6 @@ VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)
|
|||
|
||||
static void CheckCPConfiguration(int vtx_attr_group)
|
||||
{
|
||||
if (!g_needs_cp_xf_consistency_check) [[likely]]
|
||||
return;
|
||||
|
||||
g_needs_cp_xf_consistency_check = false;
|
||||
|
||||
// Validate that the XF input configuration matches the CP configuration
|
||||
u32 num_cp_colors = std::count_if(
|
||||
g_main_cp_state.vtx_desc.low.Color.begin(), g_main_cp_state.vtx_desc.low.Color.end(),
|
||||
|
@ -359,20 +354,25 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
|
|||
// Doing early return for the opposite case would be cleaner
|
||||
// but triggers a false unreachable code warning in MSVC debug builds.
|
||||
|
||||
if (g_needs_cp_xf_consistency_check) [[unlikely]]
|
||||
{
|
||||
CheckCPConfiguration(vtx_attr_group);
|
||||
g_needs_cp_xf_consistency_check = false;
|
||||
}
|
||||
|
||||
// If the native vertex format changed, force a flush.
|
||||
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
|
||||
loader->m_native_components != g_current_components) [[unlikely]]
|
||||
{
|
||||
g_vertex_manager->Flush();
|
||||
}
|
||||
|
||||
s_current_vtx_fmt = loader->m_native_vertex_format;
|
||||
g_current_components = loader->m_native_components;
|
||||
auto& system = Core::System::GetInstance();
|
||||
auto& vertex_shader_manager = system.GetVertexShaderManager();
|
||||
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
|
||||
loader->m_native_vertex_format->GetVertexDeclaration());
|
||||
}
|
||||
|
||||
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
|
||||
// Therefore it's only useful to check if culling could remove a flush
|
||||
|
|
|
@ -140,7 +140,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
|
||||
primitive_from_gx_pr[primitive] :
|
||||
primitive_from_gx[primitive];
|
||||
if (m_current_primitive_type != new_primitive_type)
|
||||
if (m_current_primitive_type != new_primitive_type) [[unlikely]]
|
||||
{
|
||||
Flush();
|
||||
|
||||
|
@ -149,9 +149,11 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
SetRasterizationStateChanged();
|
||||
}
|
||||
|
||||
u32 remaining_indices = GetRemainingIndices(primitive);
|
||||
u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
|
||||
|
||||
// Check for size in buffer, if the buffer gets full, call Flush()
|
||||
if (!m_is_flushed && (count > m_index_generator.GetRemainingIndices(primitive) ||
|
||||
count > GetRemainingIndices(primitive) ||
|
||||
if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices ||
|
||||
needed_vertex_bytes > GetRemainingSize())) [[unlikely]]
|
||||
{
|
||||
Flush();
|
||||
|
@ -160,7 +162,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
m_cull_all = cullall;
|
||||
|
||||
// need to alloc new buffer
|
||||
if (m_is_flushed)
|
||||
if (m_is_flushed) [[unlikely]]
|
||||
{
|
||||
if (cullall)
|
||||
{
|
||||
|
@ -174,6 +176,8 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
ResetBuffer(stride);
|
||||
}
|
||||
|
||||
remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
|
||||
remaining_indices = GetRemainingIndices(primitive);
|
||||
m_is_flushed = false;
|
||||
}
|
||||
|
||||
|
@ -181,14 +185,14 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
// won't have enough space in a few rare cases, such as vertex shader line/point expansion with a
|
||||
// ton of lines in one draw command, in which case we will either need to add support for
|
||||
// splitting a single draw command into multiple draws or using bigger indices.
|
||||
ASSERT_MSG(VIDEO, count <= m_index_generator.GetRemainingIndices(primitive),
|
||||
ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices,
|
||||
"VertexManager: Too few remaining index values ({} > {}). "
|
||||
"32-bit indices or primitive breaking needed.",
|
||||
count, m_index_generator.GetRemainingIndices(primitive));
|
||||
ASSERT_MSG(VIDEO, count <= GetRemainingIndices(primitive),
|
||||
count, remaining_index_generator_indices);
|
||||
ASSERT_MSG(VIDEO, count <= remaining_indices,
|
||||
"VertexManager: Buffer not large enough for all indices! ({} > {}) "
|
||||
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.",
|
||||
count, GetRemainingIndices(primitive));
|
||||
count, remaining_indices);
|
||||
ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(),
|
||||
"VertexManager: Buffer not large enough for all vertices! ({} > {}) "
|
||||
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.",
|
||||
|
|
|
@ -621,44 +621,6 @@ void VertexShaderManager::SetMaterialColorChanged(int index)
|
|||
m_materials_changed[index] = true;
|
||||
}
|
||||
|
||||
// Writes new_value into *old_value and raises *dirty, but only when the stored
// value actually differs. An unchanged value leaves both outputs untouched.
static void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  if (*old_value != new_value)
  {
    *old_value = new_value;
    *dirty = true;
  }
}
|
||||
|
||||
static void UpdateOffset(bool* dirty, bool include_components, u32* old_value,
|
||||
const AttributeFormat& attribute)
|
||||
{
|
||||
if (!attribute.enable)
|
||||
return;
|
||||
u32 new_value = attribute.offset / 4; // GPU uses uint offsets
|
||||
if (include_components)
|
||||
new_value |= attribute.components << 16;
|
||||
UpdateValue(dirty, old_value, new_value);
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
static void UpdateOffsets(bool* dirty, bool include_components, std::array<u32, N>* old_value,
|
||||
const std::array<AttributeFormat, N>& attribute)
|
||||
{
|
||||
for (size_t i = 0; i < N; i++)
|
||||
UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]);
|
||||
}
|
||||
|
||||
// Copies the vertex layout given by `components` and `format` into `constants`.
// `dirty` is set if any stored value ends up different from before.
// Position and texcoord offsets also carry their component counts; posmtx,
// color and normal offsets do not.
void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const changed = &dirty;
  auto& dst = constants;

  UpdateValue(changed, &dst.components, components);
  UpdateValue(changed, &dst.vertex_stride, format.stride / 4);

  UpdateOffset(changed, true, &dst.vertex_offset_position, format.position);
  UpdateOffset(changed, false, &dst.vertex_offset_posmtx, format.posmtx);

  UpdateOffsets(changed, true, &dst.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(changed, false, &dst.vertex_offset_colors, format.colors);
  UpdateOffsets(changed, false, &dst.vertex_offset_normals, format.normals);
}
|
||||
|
||||
void VertexShaderManager::SetTexMatrixInfoChanged(int index)
|
||||
{
|
||||
// TODO: Should we track this with more precision, like which indices changed?
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Matrix.h"
|
||||
#include "VideoCommon/ConstantManager.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
|
||||
class PointerWrap;
|
||||
struct PortableVertexDeclaration;
|
||||
|
@ -34,7 +35,6 @@ public:
|
|||
void SetProjectionChanged();
|
||||
void SetMaterialColorChanged(int index);
|
||||
|
||||
void SetVertexFormat(u32 components, const PortableVertexDeclaration& format);
|
||||
void SetTexMatrixInfoChanged(int index);
|
||||
void SetLightingConfigChanged();
|
||||
|
||||
|
@ -49,6 +49,45 @@ public:
|
|||
VertexShaderConstants constants{};
|
||||
bool dirty = false;
|
||||
|
||||
// Overwrites *old_value with new_value and sets *dirty when the two differ.
// If they already match, nothing is written.
static DOLPHIN_FORCE_INLINE void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  const bool differs = (*old_value != new_value);
  if (differs)
  {
    *old_value = new_value;
    *dirty = true;
  }
}
|
||||
|
||||
// Updates a single cached attribute offset by way of UpdateValue.
// Does nothing for an attribute whose `enable` flag is false.
// With include_components set, attribute.components shifted left by 16 is
// OR-ed into the value.
static DOLPHIN_FORCE_INLINE void UpdateOffset(bool* dirty, bool include_components,
                                              u32* old_value, const AttributeFormat& attribute)
{
  if (attribute.enable)
  {
    u32 encoded = attribute.offset / 4;  // GPU uses uint offsets
    if (include_components)
    {
      encoded |= attribute.components << 16;
    }
    UpdateValue(dirty, old_value, encoded);
  }
}
|
||||
|
||||
template <size_t N>
|
||||
static DOLPHIN_FORCE_INLINE void UpdateOffsets(bool* dirty, bool include_components,
|
||||
std::array<u32, N>* old_value,
|
||||
const std::array<AttributeFormat, N>& attribute)
|
||||
{
|
||||
for (size_t i = 0; i < N; i++)
|
||||
UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]);
|
||||
}
|
||||
|
||||
// Records the vertex layout (`components` plus the offsets and stride in
// `format`) in `constants`, setting `dirty` whenever a stored value changes.
// Component counts are packed alongside the position and texcoord offsets only.
DOLPHIN_FORCE_INLINE void SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const changed = &dirty;
  auto& dst = constants;

  UpdateValue(changed, &dst.components, components);
  UpdateValue(changed, &dst.vertex_stride, format.stride / 4);

  UpdateOffset(changed, true, &dst.vertex_offset_position, format.position);
  UpdateOffset(changed, false, &dst.vertex_offset_posmtx, format.posmtx);

  UpdateOffsets(changed, true, &dst.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(changed, false, &dst.vertex_offset_colors, format.colors);
  UpdateOffsets(changed, false, &dst.vertex_offset_normals, format.normals);
}
|
||||
|
||||
private:
|
||||
alignas(16) std::array<float, 16> m_projection_matrix;
|
||||
|
||||
|
|
Loading…
Reference in New Issue