VideoCommon: Cull vertices on the CPU
This commit is contained in:
parent
b170ef9651
commit
1be0149146
|
@ -93,6 +93,7 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
|
||||||
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
|
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
|
||||||
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
|
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
|
||||||
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
|
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
|
||||||
|
const Info<bool> GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, false};
|
||||||
|
|
||||||
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
|
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
|
||||||
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
|
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
|
||||||
|
|
|
@ -82,6 +82,7 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
|
||||||
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
|
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
|
||||||
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
|
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
|
||||||
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
|
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
|
||||||
|
extern const Info<bool> GFX_CPU_CULL;
|
||||||
|
|
||||||
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
|
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
|
||||||
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
|
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "VideoCommon/VertexLoaderBase.h"
|
#include "VideoCommon/VertexLoaderBase.h"
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
#include "VideoCommon/VertexShaderManager.h"
|
#include "VideoCommon/VertexShaderManager.h"
|
||||||
|
#include "VideoCommon/VideoConfig.h"
|
||||||
#include "VideoCommon/XFMemory.h"
|
#include "VideoCommon/XFMemory.h"
|
||||||
|
|
||||||
namespace VertexLoaderManager
|
namespace VertexLoaderManager
|
||||||
|
@ -366,17 +367,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
|
||||||
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
|
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
|
||||||
loader->m_native_vertex_format->GetVertexDeclaration());
|
loader->m_native_vertex_format->GetVertexDeclaration());
|
||||||
|
|
||||||
|
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
|
||||||
|
// Therefore it's only useful to check if culling could remove a flush
|
||||||
|
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
|
||||||
|
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
|
||||||
|
!g_vertex_manager->HasSendableVertices();
|
||||||
|
|
||||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||||
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence
|
// They still need to go through vertex loading, because we need to calculate a zfreeze
|
||||||
// slope.
|
// reference slope.
|
||||||
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||||
|
|
||||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
|
const int stride = loader->m_native_vtx_decl.stride;
|
||||||
primitive, count, loader->m_native_vtx_decl.stride, cullall);
|
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
|
||||||
|
cullall || can_cpu_cull);
|
||||||
|
|
||||||
count = loader->RunVertices(src, dst.GetPointer(), count);
|
count = loader->RunVertices(src, dst.GetPointer(), count);
|
||||||
|
|
||||||
|
if (can_cpu_cull && !cullall)
|
||||||
|
{
|
||||||
|
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count))
|
||||||
|
{
|
||||||
|
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
|
||||||
|
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
g_vertex_manager->AddIndices(primitive, count);
|
g_vertex_manager->AddIndices(primitive, count);
|
||||||
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,7 @@ VertexManagerBase::~VertexManagerBase() = default;
|
||||||
bool VertexManagerBase::Initialize()
|
bool VertexManagerBase::Initialize()
|
||||||
{
|
{
|
||||||
m_index_generator.Init();
|
m_index_generator.Init();
|
||||||
|
m_cpu_cull.Init();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,6 +118,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
|
||||||
m_index_generator.AddIndices(primitive, num_vertices);
|
m_index_generator.AddIndices(primitive, num_vertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Asks the CPU culler whether every one of the given vertices is culled.
// Thin wrapper: forwards loader, primitive, src and count unchanged to
// m_cpu_cull.AreAllVerticesCulled and returns its result.
// The RunVertices caller in this change passes the buffer that loader->RunVertices just
// wrote, together with the vertex count that call returned.
// NOTE(review): the exact culling criteria live in CPUCull, which is not visible here.
bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
|
||||||
|
OpcodeDecoder::Primitive primitive, const u8* src,
|
||||||
|
u32 count)
|
||||||
|
{
|
||||||
|
return m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
|
||||||
|
}
|
||||||
|
|
||||||
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
|
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
|
||||||
u32 count, u32 stride, bool cullall)
|
u32 count, u32 stride, bool cullall)
|
||||||
{
|
{
|
||||||
|
@ -185,6 +193,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
||||||
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Switches cull-all mode off and returns a reader over the current write range.
// If m_cull_all is set, it is cleared and ResetBuffer(stride) is called; if it is already
// clear, no state is changed. In both cases the returned DataReader spans
// m_cur_buffer_pointer to m_end_buffer_pointer.
// The RunVertices caller in this change memmoves its already-loaded vertices to the returned
// pointer; presumably ResetBuffer can move the write position -- confirm against the
// backend ResetBuffer implementations.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
|
||||||
|
{
|
||||||
|
// Only reset the buffer when we are actually leaving cull-all mode.
if (m_cull_all)
|
||||||
|
{
|
||||||
|
m_cull_all = false;
|
||||||
|
ResetBuffer(stride);
|
||||||
|
}
|
||||||
|
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
||||||
|
}
|
||||||
|
|
||||||
void VertexManagerBase::FlushData(u32 count, u32 stride)
|
void VertexManagerBase::FlushData(u32 count, u32 stride)
|
||||||
{
|
{
|
||||||
m_cur_buffer_pointer += count * stride;
|
m_cur_buffer_pointer += count * stride;
|
||||||
|
@ -546,6 +564,8 @@ void VertexManagerBase::Flush()
|
||||||
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
|
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
|
||||||
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
|
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
|
||||||
const u32 num_indices = m_index_generator.GetIndexLen();
|
const u32 num_indices = m_index_generator.GetIndexLen();
|
||||||
|
if (num_indices == 0)
|
||||||
|
return;
|
||||||
u32 base_vertex, base_index;
|
u32 base_vertex, base_index;
|
||||||
CommitBuffer(m_index_generator.GetNumVerts(),
|
CommitBuffer(m_index_generator.GetNumVerts(),
|
||||||
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
|
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "Common/BitSet.h"
|
#include "Common/BitSet.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/MathUtil.h"
|
#include "Common/MathUtil.h"
|
||||||
|
#include "VideoCommon/CPUCull.h"
|
||||||
#include "VideoCommon/IndexGenerator.h"
|
#include "VideoCommon/IndexGenerator.h"
|
||||||
#include "VideoCommon/RenderState.h"
|
#include "VideoCommon/RenderState.h"
|
||||||
#include "VideoCommon/ShaderCache.h"
|
#include "VideoCommon/ShaderCache.h"
|
||||||
|
@ -100,11 +101,18 @@ public:
|
||||||
|
|
||||||
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
|
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
|
||||||
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
|
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
|
||||||
|
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
|
||||||
|
const u8* src, u32 count);
|
||||||
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
|
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
|
||||||
u32 stride, bool cullall);
|
u32 stride, bool cullall);
|
||||||
|
/// Switch cullall off after a call to PrepareForAdditionalData with cullall true
|
||||||
|
/// Expects that you will add a nonzero number of primitives before the next flush
|
||||||
|
/// Returns a DataReader for the current write position (the buffer is reset if cullall was on)
|
||||||
|
DataReader DisableCullAll(u32 stride);
|
||||||
void FlushData(u32 count, u32 stride);
|
void FlushData(u32 count, u32 stride);
|
||||||
|
|
||||||
void Flush();
|
void Flush();
|
||||||
|
bool HasSendableVertices() const { return !m_is_flushed && !m_cull_all; }
|
||||||
|
|
||||||
void DoState(PointerWrap& p);
|
void DoState(PointerWrap& p);
|
||||||
|
|
||||||
|
@ -201,6 +209,7 @@ protected:
|
||||||
bool m_cull_all = false;
|
bool m_cull_all = false;
|
||||||
|
|
||||||
IndexGenerator m_index_generator;
|
IndexGenerator m_index_generator;
|
||||||
|
CPUCull m_cpu_cull;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
|
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
|
||||||
|
|
|
@ -113,6 +113,7 @@ void VideoConfig::Refresh()
|
||||||
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
|
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
|
||||||
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
||||||
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
||||||
|
bCPUCull = Config::Get(Config::GFX_CPU_CULL);
|
||||||
|
|
||||||
texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
|
texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
|
||||||
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
|
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
|
||||||
|
|
|
@ -138,6 +138,7 @@ struct VideoConfig final
|
||||||
bool bPerfQueriesEnable = false;
|
bool bPerfQueriesEnable = false;
|
||||||
bool bBBoxEnable = false;
|
bool bBBoxEnable = false;
|
||||||
bool bForceProgressive = false;
|
bool bForceProgressive = false;
|
||||||
|
bool bCPUCull = false;
|
||||||
|
|
||||||
bool bEFBEmulateFormatChanges = false;
|
bool bEFBEmulateFormatChanges = false;
|
||||||
bool bSkipEFBCopyToRam = false;
|
bool bSkipEFBCopyToRam = false;
|
||||||
|
|
Loading…
Reference in New Issue