VideoCommon: Cull vertices on the CPU
This commit is contained in:
parent
b170ef9651
commit
1be0149146
|
@ -93,6 +93,7 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
|
|||
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
|
||||
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
|
||||
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
|
||||
const Info<bool> GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, false};
|
||||
|
||||
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
|
||||
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
|
||||
|
|
|
@ -82,6 +82,7 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
|
|||
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
|
||||
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
|
||||
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
|
||||
extern const Info<bool> GFX_CPU_CULL;
|
||||
|
||||
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
|
||||
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "VideoCommon/VertexLoaderBase.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
namespace VertexLoaderManager
|
||||
|
@ -366,17 +367,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
|
|||
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
|
||||
loader->m_native_vertex_format->GetVertexDeclaration());
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence
|
||||
// slope.
|
||||
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
|
||||
// Therefore it's only useful to check if culling could remove a flush
|
||||
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
|
||||
!g_vertex_manager->HasSendableVertices();
|
||||
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
|
||||
primitive, count, loader->m_native_vtx_decl.stride, cullall);
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze
|
||||
// reference slope.
|
||||
const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
|
||||
const int stride = loader->m_native_vtx_decl.stride;
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
|
||||
cullall || can_cpu_cull);
|
||||
|
||||
count = loader->RunVertices(src, dst.GetPointer(), count);
|
||||
|
||||
if (can_cpu_cull && !cullall)
|
||||
{
|
||||
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count))
|
||||
{
|
||||
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
|
||||
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
|
||||
}
|
||||
}
|
||||
|
||||
g_vertex_manager->AddIndices(primitive, count);
|
||||
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
||||
|
||||
|
|
|
@ -104,6 +104,7 @@ VertexManagerBase::~VertexManagerBase() = default;
|
|||
// Sets up the helper objects owned by the vertex manager by calling Init() on
// the index generator and on the CPU culler. Always returns true.
bool VertexManagerBase::Initialize()
|
||||
{
|
||||
m_index_generator.Init();
|
||||
m_cpu_cull.Init();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -117,6 +118,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
|
|||
m_index_generator.AddIndices(primitive, num_vertices);
|
||||
}
|
||||
|
||||
// Thin wrapper: forwards loader, primitive, src and count unchanged to
// m_cpu_cull.AreAllVerticesCulled() and returns its result.
// NOTE(review): presumably true means none of the `count` vertices at `src`
// would produce a visible primitive - confirm against CPUCull.
bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
|
||||
OpcodeDecoder::Primitive primitive, const u8* src,
|
||||
u32 count)
|
||||
{
|
||||
return m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
|
||||
}
|
||||
|
||||
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
|
||||
u32 count, u32 stride, bool cullall)
|
||||
{
|
||||
|
@ -185,6 +193,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
|||
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
||||
}
|
||||
|
||||
// Turns cull-all mode off if it is currently on.
// When m_cull_all is set, it is cleared and ResetBuffer(stride) is called;
// when it is already clear, no state is changed.
// In both cases returns a DataReader spanning m_cur_buffer_pointer to
// m_end_buffer_pointer.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
|
||||
{
|
||||
if (m_cull_all)
|
||||
{
|
||||
m_cull_all = false;
|
||||
// The buffer is reset only on the transition from cull-all to not culling.
ResetBuffer(stride);
|
||||
}
|
||||
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
||||
}
|
||||
|
||||
void VertexManagerBase::FlushData(u32 count, u32 stride)
|
||||
{
|
||||
m_cur_buffer_pointer += count * stride;
|
||||
|
@ -546,6 +564,8 @@ void VertexManagerBase::Flush()
|
|||
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
|
||||
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
|
||||
const u32 num_indices = m_index_generator.GetIndexLen();
|
||||
if (num_indices == 0)
|
||||
return;
|
||||
u32 base_vertex, base_index;
|
||||
CommitBuffer(m_index_generator.GetNumVerts(),
|
||||
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "VideoCommon/CPUCull.h"
|
||||
#include "VideoCommon/IndexGenerator.h"
|
||||
#include "VideoCommon/RenderState.h"
|
||||
#include "VideoCommon/ShaderCache.h"
|
||||
|
@ -100,11 +101,18 @@ public:
|
|||
|
||||
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
|
||||
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
|
||||
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
|
||||
const u8* src, u32 count);
|
||||
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
|
||||
u32 stride, bool cullall);
|
||||
/// Switch cullall off after a call to PrepareForAdditionalData with cullall true
|
||||
/// Expects that you will add a nonzero number of primitives before the next flush
|
||||
/// Returns a DataReader over the current vertex buffer (the buffer is reset first if cullall was on)
|
||||
DataReader DisableCullAll(u32 stride);
|
||||
void FlushData(u32 count, u32 stride);
|
||||
|
||||
void Flush();
|
||||
bool HasSendableVertices() const { return !m_is_flushed && !m_cull_all; }
|
||||
|
||||
void DoState(PointerWrap& p);
|
||||
|
||||
|
@ -201,6 +209,7 @@ protected:
|
|||
bool m_cull_all = false;
|
||||
|
||||
IndexGenerator m_index_generator;
|
||||
CPUCull m_cpu_cull;
|
||||
|
||||
private:
|
||||
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
|
||||
|
|
|
@ -113,6 +113,7 @@ void VideoConfig::Refresh()
|
|||
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
|
||||
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
||||
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
||||
bCPUCull = Config::Get(Config::GFX_CPU_CULL);
|
||||
|
||||
texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
|
||||
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
|
||||
|
|
|
@ -138,6 +138,7 @@ struct VideoConfig final
|
|||
bool bPerfQueriesEnable = false;
|
||||
bool bBBoxEnable = false;
|
||||
bool bForceProgressive = false;
|
||||
bool bCPUCull = false;
|
||||
|
||||
bool bEFBEmulateFormatChanges = false;
|
||||
bool bSkipEFBCopyToRam = false;
|
||||
|
|
Loading…
Reference in New Issue