OGL: Track state of last bound vertex array object

This avoids the overhead of calling glBindVertexArray() every time
RestoreAPIState() is called, even when the bind would be redundant.
This commit is contained in:
Stenzek 2018-01-21 00:59:10 +10:00
parent fca9c28f38
commit 3fd4142f36
9 changed files with 59 additions and 29 deletions

View File

@ -19,6 +19,7 @@
#include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/SamplerCache.h"
#include "VideoBackends/OGL/TextureConverter.h" #include "VideoBackends/OGL/TextureConverter.h"
#include "VideoBackends/OGL/VertexManager.h"
#include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VertexShaderGen.h"
@ -395,6 +396,8 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
glEnableVertexAttribArray(SHADER_COLOR1_ATTRIB); glEnableVertexAttribArray(SHADER_COLOR1_ATTRIB);
glVertexAttribIPointer(SHADER_COLOR1_ATTRIB, 1, GL_INT, sizeof(EfbPokeData), glVertexAttribIPointer(SHADER_COLOR1_ATTRIB, 1, GL_INT, sizeof(EfbPokeData),
(void*)offsetof(EfbPokeData, data)); (void*)offsetof(EfbPokeData, data));
glBindBuffer(GL_ARRAY_BUFFER,
static_cast<VertexManager*>(g_vertex_manager.get())->GetVertexBufferHandle());
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL) if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
glEnable(GL_PROGRAM_POINT_SIZE); glEnable(GL_PROGRAM_POINT_SIZE);
@ -563,8 +566,6 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype)
{ {
g_renderer->ResetAPIState(); g_renderer->ResetAPIState();
OpenGL_BindAttributelessVAO();
GLuint src_texture = 0; GLuint src_texture = 0;
// We aren't allowed to render and sample the same texture in one draw call, // We aren't allowed to render and sample the same texture in one draw call,
@ -582,6 +583,7 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype)
g_sampler_cache->BindNearestSampler(9); g_sampler_cache->BindNearestSampler(9);
m_pixel_format_shaders[convtype ? 1 : 0].Bind(); m_pixel_format_shaders[convtype ? 1 : 0].Bind();
ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glBindTexture(m_textureType, 0); glBindTexture(m_textureType, 0);
@ -607,6 +609,8 @@ void FramebufferManager::PokeEFB(EFBAccessType type, const EfbPokeData* points,
glViewport(0, 0, m_targetWidth, m_targetHeight); glViewport(0, 0, m_targetWidth, m_targetHeight);
glDrawArrays(GL_POINTS, 0, (GLsizei)num_points); glDrawArrays(GL_POINTS, 0, (GLsizei)num_points);
glBindBuffer(GL_ARRAY_BUFFER,
static_cast<VertexManager*>(g_vertex_manager.get())->GetVertexBufferHandle());
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();
// TODO: Could just update the EFB cache with the new value // TODO: Could just update the EFB cache with the new value

View File

@ -5,7 +5,6 @@
#include "VideoBackends/OGL/PostProcessing.h" #include "VideoBackends/OGL/PostProcessing.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
@ -47,7 +46,7 @@ void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle
glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight()); glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight());
OpenGL_BindAttributelessVAO(); ProgramShaderCache::BindVertexFormat(nullptr);
m_shader.Bind(); m_shader.Bind();

View File

@ -40,13 +40,14 @@
namespace OGL namespace OGL
{ {
static constexpr u32 UBO_LENGTH = 32 * 1024 * 1024; static constexpr u32 UBO_LENGTH = 32 * 1024 * 1024;
static constexpr u32 INVALID_VAO = std::numeric_limits<u32>::max();
std::unique_ptr<ProgramShaderCache::SharedContextAsyncShaderCompiler> std::unique_ptr<ProgramShaderCache::SharedContextAsyncShaderCompiler>
ProgramShaderCache::s_async_compiler; ProgramShaderCache::s_async_compiler;
u32 ProgramShaderCache::s_ubo_buffer_size; u32 ProgramShaderCache::s_ubo_buffer_size;
s32 ProgramShaderCache::s_ubo_align; s32 ProgramShaderCache::s_ubo_align;
u32 ProgramShaderCache::s_last_VAO = INVALID_VAO; GLuint ProgramShaderCache::s_attributeless_VBO = 0;
GLuint ProgramShaderCache::s_attributeless_VAO = 0;
GLuint ProgramShaderCache::s_last_VAO = 0;
static std::unique_ptr<StreamBuffer> s_buffer; static std::unique_ptr<StreamBuffer> s_buffer;
static int num_failures = 0; static int num_failures = 0;
@ -608,6 +609,7 @@ void ProgramShaderCache::Init()
LoadProgramBinaries(); LoadProgramBinaries();
CreateHeader(); CreateHeader();
CreateAttributelessVAO();
CurrentProgram = 0; CurrentProgram = 0;
last_entry = nullptr; last_entry = nullptr;
@ -657,7 +659,6 @@ void ProgramShaderCache::Reload()
if (g_ActiveConfig.CanPrecompileUberShaders()) if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders(); PrecompileUberShaders();
InvalidateVertexFormat();
CurrentProgram = 0; CurrentProgram = 0;
last_entry = nullptr; last_entry = nullptr;
last_uber_entry = nullptr; last_uber_entry = nullptr;
@ -681,14 +682,38 @@ void ProgramShaderCache::Shutdown()
s_program_disk_cache.Close(); s_program_disk_cache.Close();
s_uber_program_disk_cache.Close(); s_uber_program_disk_cache.Close();
InvalidateVertexFormat();
DestroyShaders(); DestroyShaders();
s_buffer.reset(); s_buffer.reset();
glBindVertexArray(0);
glDeleteBuffers(1, &s_attributeless_VBO);
glDeleteVertexArrays(1, &s_attributeless_VAO);
s_attributeless_VBO = 0;
s_attributeless_VAO = 0;
s_last_VAO = 0;
}
void ProgramShaderCache::CreateAttributelessVAO()
{
glGenVertexArrays(1, &s_attributeless_VAO);
// In a compatibility context, we require a valid, bound array buffer.
glGenBuffers(1, &s_attributeless_VBO);
// Initialize the buffer with nothing. 16 floats is an arbitrary size that may work around driver
// issues.
glBindBuffer(GL_ARRAY_BUFFER, s_attributeless_VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * 16, nullptr, GL_STATIC_DRAW);
// We must also define vertex attribute 0.
glBindVertexArray(s_attributeless_VAO);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
glEnableVertexAttribArray(0);
} }
void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format) void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format)
{ {
u32 new_VAO = vertex_format ? vertex_format->VAO : 0; u32 new_VAO = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
if (s_last_VAO == new_VAO) if (s_last_VAO == new_VAO)
return; return;
@ -698,15 +723,7 @@ void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format)
void ProgramShaderCache::InvalidateVertexFormat() void ProgramShaderCache::InvalidateVertexFormat()
{ {
s_last_VAO = INVALID_VAO; s_last_VAO = 0;
}
void ProgramShaderCache::BindLastVertexFormat()
{
if (s_last_VAO != INVALID_VAO)
glBindVertexArray(s_last_VAO);
else
glBindVertexArray(0);
} }
GLuint ProgramShaderCache::CreateProgramFromBinary(const u8* value, u32 value_size) GLuint ProgramShaderCache::CreateProgramFromBinary(const u8* value, u32 value_size)

View File

@ -98,7 +98,6 @@ public:
static SHADER* SetUberShader(PrimitiveType primitive_type, const GLVertexFormat* vertex_format); static SHADER* SetUberShader(PrimitiveType primitive_type, const GLVertexFormat* vertex_format);
static void BindVertexFormat(const GLVertexFormat* vertex_format); static void BindVertexFormat(const GLVertexFormat* vertex_format);
static void InvalidateVertexFormat(); static void InvalidateVertexFormat();
static void BindLastVertexFormat();
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
const std::string& gcode = ""); const std::string& gcode = "");
@ -191,6 +190,7 @@ private:
typedef std::map<SHADERUID, PCacheEntry> PCache; typedef std::map<SHADERUID, PCacheEntry> PCache;
typedef std::map<UBERSHADERUID, PCacheEntry> UberPCache; typedef std::map<UBERSHADERUID, PCacheEntry> UberPCache;
static void CreateAttributelessVAO();
static GLuint CreateProgramFromBinary(const u8* value, u32 value_size); static GLuint CreateProgramFromBinary(const u8* value, u32 value_size);
static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size); static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size);
static void LoadProgramBinaries(); static void LoadProgramBinaries();
@ -210,7 +210,10 @@ private:
static std::unique_ptr<SharedContextAsyncShaderCompiler> s_async_compiler; static std::unique_ptr<SharedContextAsyncShaderCompiler> s_async_compiler;
static u32 s_ubo_buffer_size; static u32 s_ubo_buffer_size;
static s32 s_ubo_align; static s32 s_ubo_align;
static u32 s_last_VAO;
static GLuint s_attributeless_VBO;
static GLuint s_attributeless_VAO;
static GLuint s_last_VAO;
}; };
} // namespace OGL } // namespace OGL

View File

@ -9,6 +9,7 @@
#include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/RasterFont.h" #include "VideoBackends/OGL/RasterFont.h"
#include "VideoBackends/OGL/VertexManager.h"
// globals // globals
@ -181,6 +182,9 @@ RasterFont::RasterFont()
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat) * 4, glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat) * 4,
(GLfloat*)nullptr + 2); (GLfloat*)nullptr + 2);
glBindBuffer(GL_ARRAY_BUFFER,
static_cast<VertexManager*>(g_vertex_manager.get())->GetVertexBufferHandle());
ProgramShaderCache::InvalidateVertexFormat();
} }
RasterFont::~RasterFont() RasterFont::~RasterFont()
@ -278,5 +282,9 @@ void RasterFont::printMultilineText(const std::string& text, double start_x, dou
GLfloat((color >> 8) & 0xff) / 255.f, GLfloat((color >> 0) & 0xff) / 255.f, GLfloat((color >> 8) & 0xff) / 255.f, GLfloat((color >> 0) & 0xff) / 255.f,
GLfloat((color >> 24) & 0xff) / 255.f); GLfloat((color >> 24) & 0xff) / 255.f);
glDrawArrays(GL_TRIANGLES, 0, usage / 4); glDrawArrays(GL_TRIANGLES, 0, usage / 4);
glBindBuffer(GL_ARRAY_BUFFER,
static_cast<VertexManager*>(g_vertex_manager.get())->GetVertexBufferHandle());
ProgramShaderCache::InvalidateVertexFormat();
} }
} }

View File

@ -1493,10 +1493,6 @@ void Renderer::RestoreAPIState()
BPFunctions::SetViewport(); BPFunctions::SetViewport();
BPFunctions::SetDepthMode(); BPFunctions::SetDepthMode();
BPFunctions::SetBlendMode(); BPFunctions::SetBlendMode();
ProgramShaderCache::BindLastVertexFormat();
const VertexManager* const vm = static_cast<VertexManager*>(g_vertex_manager.get());
glBindBuffer(GL_ARRAY_BUFFER, vm->GetVertexBufferHandle());
} }
void Renderer::SetRasterizationState(const RasterizationState& state) void Renderer::SetRasterizationState(const RasterizationState& state)

View File

@ -320,7 +320,7 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture);
g_sampler_cache->BindNearestSampler(10); g_sampler_cache->BindNearestSampler(10);
OpenGL_BindAttributelessVAO(); ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
FramebufferManager::SetFramebuffer(0); FramebufferManager::SetFramebuffer(0);
@ -496,8 +496,6 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer());
OpenGL_BindAttributelessVAO();
glActiveTexture(GL_TEXTURE9); glActiveTexture(GL_TEXTURE9);
glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture); glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture);
if (scale_by_half) if (scale_by_half)
@ -539,6 +537,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glUniform4f(shader.position_uniform, static_cast<float>(R.left), static_cast<float>(R.top), glUniform4f(shader.position_uniform, static_cast<float>(R.left), static_cast<float>(R.top),
static_cast<float>(R.right), static_cast<float>(R.bottom)); static_cast<float>(R.right), static_cast<float>(R.bottom));
ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
FramebufferManager::SetFramebuffer(0); FramebufferManager::SetFramebuffer(0);

View File

@ -111,8 +111,6 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
FramebufferManager::SetFramebuffer( FramebufferManager::SetFramebuffer(
static_cast<OGLTexture*>(s_encoding_render_texture.get())->GetFramebuffer()); static_cast<OGLTexture*>(s_encoding_render_texture.get())->GetFramebuffer());
OpenGL_BindAttributelessVAO();
// set source texture // set source texture
glActiveTexture(GL_TEXTURE9); glActiveTexture(GL_TEXTURE9);
glBindTexture(GL_TEXTURE_2D_ARRAY, srcTexture); glBindTexture(GL_TEXTURE_2D_ARRAY, srcTexture);
@ -128,6 +126,7 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
glViewport(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight); glViewport(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight);
ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
MathUtil::Rectangle<int> copy_rect(0, 0, dst_line_size / 4, dstHeight); MathUtil::Rectangle<int> copy_rect(0, 0, dst_line_size / 4, dstHeight);

View File

@ -95,6 +95,11 @@ void VertexManager::ResetBuffer(u32 stride)
} }
else else
{ {
// The index buffer is part of the VAO state, therefore we need to bind it first.
const GLVertexFormat* vertex_format =
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
ProgramShaderCache::BindVertexFormat(vertex_format);
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride); auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first;
m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE;