From 7c14463d11f816b1dd0cd2efd9bbe018f4d64a4a Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 7 Oct 2013 17:19:47 +0200 Subject: [PATCH] ogl: implement useful constant buffer upload this will remove the additional memcpy introduced in my last commit --- .../OGL/Src/PixelShaderCache.cpp | 15 --- .../OGL/Src/ProgramShaderCache.cpp | 108 ++++++++++++------ .../OGL/Src/ProgramShaderCache.h | 9 +- .../OGL/Src/VertexShaderCache.cpp | 14 --- Source/Core/VideoCommon/Src/PixelShaderGen.h | 13 --- .../VideoCommon/Src/PixelShaderManager.cpp | 5 +- Source/Core/VideoCommon/Src/VertexShaderGen.h | 11 -- .../VideoCommon/Src/VertexShaderManager.cpp | 5 +- Source/Core/VideoCommon/Src/VideoCommon.h | 5 - 9 files changed, 75 insertions(+), 110 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/PixelShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/PixelShaderCache.cpp index 0635003dfd..ba6a0b2e97 100644 --- a/Source/Core/VideoBackends/OGL/Src/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/PixelShaderCache.cpp @@ -27,20 +27,5 @@ namespace OGL // Renderer functions void Renderer::SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { - if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - { - ProgramShaderCache::SetMultiPSConstant4fv(const_number, f, count); - return; - } - - ProgramShaderCache::PCacheEntry tmp = ProgramShaderCache::GetShaderProgram(); - for (unsigned int a = 0; a < 10; ++a) - { - u32 offset = PSVar_Loc[a].reg - const_number; - if(offset >= count) return; - u32 size = std::min(tmp.shader.UniformSize[a], count-offset); - if(size > 0) - glUniform4fv(tmp.shader.UniformLocations[a], size, f + 4*offset); - } } } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp index 94b9502d2f..0904ef41eb 100644 --- a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp @@ -10,18 +10,16 @@ #include "Statistics.h" #include "ImageWrite.h" #include "Render.h" +#include "PixelShaderManager.h" +#include "VertexShaderManager.h" namespace OGL { static const u32 UBO_LENGTH = 32*1024*1024; -GLintptr ProgramShaderCache::s_vs_data_size; -GLintptr ProgramShaderCache::s_ps_data_size; -GLintptr ProgramShaderCache::s_vs_data_offset; -u8 *ProgramShaderCache::s_ubo_buffer; u32 ProgramShaderCache::s_ubo_buffer_size; -bool ProgramShaderCache::s_ubo_dirty; +s32 ProgramShaderCache::s_ubo_align; static StreamBuffer *s_buffer; static int num_failures = 0; @@ -36,6 +34,10 @@ UidChecker ProgramShaderCache::vertex_uid_chec static char s_glsl_header[1024] = ""; + + +// Annoying sure, can be removed once we drop our UBO workaround + const char *UniformNames[NUM_UNIFORMS] = { // PIXEL SHADER UNIFORMS @@ -61,6 +63,37 @@ const char *UniformNames[NUM_UNIFORMS] = I_DEPTHPARAMS, }; +struct s_svar +{ + const unsigned int reg; + const unsigned int size; +}; + +const s_svar PSVar_Loc[] = { {C_COLORS, 4 }, + {C_KCOLORS, 4 }, + {C_ALPHA, 1 }, + {C_TEXDIMS, 8 }, + {C_ZBIAS, 2 }, + {C_INDTEXSCALE, 2 }, + {C_INDTEXMTX, 6 }, + {C_FOG, 3 }, + {C_PLIGHTS, 40 }, + {C_PMATERIALS, 4 }, + }; + +const s_svar VSVar_Loc[] = { {C_POSNORMALMATRIX, 6 }, + {C_PROJECTION, 4 }, + {C_MATERIALS, 4 }, + {C_LIGHTS, 40 }, + {C_TEXMATRICES, 24 }, + {C_TRANSFORMMATRICES, 64 }, + {C_NORMALMATRICES, 32 }, + {C_POSTTRANSFORMMATRICES, 64 }, + {C_DEPTHPARAMS, 1 }, + }; + +// End of UBO workaround + void SHADER::SetProgramVariables() { // glsl shader must be bind to set samplers @@ -162,30 +195,43 @@ void SHADER::Bind() } } - -void ProgramShaderCache::SetMultiPSConstant4fv(unsigned int offset, const float *f, unsigned int count) -{ - s_ubo_dirty = true; - memcpy(s_ubo_buffer+(offset*4*sizeof(float)), f, count*4*sizeof(float)); -} - -void ProgramShaderCache::SetMultiVSConstant4fv(unsigned int offset, const float *f, unsigned int count) -{ - s_ubo_dirty = true; - memcpy(s_ubo_buffer+(offset*4*sizeof(float))+s_vs_data_offset, f, count*4*sizeof(float)); -} - void ProgramShaderCache::UploadConstants() { - if(s_ubo_dirty) { - s_buffer->Alloc(s_ubo_buffer_size); - size_t offset = s_buffer->Upload(s_ubo_buffer, s_ubo_buffer_size); - glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset, s_ps_data_size); - glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset + s_vs_data_offset, s_vs_data_size); - s_ubo_dirty = false; + if(g_ActiveConfig.backend_info.bSupportsGLSLUBO) + { + if(PixelShaderManager::dirty || VertexShaderManager::dirty) + { + s_buffer->Alloc(s_ubo_buffer_size); + + size_t offset = s_buffer->Upload((u8*)&PixelShaderManager::constants, ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align)); + glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset, sizeof(PixelShaderConstants)); + offset = s_buffer->Upload((u8*)&VertexShaderManager::constants, ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align)); + glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset, sizeof(VertexShaderConstants)); + + PixelShaderManager::dirty = false; + VertexShaderManager::dirty = false; + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size); + } + } + else + { + // UBO workaround + // this must be updated per shader switch, so also update it when it's not dirty + for (unsigned int a = 0; a < 10; ++a) + { + if(last_entry->shader.UniformSize[a] > 0) + glUniform4fv(last_entry->shader.UniformLocations[a], last_entry->shader.UniformSize[a], (float*) &PixelShaderManager::constants + 4*PSVar_Loc[a].reg); + } + for (unsigned int a = 0; a < 9; ++a) + { + if(last_entry->shader.UniformSize[a+10] > 0) + glUniform4fv(last_entry->shader.UniformLocations[a+10], last_entry->shader.UniformSize[a+10], (float*) &VertexShaderManager::constants + 4*VSVar_Loc[a].reg); + } ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size); } + } GLuint ProgramShaderCache::GetCurrentProgram(void) @@ -419,22 +465,14 @@ void ProgramShaderCache::Init(void) // then the UBO will fail. if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) { - GLint Align; - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &Align); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align); - s_ps_data_size = C_PENVCONST_END * sizeof(float) * 4; - s_vs_data_size = C_VENVCONST_END * sizeof(float) * 4; - s_vs_data_offset = ROUND_UP(s_ps_data_size, Align); - s_ubo_buffer_size = ROUND_UP(s_ps_data_size, Align) + ROUND_UP(s_vs_data_size, Align); + s_ubo_buffer_size = ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align) + ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align); // We multiply by *4*4 because we need to get down to basic machine units. // So multiply by four to get how many floats we have from vec4s // Then once more to get bytes s_buffer = new StreamBuffer(GL_UNIFORM_BUFFER, UBO_LENGTH); - - s_ubo_buffer = new u8[s_ubo_buffer_size]; - memset(s_ubo_buffer, 0, s_ubo_buffer_size); - s_ubo_dirty = true; } // Read our shader cache, only if supported @@ -509,8 +547,6 @@ void ProgramShaderCache::Shutdown(void) { delete s_buffer; s_buffer = 0; - delete [] s_ubo_buffer; - s_ubo_buffer = 0; } } diff --git a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.h index 6c1350fe34..d0c792aaa0 100644 --- a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.h @@ -87,9 +87,6 @@ public: static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode); static GLuint CompileSingleShader(GLuint type, const char *code); - - static void SetMultiPSConstant4fv(unsigned int offset, const float *f, unsigned int count); - static void SetMultiVSConstant4fv(unsigned int offset, const float *f, unsigned int count); static void UploadConstants(); static void Init(void); @@ -110,12 +107,8 @@ private: static UidChecker pixel_uid_checker; static UidChecker vertex_uid_checker; - static GLintptr s_vs_data_size; - static GLintptr s_ps_data_size; - static GLintptr s_vs_data_offset; - static u8 *s_ubo_buffer; static u32 s_ubo_buffer_size; - static bool s_ubo_dirty; + static s32 s_ubo_align; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Src/VertexShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/VertexShaderCache.cpp index 09f2368bad..c2c49b8505 100644 --- a/Source/Core/VideoBackends/OGL/Src/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/VertexShaderCache.cpp @@ -26,20 +26,6 @@ namespace OGL void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { - if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - { - ProgramShaderCache::SetMultiVSConstant4fv(const_number, f, count); - return; - } - ProgramShaderCache::PCacheEntry tmp = ProgramShaderCache::GetShaderProgram(); - for (unsigned int a = 0; a < 9; ++a) - { - u32 offset = VSVar_Loc[a].reg - const_number; - if(offset >= count) return; - u32 size = std::min(tmp.shader.UniformSize[a+10], count-offset); - if(size > 0) - glUniform4fv(tmp.shader.UniformLocations[a+10], size, f + 4*offset); - } } } // namespace OGL diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index d9ce8c5874..b73b17168c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -43,19 +43,6 @@ enum DSTALPHA_MODE DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending }; -// Annoying sure, can be removed once we get up to GLSL ~1.3 -const s_svar PSVar_Loc[] = { {C_COLORS, 4 }, - {C_KCOLORS, 4 }, - {C_ALPHA, 1 }, - {C_TEXDIMS, 8 }, - {C_ZBIAS, 2 }, - {C_INDTEXSCALE, 2 }, - {C_INDTEXMTX, 6 }, - {C_FOG, 3 }, - {C_PLIGHTS, 40 }, - {C_PMATERIALS, 4 }, - }; - #pragma pack(1) struct pixel_shader_uid_data { diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 3a13e287aa..970bdb6e58 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -95,9 +95,6 @@ void PixelShaderManager::Shutdown() void PixelShaderManager::SetConstants(u32 components) { - if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO) - dirty = true; - for (int i = 0; i < 2; ++i) { if (s_nColorsChanged[i]) @@ -359,7 +356,7 @@ void PixelShaderManager::SetConstants(u32 components) } } - if(dirty) + if(dirty && g_ActiveConfig.backend_info.APIType != API_OPENGL) { g_renderer->SetMultiPSConstant4fv(0, sizeof(constants)/16, (float*) &constants); dirty = false; diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index c5f32c741d..667df311c2 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -53,17 +53,6 @@ #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) #define C_VENVCONST_END (C_DEPTHPARAMS + 1) -const s_svar VSVar_Loc[] = { {C_POSNORMALMATRIX, 6 }, - {C_PROJECTION, 4 }, - {C_MATERIALS, 4 }, - {C_LIGHTS, 40 }, - {C_TEXMATRICES, 24 }, - {C_TRANSFORMMATRICES, 64 }, - {C_NORMALMATRICES, 32 }, - {C_POSTTRANSFORMMATRICES, 64 }, - {C_DEPTHPARAMS, 1 }, - }; - #pragma pack(1) struct vertex_shader_uid_data diff --git a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp index 016fb517e4..20c4994cf8 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderManager.cpp @@ -225,9 +225,6 @@ void VertexShaderManager::Dirty() // TODO: A cleaner way to control the matrices without making a mess in the parameters field void VertexShaderManager::SetConstants() { - if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO) - dirty = true; - if (nTransformMatricesChanged[0] >= 0) { int startn = nTransformMatricesChanged[0] / 4; @@ -521,7 +518,7 @@ void VertexShaderManager::SetConstants() } } - if(dirty) + if(dirty && g_ActiveConfig.backend_info.APIType != API_OPENGL) { dirty = false; g_renderer->SetMultiVSConstant4fv(0, sizeof(constants)/16, (float*) &constants); diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 54a2504631..ef36b1fed6 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -132,10 +132,5 @@ inline unsigned int GetPow2(unsigned int val) ++ret; return ret; } -struct s_svar -{ - const unsigned int reg; - const unsigned int size; -}; #endif // _VIDEOCOMMON_H