ogl: implement useful constant buffer upload

This removes the additional memcpy introduced in my last commit.
This commit is contained in:
degasus 2013-10-07 17:19:47 +02:00
parent 4377618438
commit 7c14463d11
9 changed files with 75 additions and 110 deletions

View File

@ -27,20 +27,5 @@ namespace OGL
// Renderer functions // Renderer functions
void Renderer::SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) void Renderer::SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{ {
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
ProgramShaderCache::SetMultiPSConstant4fv(const_number, f, count);
return;
}
ProgramShaderCache::PCacheEntry tmp = ProgramShaderCache::GetShaderProgram();
for (unsigned int a = 0; a < 10; ++a)
{
u32 offset = PSVar_Loc[a].reg - const_number;
if(offset >= count) return;
u32 size = std::min(tmp.shader.UniformSize[a], count-offset);
if(size > 0)
glUniform4fv(tmp.shader.UniformLocations[a], size, f + 4*offset);
}
} }
} // namespace OGL } // namespace OGL

View File

@ -10,18 +10,16 @@
#include "Statistics.h" #include "Statistics.h"
#include "ImageWrite.h" #include "ImageWrite.h"
#include "Render.h" #include "Render.h"
#include "PixelShaderManager.h"
#include "VertexShaderManager.h"
namespace OGL namespace OGL
{ {
static const u32 UBO_LENGTH = 32*1024*1024; static const u32 UBO_LENGTH = 32*1024*1024;
GLintptr ProgramShaderCache::s_vs_data_size;
GLintptr ProgramShaderCache::s_ps_data_size;
GLintptr ProgramShaderCache::s_vs_data_offset;
u8 *ProgramShaderCache::s_ubo_buffer;
u32 ProgramShaderCache::s_ubo_buffer_size; u32 ProgramShaderCache::s_ubo_buffer_size;
bool ProgramShaderCache::s_ubo_dirty; s32 ProgramShaderCache::s_ubo_align;
static StreamBuffer *s_buffer; static StreamBuffer *s_buffer;
static int num_failures = 0; static int num_failures = 0;
@ -36,6 +34,10 @@ UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_chec
static char s_glsl_header[1024] = ""; static char s_glsl_header[1024] = "";
// Annoying sure, can be removed once we drop our UBO workaround
const char *UniformNames[NUM_UNIFORMS] = const char *UniformNames[NUM_UNIFORMS] =
{ {
// PIXEL SHADER UNIFORMS // PIXEL SHADER UNIFORMS
@ -61,6 +63,37 @@ const char *UniformNames[NUM_UNIFORMS] =
I_DEPTHPARAMS, I_DEPTHPARAMS,
}; };
// One row of the uniform fallback tables (PSVar_Loc / VSVar_Loc).
// Both members are const, so rows are only ever built via aggregate
// initialisation: { reg, size }.
struct s_svar
{
	const unsigned reg;   // constant register at which the uniform starts
	const unsigned size;  // NOTE(review): presumably the length in vec4 registers - confirm
};
// Pixel-shader rows of the non-UBO uniform table: one { reg, size } entry
// per pixel-shader uniform (10 entries).
// ProgramShaderCache::UploadConstants() walks this table with a = 0..9 next
// to shader.UniformLocations[a] / shader.UniformSize[a], and uses `reg` as a
// vec4 offset (4 * reg floats) into PixelShaderManager::constants.
// NOTE(review): the row order presumably has to match the order of the
// pixel-shader names in UniformNames[] - confirm before reordering.
const s_svar PSVar_Loc[] = { {C_COLORS, 4 },
{C_KCOLORS, 4 },
{C_ALPHA, 1 },
{C_TEXDIMS, 8 },
{C_ZBIAS, 2 },
{C_INDTEXSCALE, 2 },
{C_INDTEXMTX, 6 },
{C_FOG, 3 },
{C_PLIGHTS, 40 },
{C_PMATERIALS, 4 },
};
// Vertex-shader rows of the non-UBO uniform table: one { reg, size } entry
// per vertex-shader uniform (9 entries).
// ProgramShaderCache::UploadConstants() walks this table with a = 0..8; the
// matching shader uniform slot is a + 10 (the first 10 slots belong to the
// pixel-shader table). `reg` is used as a vec4 offset (4 * reg floats) into
// VertexShaderManager::constants.
// NOTE(review): the row order presumably has to match the order of the
// vertex-shader names in UniformNames[] - confirm before reordering.
const s_svar VSVar_Loc[] = { {C_POSNORMALMATRIX, 6 },
{C_PROJECTION, 4 },
{C_MATERIALS, 4 },
{C_LIGHTS, 40 },
{C_TEXMATRICES, 24 },
{C_TRANSFORMMATRICES, 64 },
{C_NORMALMATRICES, 32 },
{C_POSTTRANSFORMMATRICES, 64 },
{C_DEPTHPARAMS, 1 },
};
// End of UBO workaround
void SHADER::SetProgramVariables() void SHADER::SetProgramVariables()
{ {
// glsl shader must be bind to set samplers // glsl shader must be bind to set samplers
@ -162,31 +195,44 @@ void SHADER::Bind()
} }
} }
void ProgramShaderCache::SetMultiPSConstant4fv(unsigned int offset, const float *f, unsigned int count)
{
s_ubo_dirty = true;
memcpy(s_ubo_buffer+(offset*4*sizeof(float)), f, count*4*sizeof(float));
}
void ProgramShaderCache::SetMultiVSConstant4fv(unsigned int offset, const float *f, unsigned int count)
{
s_ubo_dirty = true;
memcpy(s_ubo_buffer+(offset*4*sizeof(float))+s_vs_data_offset, f, count*4*sizeof(float));
}
void ProgramShaderCache::UploadConstants() void ProgramShaderCache::UploadConstants()
{ {
if(s_ubo_dirty) { if(g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
if(PixelShaderManager::dirty || VertexShaderManager::dirty)
{
s_buffer->Alloc(s_ubo_buffer_size); s_buffer->Alloc(s_ubo_buffer_size);
size_t offset = s_buffer->Upload(s_ubo_buffer, s_ubo_buffer_size);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset, s_ps_data_size); size_t offset = s_buffer->Upload((u8*)&PixelShaderManager::constants, ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align));
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset + s_vs_data_offset, s_vs_data_size); glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->getBuffer(), offset, sizeof(PixelShaderConstants));
s_ubo_dirty = false; offset = s_buffer->Upload((u8*)&VertexShaderManager::constants, ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align));
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->getBuffer(), offset, sizeof(VertexShaderConstants));
PixelShaderManager::dirty = false;
VertexShaderManager::dirty = false;
ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size); ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size);
} }
} }
else
{
// UBO workaround
// this must be updated per shader switch, so also update it when it's not dirty
for (unsigned int a = 0; a < 10; ++a)
{
if(last_entry->shader.UniformSize[a] > 0)
glUniform4fv(last_entry->shader.UniformLocations[a], last_entry->shader.UniformSize[a], (float*) &PixelShaderManager::constants + 4*PSVar_Loc[a].reg);
}
for (unsigned int a = 0; a < 9; ++a)
{
if(last_entry->shader.UniformSize[a+10] > 0)
glUniform4fv(last_entry->shader.UniformLocations[a+10], last_entry->shader.UniformSize[a+10], (float*) &VertexShaderManager::constants + 4*VSVar_Loc[a].reg);
}
ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size);
}
}
GLuint ProgramShaderCache::GetCurrentProgram(void) GLuint ProgramShaderCache::GetCurrentProgram(void)
{ {
@ -419,22 +465,14 @@ void ProgramShaderCache::Init(void)
// then the UBO will fail. // then the UBO will fail.
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{ {
GLint Align; glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &Align);
s_ps_data_size = C_PENVCONST_END * sizeof(float) * 4; s_ubo_buffer_size = ROUND_UP(sizeof(PixelShaderConstants), s_ubo_align) + ROUND_UP(sizeof(VertexShaderConstants), s_ubo_align);
s_vs_data_size = C_VENVCONST_END * sizeof(float) * 4;
s_vs_data_offset = ROUND_UP(s_ps_data_size, Align);
s_ubo_buffer_size = ROUND_UP(s_ps_data_size, Align) + ROUND_UP(s_vs_data_size, Align);
// We multiply by *4*4 because we need to get down to basic machine units. // We multiply by *4*4 because we need to get down to basic machine units.
// So multiply by four to get how many floats we have from vec4s // So multiply by four to get how many floats we have from vec4s
// Then once more to get bytes // Then once more to get bytes
s_buffer = new StreamBuffer(GL_UNIFORM_BUFFER, UBO_LENGTH); s_buffer = new StreamBuffer(GL_UNIFORM_BUFFER, UBO_LENGTH);
s_ubo_buffer = new u8[s_ubo_buffer_size];
memset(s_ubo_buffer, 0, s_ubo_buffer_size);
s_ubo_dirty = true;
} }
// Read our shader cache, only if supported // Read our shader cache, only if supported
@ -509,8 +547,6 @@ void ProgramShaderCache::Shutdown(void)
{ {
delete s_buffer; delete s_buffer;
s_buffer = 0; s_buffer = 0;
delete [] s_ubo_buffer;
s_ubo_buffer = 0;
} }
} }

View File

@ -87,9 +87,6 @@ public:
static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode); static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode);
static GLuint CompileSingleShader(GLuint type, const char *code); static GLuint CompileSingleShader(GLuint type, const char *code);
static void SetMultiPSConstant4fv(unsigned int offset, const float *f, unsigned int count);
static void SetMultiVSConstant4fv(unsigned int offset, const float *f, unsigned int count);
static void UploadConstants(); static void UploadConstants();
static void Init(void); static void Init(void);
@ -110,12 +107,8 @@ private:
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker; static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker; static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
static GLintptr s_vs_data_size;
static GLintptr s_ps_data_size;
static GLintptr s_vs_data_offset;
static u8 *s_ubo_buffer;
static u32 s_ubo_buffer_size; static u32 s_ubo_buffer_size;
static bool s_ubo_dirty; static s32 s_ubo_align;
}; };
} // namespace OGL } // namespace OGL

View File

@ -26,20 +26,6 @@ namespace OGL
void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f) void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{ {
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
ProgramShaderCache::SetMultiVSConstant4fv(const_number, f, count);
return;
}
ProgramShaderCache::PCacheEntry tmp = ProgramShaderCache::GetShaderProgram();
for (unsigned int a = 0; a < 9; ++a)
{
u32 offset = VSVar_Loc[a].reg - const_number;
if(offset >= count) return;
u32 size = std::min(tmp.shader.UniformSize[a+10], count-offset);
if(size > 0)
glUniform4fv(tmp.shader.UniformLocations[a+10], size, f + 4*offset);
}
} }
} // namespace OGL } // namespace OGL

View File

@ -43,19 +43,6 @@ enum DSTALPHA_MODE
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
}; };
// Annoying sure, can be removed once we get up to GLSL ~1.3
// Pixel-shader uniform table for the path that sets uniforms one by one with
// glUniform4fv instead of through a UBO: one { reg, size } entry per
// pixel-shader uniform (10 entries). Users index it with a = 0..9 next to
// shader.UniformLocations[a] / shader.UniformSize[a]; `reg` is the constant
// register at which the uniform starts.
// NOTE(review): `size` is presumably the uniform's length in vec4 registers,
// and the row order presumably has to match the uniform slot order - confirm.
const s_svar PSVar_Loc[] = { {C_COLORS, 4 },
{C_KCOLORS, 4 },
{C_ALPHA, 1 },
{C_TEXDIMS, 8 },
{C_ZBIAS, 2 },
{C_INDTEXSCALE, 2 },
{C_INDTEXMTX, 6 },
{C_FOG, 3 },
{C_PLIGHTS, 40 },
{C_PMATERIALS, 4 },
};
#pragma pack(1) #pragma pack(1)
struct pixel_shader_uid_data struct pixel_shader_uid_data
{ {

View File

@ -95,9 +95,6 @@ void PixelShaderManager::Shutdown()
void PixelShaderManager::SetConstants(u32 components) void PixelShaderManager::SetConstants(u32 components)
{ {
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
dirty = true;
for (int i = 0; i < 2; ++i) for (int i = 0; i < 2; ++i)
{ {
if (s_nColorsChanged[i]) if (s_nColorsChanged[i])
@ -359,7 +356,7 @@ void PixelShaderManager::SetConstants(u32 components)
} }
} }
if(dirty) if(dirty && g_ActiveConfig.backend_info.APIType != API_OPENGL)
{ {
g_renderer->SetMultiPSConstant4fv(0, sizeof(constants)/16, (float*) &constants); g_renderer->SetMultiPSConstant4fv(0, sizeof(constants)/16, (float*) &constants);
dirty = false; dirty = false;

View File

@ -53,17 +53,6 @@
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1) #define C_VENVCONST_END (C_DEPTHPARAMS + 1)
// Vertex-shader uniform table for the path that sets uniforms one by one with
// glUniform4fv instead of through a UBO: one { reg, size } entry per
// vertex-shader uniform (9 entries). Users index it with a = 0..8 and address
// the matching shader uniform slot as a + 10; `reg` is the constant register
// at which the uniform starts.
// The sizes line up with the register spacing of the C_* defines just above
// (e.g. C_DEPTHPARAMS = C_POSTTRANSFORMMATRICES + 64, C_VENVCONST_END =
// C_DEPTHPARAMS + 1).
// NOTE(review): the row order presumably has to match the uniform slot order
// - confirm before reordering.
const s_svar VSVar_Loc[] = { {C_POSNORMALMATRIX, 6 },
{C_PROJECTION, 4 },
{C_MATERIALS, 4 },
{C_LIGHTS, 40 },
{C_TEXMATRICES, 24 },
{C_TRANSFORMMATRICES, 64 },
{C_NORMALMATRICES, 32 },
{C_POSTTRANSFORMMATRICES, 64 },
{C_DEPTHPARAMS, 1 },
};
#pragma pack(1) #pragma pack(1)
struct vertex_shader_uid_data struct vertex_shader_uid_data

View File

@ -225,9 +225,6 @@ void VertexShaderManager::Dirty()
// TODO: A cleaner way to control the matrices without making a mess in the parameters field // TODO: A cleaner way to control the matrices without making a mess in the parameters field
void VertexShaderManager::SetConstants() void VertexShaderManager::SetConstants()
{ {
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
dirty = true;
if (nTransformMatricesChanged[0] >= 0) if (nTransformMatricesChanged[0] >= 0)
{ {
int startn = nTransformMatricesChanged[0] / 4; int startn = nTransformMatricesChanged[0] / 4;
@ -521,7 +518,7 @@ void VertexShaderManager::SetConstants()
} }
} }
if(dirty) if(dirty && g_ActiveConfig.backend_info.APIType != API_OPENGL)
{ {
dirty = false; dirty = false;
g_renderer->SetMultiVSConstant4fv(0, sizeof(constants)/16, (float*) &constants); g_renderer->SetMultiVSConstant4fv(0, sizeof(constants)/16, (float*) &constants);

View File

@ -132,10 +132,5 @@ inline unsigned int GetPow2(unsigned int val)
++ret; ++ret;
return ret; return ret;
} }
// One row of a shader-uniform lookup table (see PSVar_Loc / VSVar_Loc).
// Both members are const, so rows are only ever built via aggregate
// initialisation: { reg, size }.
struct s_svar
{
	const unsigned reg;   // constant register at which the uniform starts
	const unsigned size;  // NOTE(review): presumably the length in vec4 registers - confirm
};
#endif // _VIDEOCOMMON_H #endif // _VIDEOCOMMON_H