gsdx ogl: various minor optimizations.

* Move most of the GL state into a separate namespace. Extend it to the depth/stencil/blend micro-state
=> saves 10,000 OpenGL calls per frame in Colin McRae 3
* Only setup blend state of first drawbuffer
* No longer request a debug context on dev/release builds



git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5713 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2013-08-05 20:25:25 +00:00
parent 34045eb8f7
commit a46b489a24
21 changed files with 419 additions and 193 deletions

View File

@ -50,6 +50,7 @@ endif()
set(GSdxSources
GLLoader.cpp
GLState.cpp
GPU.cpp
GPUDrawScanline.cpp
GPUDrawScanlineCodeGenerator.cpp

View File

@ -32,14 +32,15 @@ PFNGLBINDBUFFERBASEPROC gl_BindBufferBase = NULL;
PFNGLBINDFRAMEBUFFERPROC gl_BindFramebuffer = NULL;
PFNGLBINDSAMPLERPROC gl_BindSampler = NULL;
PFNGLBINDVERTEXARRAYPROC gl_BindVertexArray = NULL;
PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL;
PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL;
PFNGLBLENDEQUATIONSEPARATEIARBPROC gl_BlendEquationSeparateiARB = NULL;
PFNGLBLENDFUNCSEPARATEIARBPROC gl_BlendFuncSeparateiARB = NULL;
PFNGLBLITFRAMEBUFFERPROC gl_BlitFramebuffer = NULL;
PFNGLBUFFERDATAPROC gl_BufferData = NULL;
PFNGLCHECKFRAMEBUFFERSTATUSPROC gl_CheckFramebufferStatus = NULL;
PFNGLCLEARBUFFERFVPROC gl_ClearBufferfv = NULL;
PFNGLCLEARBUFFERIVPROC gl_ClearBufferiv = NULL;
PFNGLCLEARBUFFERUIVPROC gl_ClearBufferuiv = NULL;
PFNGLCOLORMASKIPROC gl_ColorMaski = NULL;
PFNGLCOMPILESHADERPROC gl_CompileShader = NULL;
PFNGLCREATEPROGRAMPROC gl_CreateProgram = NULL;
PFNGLCREATESHADERPROC gl_CreateShader = NULL;

View File

@ -85,8 +85,8 @@ extern PFNGLBINDBUFFERBASEPROC gl_BindBufferBase;
extern PFNGLBINDFRAMEBUFFERPROC gl_BindFramebuffer;
extern PFNGLBINDSAMPLERPROC gl_BindSampler;
extern PFNGLBINDVERTEXARRAYPROC gl_BindVertexArray;
extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate;
extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate;
extern PFNGLBLENDEQUATIONSEPARATEIARBPROC gl_BlendEquationSeparateiARB;
extern PFNGLBLENDFUNCSEPARATEIARBPROC gl_BlendFuncSeparateiARB;
extern PFNGLBLITFRAMEBUFFERPROC gl_BlitFramebuffer;
extern PFNGLBUFFERDATAPROC gl_BufferData;
extern PFNGLCHECKFRAMEBUFFERSTATUSPROC gl_CheckFramebufferStatus;
@ -94,6 +94,7 @@ extern PFNGLCLEARBUFFERFVPROC gl_ClearBufferfv;
extern PFNGLCLEARBUFFERIVPROC gl_ClearBufferiv;
extern PFNGLCLEARBUFFERUIVPROC gl_ClearBufferuiv;
extern PFNGLCOMPILESHADERPROC gl_CompileShader;
extern PFNGLCOLORMASKIPROC gl_ColorMaski;
extern PFNGLCREATEPROGRAMPROC gl_CreateProgram;
extern PFNGLCREATESHADERPROC gl_CreateShader;
extern PFNGLCREATESHADERPROGRAMVPROC gl_CreateShaderProgramv;

116
plugins/GSdx/GLState.cpp Normal file
View File

@ -0,0 +1,116 @@
/*
* Copyright (C) 2011-2013 Gregory hainaut
* Copyright (C) 2007-2009 Gabest
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GLState.h"
namespace GLState {
GLuint fbo = 0;
GLenum draw = GL_NONE;
GSVector2i viewport(0, 0);
GSVector4i scissor(0, 0, 0, 0);
bool blend = false;
GLenum eq_RGB = 0;
GLenum eq_A = 0;
GLenum f_sRGB = 0;
GLenum f_dRGB = 0;
GLenum f_sA = 0;
GLenum f_dA = 0;
bool r_msk = true;
bool g_msk = true;
bool b_msk = true;
bool a_msk = true;
float bf = 0.0;
bool depth = false;
GLenum depth_func = 0;
bool depth_mask = false;
bool stencil = false;
GLenum stencil_func = 0;
GLenum stencil_pass = 0;
GLuint ubo = 0;
GLuint ps_ss = 0;
GLuint rt = 0;
GLuint ds = 0;
GLuint tex_unit[2] = {0, 0};
GLuint tex = 0;
GLuint ps = 0;
GLuint gs = 0;
GLuint vs = 0;
GLuint program = 0;
#if 0
struct {
GSVertexBufferStateOGL* vb;
GSDepthStencilOGL* dss;
GSBlendStateOGL* bs;
float bf; // blend factor
} m_state;
#endif
void Clear() {
fbo = 0;
draw = GL_NONE;
viewport = GSVector2i(0, 0);
scissor = GSVector4i(0, 0, 0, 0);
blend = false;
eq_RGB = 0;
eq_A = 0;
f_sRGB = 0;
f_dRGB = 0;
f_sA = 0;
f_dA = 0;
r_msk = true;
g_msk = true;
b_msk = true;
a_msk = true;
bf = 0.0;
depth = false;
depth_func = 0;
depth_mask = false;
stencil = false;
stencil_func = 0;
stencil_pass = 0;
ubo = 0;
ps_ss = 0;
rt = 0;
ds = 0;
tex_unit[0] = 0;
tex_unit[1] = 0;
tex = 0;
ps = 0;
gs = 0;
vs = 0;
program = 0;
}
}

69
plugins/GSdx/GLState.h Normal file
View File

@ -0,0 +1,69 @@
/*
* Copyright (C) 2011-2013 Gregory hainaut
* Copyright (C) 2007-2009 Gabest
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSdx.h"
#include "GSVector.h"
// Cached copy of the OpenGL state set by the GSdx OpenGL backend.
// The users of these variables compare the value they want with the cached one
// and only issue the matching GL call (and update the cache) when they differ.
namespace GLState {
extern GLuint fbo; // frame buffer object (last one given to gl_BindFramebuffer)
extern GLenum draw; // Drawing buffer (last one given to gl_DrawBuffers)
extern GSVector2i viewport; // size last given to glViewport
extern GSVector4i scissor; // rectangle last given to glScissor
// Blend state of draw buffer 0
extern bool blend; // true when GL_BLEND is enabled
extern GLenum eq_RGB; // blend equation, colour part
extern GLenum eq_A; // blend equation, alpha part
extern GLenum f_sRGB; // source blend factor, colour part
extern GLenum f_dRGB; // destination blend factor, colour part
extern GLenum f_sA; // source blend factor, alpha part
extern GLenum f_dA; // destination blend factor, alpha part
// Colour write mask of draw buffer 0, one flag per channel
extern bool r_msk;
extern bool g_msk;
extern bool b_msk;
extern bool a_msk;
extern float bf; // blend factor (constant last given to gl_BlendColor)
// Depth state
extern bool depth; // true when GL_DEPTH_TEST is enabled
extern GLenum depth_func; // function last given to glDepthFunc
extern bool depth_mask; // value last given to glDepthMask by the depth-stencil state
// Stencil state
extern bool stencil; // true when GL_STENCIL_TEST is enabled
extern GLenum stencil_func; // function last given to glStencilFunc
extern GLenum stencil_pass; // "stencil and depth pass" operation last given to glStencilOp
extern GLuint ubo; // uniform buffer object
extern GLuint ps_ss; // sampler
extern GLuint rt; // render target
extern GLuint ds; // Depth-Stencil
extern GLuint tex_unit[2]; // shader input texture
extern GLuint tex; // Generic texture (for tex operation)
// Shader objects currently selected for each stage
extern GLuint ps;
extern GLuint gs;
extern GLuint vs;
extern GLuint program; // monolith program (when sso isn't supported)
// Reset every cached value above to its initial value; only the cache is
// touched, no GL call is issued.
extern void Clear();
}

View File

@ -21,6 +21,7 @@
#include "stdafx.h"
#include "GSDeviceOGL.h"
#include "GLState.h"
#include "res/glsl_source.h"
@ -39,7 +40,6 @@ static const uint32 g_fxaa_cb_index = 13;
GSDeviceOGL::GSDeviceOGL()
: m_free_window(false)
, m_window(NULL)
, m_pipeline(0)
, m_fbo(0)
, m_fbo_read(0)
, m_vb_sr(NULL)
@ -50,6 +50,7 @@ GSDeviceOGL::GSDeviceOGL()
memset(&m_convert, 0, sizeof(m_convert));
memset(&m_date, 0, sizeof(m_date));
memset(&m_state, 0, sizeof(m_state));
GLState::Clear();
// Reset the debug file
#ifdef ENABLE_OGL_DEBUG
@ -410,7 +411,7 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
} else {
OMSetFBO(m_fbo);
OMSetWriteBuffer();
OMAttachRt(t);
OMAttachRt(static_cast<GSTextureOGL*>(t)->GetID());
gl_ClearBufferfv(GL_COLOR, 0, c.v);
}
@ -435,7 +436,7 @@ void GSDeviceOGL::ClearRenderTarget_ui(GSTexture* t, uint32 c)
OMSetFBO(m_fbo);
OMSetWriteBuffer();
OMAttachRt(t);
OMAttachRt(static_cast<GSTextureOGL*>(t)->GetID());
gl_ClearBufferuiv(GL_COLOR, 0, col);
@ -452,10 +453,10 @@ void GSDeviceOGL::ClearDepth(GSTexture* t, float c)
} else {
OMSetFBO(m_fbo);
OMSetWriteBuffer();
OMAttachDs(t);
OMAttachDs(static_cast<GSTextureOGL*>(t)->GetID());
glDisable(GL_SCISSOR_TEST);
if (m_state.dss != NULL && m_state.dss->IsMaskEnable()) {
if (GLState::depth_mask) {
gl_ClearBufferfv(GL_DEPTH, 0, &c);
} else {
glDepthMask(true);
@ -475,7 +476,7 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
} else {
OMSetFBO(m_fbo);
OMSetWriteBuffer();
OMAttachDs(t);
OMAttachDs(static_cast<GSTextureOGL*>(t)->GetID());
GLint color = c;
glDisable(GL_SCISSOR_TEST);
@ -786,7 +787,7 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
// ps
// ************************************
PSSetShaderResource(0, st);
PSSetShaderResource(0, static_cast<GSTextureOGL*>(st)->GetID());
PSSetSamplerState(linear ? m_convert.ln : m_convert.pt);
m_shader->PS(ps);
@ -813,7 +814,6 @@ void GSDeviceOGL::DoMerge(GSTexture* st[2], GSVector4* sr, GSTexture* dt, GSVect
if(st[0])
{
SetUniformBuffer(m_merge_obj.cb);
m_merge_obj.cb->upload(&c.v);
StretchRect(st[0], sr[0], dt, dr[0], m_merge_obj.ps[mmod ? 1 : 0], m_merge_obj.bs);
@ -832,7 +832,6 @@ void GSDeviceOGL::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool lin
cb.ZrH = GSVector2(0, 1.0f / s.y);
cb.hH = s.y / 2;
SetUniformBuffer(m_interlace.cb);
m_interlace.cb->upload(&cb);
StretchRect(st, sr, dt, dr, m_interlace.ps[shader], linear);
@ -851,7 +850,6 @@ void GSDeviceOGL::DoFXAA(GSTexture* st, GSTexture* dt)
cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f);
cb.rcpFrameOpt = GSVector4::zero();
SetUniformBuffer(m_fxaa.cb);
m_fxaa.cb->upload(&cb);
StretchRect(st, sr, dt, dr, m_fxaa.ps, true);
@ -869,7 +867,6 @@ void GSDeviceOGL::DoShadeBoost(GSTexture* st, GSTexture* dt)
cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f);
cb.rcpFrameOpt = GSVector4::zero();
SetUniformBuffer(m_shadeboost.cb);
m_shadeboost.cb->upload(&cb);
StretchRect(st, sr, dt, dr, m_shadeboost.ps, true);
@ -911,7 +908,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
// ps
PSSetShaderResource(0, rt);
PSSetShaderResource(0, static_cast<GSTextureOGL*>(rt)->GetID());
PSSetSamplerState(m_convert.pt);
m_shader->PS(m_convert.ps[datm ? 2 : 3]);
@ -934,14 +931,6 @@ void GSDeviceOGL::EndScene()
m_state.vb->EndScene();
}
void GSDeviceOGL::SetUniformBuffer(GSUniformBufferOGL* cb)
{
if (m_state.cb != cb) {
m_state.cb = cb;
cb->bind();
}
}
void GSDeviceOGL::IASetVertexState(GSVertexBufferStateOGL* vb)
{
if (vb == NULL) vb = m_vb;
@ -977,19 +966,17 @@ void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology)
m_state.vb->SetTopology(topology);
}
void GSDeviceOGL::PSSetShaderResource(const int i, GSTexture* sr)
void GSDeviceOGL::PSSetShaderResource(const int i, GLuint sr)
{
ASSERT(sr);
if (m_state.tex_unit[i] != sr) {
m_state.tex_unit[i] = sr;
if (GLState::tex_unit[i] != sr) {
GLState::tex_unit[i] = sr;
if (GLLoader::found_GL_ARB_multi_bind) {
GLuint textures[1] = {static_cast<GSTextureOGL*>(sr)->GetID()};
GLuint textures[1] = {sr};
gl_BindTextures(i, 1, textures);
} else {
gl_ActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, static_cast<GSTextureOGL*>(sr)->GetID());
glBindTexture(GL_TEXTURE_2D, sr);
// Get back to the expected active texture unit
gl_ActiveTexture(GL_TEXTURE0 + 3);
@ -997,55 +984,42 @@ void GSDeviceOGL::PSSetShaderResource(const int i, GSTexture* sr)
}
}
void GSDeviceOGL::PSSetShaderResources(GSTexture* tex[2])
void GSDeviceOGL::PSSetShaderResources(GLuint tex[2])
{
if (m_state.tex_unit[0] != tex[0] || m_state.tex_unit[1] != tex[1]) {
GLuint textures[2] = {static_cast<GSTextureOGL*>(tex[0])->GetID(), static_cast<GSTextureOGL*>(tex[1])->GetID()};
if (GLState::tex_unit[0] != tex[0] || GLState::tex_unit[1] != tex[1]) {
GLuint textures[2] = {tex[0], tex[1]};
gl_BindTextures(0, 2, textures);
}
// FIXME without multibind?
#if 0
for (int i = 0; i < count; i++) {
if (m_state.tex_unit[i] != id) {
m_state.tex_unit[i] = id;
gl_ActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, id);
}
}
// Get back to the expected active texture unit
gl_ActiveTexture(GL_TEXTURE0 + 3);
#endif
}
void GSDeviceOGL::PSSetSamplerState(GLuint ss)
{
if (m_state.ps_ss != ss) {
m_state.ps_ss = ss;
if (GLState::ps_ss != ss) {
GLState::ps_ss = ss;
gl_BindSampler(0, ss);
}
}
void GSDeviceOGL::OMAttachRt(GSTexture* rt)
void GSDeviceOGL::OMAttachRt(GLuint rt)
{
if (m_state.rt != rt) {
m_state.rt = rt;
gl_FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, static_cast<GSTextureOGL*>(rt)->GetID(), 0);
if (GLState::rt != rt) {
GLState::rt = rt;
gl_FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rt, 0);
}
}
void GSDeviceOGL::OMAttachDs(GSTexture* ds)
void GSDeviceOGL::OMAttachDs(GLuint ds)
{
if (m_state.ds != ds) {
m_state.ds = ds;
gl_FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, static_cast<GSTextureOGL*>(ds)->GetID(), 0);
if (GLState::ds != ds) {
GLState::ds = ds;
gl_FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, ds, 0);
}
}
void GSDeviceOGL::OMSetFBO(GLuint fbo)
{
if (m_state.fbo != fbo) {
m_state.fbo = fbo;
if (GLState::fbo != fbo) {
GLState::fbo = fbo;
gl_BindFramebuffer(GL_FRAMEBUFFER, fbo);
}
}
@ -1053,8 +1027,8 @@ void GSDeviceOGL::OMSetFBO(GLuint fbo)
void GSDeviceOGL::OMSetWriteBuffer(GLenum buffer)
{
// Note if fbo is 0, standard GL_BACK will be used instead
if (m_state.fbo && m_state.draw != buffer) {
m_state.draw = buffer;
if (GLState::fbo && GLState::draw != buffer) {
GLState::draw = buffer;
GLenum target[1] = {buffer};
gl_DrawBuffers(1, target);
@ -1063,6 +1037,7 @@ void GSDeviceOGL::OMSetWriteBuffer(GLenum buffer)
void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref)
{
// State is checked inside the object, but the worst case is 11 comparisons!
if (m_state.dss != dss) {
m_state.dss = dss;
@ -1073,6 +1048,7 @@ void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref)
void GSDeviceOGL::OMSetBlendState(GSBlendStateOGL* bs, float bf)
{
// State is checked inside the object, but the worst case is 15 comparisons!
if ( m_state.bs != bs || (m_state.bf != bf && bs->HasConstantFactor()) )
{
m_state.bs = bs;
@ -1088,7 +1064,7 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
if (rt) {
OMSetFBO(m_fbo);
OMSetWriteBuffer();
OMAttachRt(rt);
OMAttachRt(static_cast<GSTextureOGL*>(rt)->GetID());
} else {
// Note: NULL rt is only used in DATE so far. Color writing is disabled
// on the blend setup
@ -1098,7 +1074,7 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
// Note: it must be done after OMSetFBO
if (ds)
OMAttachDs(ds);
OMAttachDs(static_cast<GSTextureOGL*>(ds)->GetID());
} else {
// Render in the backbuffer
@ -1108,17 +1084,17 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
GSVector2i size = rt ? rt->GetSize() : ds->GetSize();
if(m_state.viewport != size)
if(GLState::viewport != size)
{
m_state.viewport = size;
GLState::viewport = size;
glViewport(0, 0, size.x, size.y);
}
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_state.scissor.eq(r))
if(!GLState::scissor.eq(r))
{
m_state.scissor = r;
GLState::scissor = r;
glScissor( r.x, r.y, r.width(), r.height() );
}
}

View File

@ -27,17 +27,18 @@
#include "GSVertexArrayOGL.h"
#include "GSUniformBufferOGL.h"
#include "GSShaderOGL.h"
#include "GLState.h"
class GSBlendStateOGL {
// Note: You can also select the index of the draw buffer for which to set the blend setting
// We will keep basic the first try
bool m_enable;
GLenum m_equation_RGB;
GLenum m_equation_ALPHA;
GLenum m_equation_A;
GLenum m_func_sRGB;
GLenum m_func_dRGB;
GLenum m_func_sALPHA;
GLenum m_func_dALPHA;
GLenum m_func_sA;
GLenum m_func_dA;
bool m_r_msk;
bool m_b_msk;
bool m_g_msk;
@ -48,11 +49,11 @@ public:
GSBlendStateOGL() : m_enable(false)
, m_equation_RGB(0)
, m_equation_ALPHA(GL_FUNC_ADD)
, m_equation_A(GL_FUNC_ADD)
, m_func_sRGB(0)
, m_func_dRGB(0)
, m_func_sALPHA(GL_ONE)
, m_func_dALPHA(GL_ZERO)
, m_func_sA(GL_ONE)
, m_func_dA(GL_ZERO)
, m_r_msk(GL_TRUE)
, m_b_msk(GL_TRUE)
, m_g_msk(GL_TRUE)
@ -70,9 +71,9 @@ public:
void SetALPHA(GLenum op, GLenum src, GLenum dst)
{
m_equation_ALPHA = op;
m_func_sALPHA = src;
m_func_dALPHA = dst;
m_equation_A = op;
m_func_sA = src;
m_func_dA = dst;
}
void SetMask(bool r, bool g, bool b, bool a) { m_r_msk = r; m_g_msk = g; m_b_msk = b; m_a_msk = a; }
@ -93,23 +94,50 @@ public:
void SetupColorMask()
{
glColorMask(m_r_msk, m_g_msk, m_b_msk, m_a_msk);
// FIXME align then SSE
if (GLState::r_msk != m_r_msk || GLState::g_msk != m_g_msk || GLState::b_msk != m_b_msk || GLState::a_msk != m_a_msk) {
GLState::r_msk = m_r_msk;
GLState::g_msk = m_g_msk;
GLState::b_msk = m_b_msk;
GLState::a_msk = m_a_msk;
gl_ColorMaski(0, m_r_msk, m_g_msk, m_b_msk, m_a_msk);
}
}
void SetupBlend(float factor)
{
SetupColorMask();
if (m_enable) {
if (GLState::blend != m_enable) {
GLState::blend = m_enable;
if (m_enable)
glEnable(GL_BLEND);
if (HasConstantFactor()) {
gl_BlendColor(factor, factor, factor, 0);
else
glDisable(GL_BLEND);
}
gl_BlendEquationSeparate(m_equation_RGB, m_equation_ALPHA);
gl_BlendFuncSeparate(m_func_sRGB, m_func_dRGB, m_func_sALPHA, m_func_dALPHA);
} else {
glDisable(GL_BLEND);
if (m_enable) {
if (HasConstantFactor()) {
if (GLState::bf != factor) {
GLState::bf = factor;
gl_BlendColor(factor, factor, factor, 0);
}
}
if (GLState::eq_RGB != m_equation_RGB || GLState::eq_A != m_equation_A) {
GLState::eq_RGB = m_equation_RGB;
GLState::eq_A = m_equation_A;
gl_BlendEquationSeparateiARB(0, m_equation_RGB, m_equation_A);
}
// FIXME align then SSE
if (GLState::f_sRGB != m_func_sRGB || GLState::f_dRGB != m_func_dRGB || GLState::f_sA != m_func_sA || GLState::f_dA != m_func_dA) {
GLState::f_sRGB = m_func_sRGB;
GLState::f_dRGB = m_func_dRGB;
GLState::f_sA = m_func_sA;
GLState::f_dA = m_func_dA;
gl_BlendFuncSeparateiARB(0, m_func_sRGB, m_func_dRGB, m_func_sA, m_func_dA);
}
}
}
};
@ -120,12 +148,8 @@ class GSDepthStencilOGL {
GLboolean m_depth_mask;
// Note front face and back might be split but it seems they have same parameter configuration
bool m_stencil_enable;
const GLuint m_stencil_mask;
GLuint m_stencil_func;
const GLuint m_stencil_ref;
const GLuint m_stencil_sfail_op;
const GLuint m_stencil_spass_dfail_op;
GLuint m_stencil_spass_dpass_op;
GLenum m_stencil_func;
GLenum m_stencil_spass_dpass_op;
public:
@ -133,45 +157,65 @@ public:
, m_depth_func(0)
, m_depth_mask(0)
, m_stencil_enable(false)
, m_stencil_mask(1)
, m_stencil_func(0)
, m_stencil_ref(1)
, m_stencil_sfail_op(GL_KEEP)
, m_stencil_spass_dfail_op(GL_KEEP)
, m_stencil_spass_dpass_op(GL_KEEP)
{
// Only needed once since m_stencil_mask is constant
// Control which stencil bitplane are written
glStencilMask(m_stencil_mask);
glStencilMask(1);
}
void EnableDepth() { m_depth_enable = true; }
void EnableStencil() { m_stencil_enable = true; }
void SetDepth(GLenum func, GLboolean mask) { m_depth_func = func; m_depth_mask = mask; }
void SetStencil(GLuint func, GLuint pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; }
void SetStencil(GLenum func, GLenum pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; }
void SetupDepth()
{
if (m_depth_enable) {
if (GLState::depth != m_depth_enable) {
GLState::depth = m_depth_enable;
if (m_depth_enable)
glEnable(GL_DEPTH_TEST);
glDepthFunc(m_depth_func);
glDepthMask(m_depth_mask);
} else
else
glDisable(GL_DEPTH_TEST);
}
if (m_depth_enable) {
if (GLState::depth_func != m_depth_func) {
GLState::depth_func = m_depth_func;
glDepthFunc(m_depth_func);
}
if (GLState::depth_mask != m_depth_mask) {
GLState::depth_mask = m_depth_mask;
glDepthMask(m_depth_mask);
}
}
}
void SetupStencil()
{
if (m_stencil_enable) {
if (GLState::stencil != m_stencil_enable) {
GLState::stencil = m_stencil_enable;
if (m_stencil_enable)
glEnable(GL_STENCIL_TEST);
// Note: here the mask control which bitplane is considered by the operation
glStencilFunc(m_stencil_func, m_stencil_ref, m_stencil_mask);
glStencilOp(m_stencil_sfail_op, m_stencil_spass_dfail_op, m_stencil_spass_dpass_op);
} else
else
glDisable(GL_STENCIL_TEST);
}
if (m_stencil_enable) {
// Note: here the mask control which bitplane is considered by the operation
if (GLState::stencil_func != m_stencil_func) {
GLState::stencil_func = m_stencil_func;
glStencilFunc(m_stencil_func, 1, 1);
}
if (GLState::stencil_pass != m_stencil_spass_dpass_op) {
GLState::stencil_pass = m_stencil_spass_dpass_op;
glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op);
}
}
}
bool IsMaskEnable() { return m_depth_mask != GL_FALSE; }
};
@ -431,7 +475,6 @@ class GSDeviceOGL : public GSDevice
bool m_free_window;
GSWnd* m_window;
GLuint m_pipeline; // pipeline to attach program shader
GLuint m_fbo; // frame buffer container
GLuint m_fbo_read; // frame buffer container only for reading
@ -469,28 +512,16 @@ class GSDeviceOGL : public GSDevice
GSTexture* t;
} m_date;
struct
{
struct {
GLuint ps;
GSUniformBufferOGL *cb;
} m_shadeboost;
struct {
GSVertexBufferStateOGL* vb;
GSUniformBufferOGL* cb;
GLuint ps_ss; // sampler
GSVector2i viewport;
GSVector4i scissor;
GSDepthStencilOGL* dss;
GSBlendStateOGL* bs;
float bf; // blend factor
GLuint fbo;
GLenum draw;
GSTexture* rt; // render target
GSTexture* ds; // Depth-Stencil
GSTexture* tex_unit[2];
} m_state;
GSShaderOGL* m_shader;
@ -519,8 +550,8 @@ class GSDeviceOGL : public GSDevice
void DoFXAA(GSTexture* st, GSTexture* dt);
void DoShadeBoost(GSTexture* st, GSTexture* dt);
void OMAttachRt(GSTexture* rt);
void OMAttachDs(GSTexture* ds);
void OMAttachRt(GLuint rt);
void OMAttachDs(GLuint ds);
void OMSetFBO(GLuint fbo);
public:
@ -576,10 +607,8 @@ class GSDeviceOGL : public GSDevice
void IASetIndexBuffer(const void* index, size_t count);
void IASetVertexState(GSVertexBufferStateOGL* vb = NULL);
void SetUniformBuffer(GSUniformBufferOGL* cb);
void PSSetShaderResource(const int i, GSTexture* sr);
void PSSetShaderResources(GSTexture* tex[2]);
void PSSetShaderResource(const int i, GLuint sr);
void PSSetShaderResources(GLuint tex[2]);
void PSSetSamplerState(GLuint ss);
void PSSetSamplerStates(const int count, const GLuint* samplers);

View File

@ -496,14 +496,14 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->SetupSampler(ps_sel, ps_ssel);
if (tex->m_palette) {
if (GLLoader::found_GL_ARB_multi_bind) {
GSTexture* textures[2] = {tex->m_texture, tex->m_palette};
GLuint textures[2] = {static_cast<GSTextureOGL*>(tex->m_texture)->GetID(), static_cast<GSTextureOGL*>(tex->m_palette)->GetID()};
dev->PSSetShaderResources(textures);
} else {
dev->PSSetShaderResource(1, tex->m_palette);
dev->PSSetShaderResource(0, tex->m_texture);
dev->PSSetShaderResource(1, static_cast<GSTextureOGL*>(tex->m_palette)->GetID());
dev->PSSetShaderResource(0, static_cast<GSTextureOGL*>(tex->m_texture)->GetID());
}
} else {
dev->PSSetShaderResource(0, tex->m_texture);
dev->PSSetShaderResource(0, static_cast<GSTextureOGL*>(tex->m_texture)->GetID());
}
}
else

View File

@ -21,12 +21,9 @@
#include "stdafx.h"
#include "GSShaderOGL.h"
#include "GLState.h"
GSShaderOGL::GSShaderOGL(bool debug, bool sso, bool glsl420) :
m_vs(0),
m_ps(0),
m_gs(0),
m_prog(0),
m_debug_shader(debug),
m_sso(sso),
m_glsl420(glsl420)
@ -53,9 +50,9 @@ GSShaderOGL::~GSShaderOGL()
void GSShaderOGL::VS(GLuint s)
{
if (m_vs != s)
if (GLState::vs != s)
{
m_vs = s;
GLState::vs = s;
#ifndef ENABLE_GLES
if (m_sso)
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
@ -65,9 +62,9 @@ void GSShaderOGL::VS(GLuint s)
void GSShaderOGL::PS(GLuint s)
{
if (m_ps != s)
if (GLState::ps != s)
{
m_ps = s;
GLState::ps = s;
#ifndef ENABLE_GLES
if (m_sso)
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
@ -77,9 +74,9 @@ void GSShaderOGL::PS(GLuint s)
void GSShaderOGL::GS(GLuint s)
{
if (m_gs != s)
if (GLState::gs != s)
{
m_gs = s;
GLState::gs = s;
#ifndef ENABLE_GLES
if (m_sso)
gl_UseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, s);
@ -115,29 +112,29 @@ void GSShaderOGL::SetupUniform()
if (m_glsl420) return;
if (m_sso) {
SetUniformBinding(m_vs, "cb20", 20);
SetUniformBinding(m_ps, "cb21", 21);
SetUniformBinding(GLState::vs, "cb20", 20);
SetUniformBinding(GLState::ps, "cb21", 21);
SetUniformBinding(m_ps, "cb10", 10);
SetUniformBinding(m_ps, "cb11", 11);
SetUniformBinding(m_ps, "cb12", 12);
SetUniformBinding(m_ps, "cb13", 13);
SetUniformBinding(GLState::ps, "cb10", 10);
SetUniformBinding(GLState::ps, "cb11", 11);
SetUniformBinding(GLState::ps, "cb12", 12);
SetUniformBinding(GLState::ps, "cb13", 13);
SetSamplerBinding(m_ps, "TextureSampler", 0);
SetSamplerBinding(m_ps, "PaletteSampler", 1);
SetSamplerBinding(m_ps, "RTCopySampler", 2);
SetSamplerBinding(GLState::ps, "TextureSampler", 0);
SetSamplerBinding(GLState::ps, "PaletteSampler", 1);
SetSamplerBinding(GLState::ps, "RTCopySampler", 2);
} else {
SetUniformBinding(m_prog, "cb20", 20);
SetUniformBinding(m_prog, "cb21", 21);
SetUniformBinding(GLState::program, "cb20", 20);
SetUniformBinding(GLState::program, "cb21", 21);
SetUniformBinding(m_prog, "cb10", 10);
SetUniformBinding(m_prog, "cb11", 11);
SetUniformBinding(m_prog, "cb12", 12);
SetUniformBinding(m_prog, "cb13", 13);
SetUniformBinding(GLState::program, "cb10", 10);
SetUniformBinding(GLState::program, "cb11", 11);
SetUniformBinding(GLState::program, "cb12", 12);
SetUniformBinding(GLState::program, "cb13", 13);
SetSamplerBinding(m_prog, "TextureSampler", 0);
SetSamplerBinding(m_prog, "PaletteSampler", 1);
SetSamplerBinding(m_prog, "RTCopySampler", 2);
SetSamplerBinding(GLState::program, "TextureSampler", 0);
SetSamplerBinding(GLState::program, "PaletteSampler", 1);
SetSamplerBinding(GLState::program, "RTCopySampler", 2);
}
}
@ -213,9 +210,9 @@ bool GSShaderOGL::ValidatePipeline(GLuint p)
GLuint GSShaderOGL::LinkNewProgram()
{
GLuint p = gl_CreateProgram();
if (m_vs) gl_AttachShader(p, m_vs);
if (m_ps) gl_AttachShader(p, m_ps);
if (m_gs) gl_AttachShader(p, m_gs);
if (GLState::vs) gl_AttachShader(p, GLState::vs);
if (GLState::ps) gl_AttachShader(p, GLState::ps);
if (GLState::gs) gl_AttachShader(p, GLState::gs);
gl_LinkProgram(p);
@ -233,20 +230,23 @@ void GSShaderOGL::UseProgram()
// Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128.
// We might be able to pack the value in a 32-bit int
// I would need to check the behavior on Nvidia (pause/resume).
uint64 sel = (uint64)m_vs << 40 | (uint64)m_gs << 20 | m_ps;
uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps;
it = m_single_prog.find(sel);
if (it == m_single_prog.end()) {
m_prog = LinkNewProgram();
m_single_prog[sel] = m_prog;
GLState::program = LinkNewProgram();
m_single_prog[sel] = GLState::program;
ValidateProgram(m_prog);
ValidateProgram(GLState::program);
gl_UseProgram(m_prog);
gl_UseProgram(GLState::program);
// warning it must be done after the "setup" of the program
SetupUniform();
} else {
m_prog = it->second;
gl_UseProgram(m_prog);
GLuint prog = it->second;
if (prog != GLState::program) {
GLState::program = prog;
gl_UseProgram(GLState::program);
}
}
} else {

View File

@ -23,10 +23,6 @@
class GSShaderOGL {
GLuint m_pipeline;
GLuint m_vs;
GLuint m_ps;
GLuint m_gs;
GLuint m_prog;
hash_map<uint64, GLuint > m_single_prog;
const bool m_debug_shader;
const bool m_sso;

View File

@ -138,7 +138,6 @@ void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
GLuint vs = m_vs[sel];
if(m_vs_cb_cache.Update(cb)) {
SetUniformBuffer(m_vs_cb);
m_vs_cb->upload(cb);
}
@ -171,7 +170,6 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb)
// Dynamic
// *************************************************************
if(m_ps_cb_cache.Update(cb)) {
SetUniformBuffer(m_ps_cb);
m_ps_cb->upload(cb);
}

View File

@ -22,6 +22,7 @@
#include "stdafx.h"
#include <limits.h>
#include "GSTextureOGL.h"
#include "GLState.h"
namespace PboPool {
@ -54,8 +55,6 @@ namespace PboPool {
}
}
static GLuint g_tex3_state = 0;
// FIXME: check if it possible to always use those setup by default
// glPixelStorei(GL_PACK_ALIGNMENT, 1);
// glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@ -156,12 +155,13 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
default:
break;
}
// Extra buffer to handle various pixel transfer
gl_GenBuffers(1, &m_pbo_id);
// Allocate the buffer
switch (m_type) {
case GSTexture::Offscreen:
// Extra buffer to handle various pixel transfer
gl_GenBuffers(1, &m_pbo_id);
// Allocate a pbo with the texture
m_pbo_size = (m_size.x * m_size.y) << m_int_shift;
@ -183,9 +183,17 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
GSTextureOGL::~GSTextureOGL()
{
/* Unbind the texture from our local state */
for (uint32 i = 0; i < 5; i++)
if (g_tex3_state == m_texture_id)
g_tex3_state = 0;
if (m_texture_id == GLState::rt)
GLState::rt = 0;
if (m_texture_id == GLState::ds)
GLState::ds = 0;
if (m_texture_id == GLState::tex)
GLState::tex = 0;
if (m_texture_id == GLState::tex_unit[0])
GLState::tex_unit[0] = 0;
if (m_texture_id == GLState::tex_unit[1])
GLState::tex_unit[1] = 0;
gl_DeleteBuffers(1, &m_pbo_id);
glDeleteTextures(1, &m_texture_id);
@ -271,8 +279,8 @@ void GSTextureOGL::EnableUnit()
/* Not a real texture */
ASSERT(!IsBackbuffer());
if (g_tex3_state != m_texture_id) {
g_tex3_state = m_texture_id;
if (GLState::tex != m_texture_id) {
GLState::tex = m_texture_id;
glBindTexture(GL_TEXTURE_2D, m_texture_id);
}
}
@ -529,7 +537,7 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
// Restore state
gl_ActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_2D, g_tex3_state);
glBindTexture(GL_TEXTURE_2D, GLState::tex);
return status;
}

View File

@ -21,6 +21,8 @@
#pragma once
#include "GLState.h"
class GSUniformBufferOGL {
GLuint buffer; // data object
GLuint index; // GLSL slot
@ -38,8 +40,11 @@ public:
void bind()
{
if (GLState::ubo != buffer) {
GLState::ubo = buffer;
gl_BindBuffer(GL_UNIFORM_BUFFER, buffer);
}
}
void allocate()
{
@ -53,6 +58,7 @@ public:
void upload(const void* src)
{
bind();
// glMapBufferRange allow to set various parameter but the call is
// synchronous whereas glBufferSubData could be asynchronous.
// TODO: investigate the extension ARB_invalidate_subdata

View File

@ -33,14 +33,15 @@ void GSWndGL::PopulateGlFunction()
*(void**)&(gl_BindFramebuffer) = GetProcAddress("glBindFramebuffer");
*(void**)&(gl_BindSampler) = GetProcAddress("glBindSampler");
*(void**)&(gl_BindVertexArray) = GetProcAddress("glBindVertexArray");
*(void**)&(gl_BlendEquationSeparate) = GetProcAddress("glBlendEquationSeparate");
*(void**)&(gl_BlendFuncSeparate) = GetProcAddress("glBlendFuncSeparate");
*(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB");
*(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB");
*(void**)&(gl_BlitFramebuffer) = GetProcAddress("glBlitFramebuffer");
*(void**)&(gl_BufferData) = GetProcAddress("glBufferData");
*(void**)&(gl_CheckFramebufferStatus) = GetProcAddress("glCheckFramebufferStatus");
*(void**)&(gl_ClearBufferfv) = GetProcAddress("glClearBufferfv");
*(void**)&(gl_ClearBufferiv) = GetProcAddress("glClearBufferiv");
*(void**)&(gl_ClearBufferuiv) = GetProcAddress("glClearBufferuiv");
*(void**)&(gl_ColorMaski) = GetProcAddress("glColorMaski");
*(void**)&(gl_CompileShader) = GetProcAddress("glCompileShader");
*(void**)&(gl_CreateProgram) = GetProcAddress("glCreateProgram");
*(void**)&(gl_CreateShader) = GetProcAddress("glCreateShader");

View File

@ -43,10 +43,10 @@ void GSWndEGL::CreateContext(int major, int minor)
{
EGL_CONTEXT_MAJOR_VERSION_KHR, major,
EGL_CONTEXT_MINOR_VERSION_KHR, minor,
// Keep compatibility for old cruft
//EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR,
// FIXME : Request a debug context to ease opengl development
EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR | EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR,
#ifdef ENABLE_OGL_DEBUG
EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR,
#endif
EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR,
EGL_NONE
};
#endif

View File

@ -79,10 +79,10 @@ void GSWndOGL::CreateContext(int major, int minor)
{
GLX_CONTEXT_MAJOR_VERSION_ARB, major,
GLX_CONTEXT_MINOR_VERSION_ARB, minor,
// FIXME : Request a debug context to ease opengl development
// Note: don't support deprecated feature (pre openg 3.1)
//GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB | GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
#ifdef ENABLE_OGL_DEBUG
GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB,
#endif
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
None
};

View File

@ -439,6 +439,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="GLLoader.cpp" />
<ClCompile Include="GLState.cpp" />
<ClCompile Include="GPU.cpp" />
<ClCompile Include="GPUDrawScanline.cpp" />
<ClCompile Include="GPUDrawScanlineCodeGenerator.cpp" />
@ -1572,6 +1573,7 @@
<ItemGroup>
<ClInclude Include="config.h" />
<ClInclude Include="GLLoader.h" />
<ClInclude Include="GLState.h" />
<ClInclude Include="GPU.h" />
<ClInclude Include="GPUDrawingEnvironment.h" />
<ClInclude Include="GPUDrawScanline.h" />

View File

@ -276,6 +276,9 @@
<ClCompile Include="GLLoader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GLState.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPU.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -344,6 +347,9 @@
<ClInclude Include="GLLoader.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GLState.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GS.h">
<Filter>Header Files</Filter>
</ClInclude>

View File

@ -567,6 +567,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="GLLoader.cpp" />
<ClCompile Include="GLState.cpp" />
<ClCompile Include="GPU.cpp" />
<ClCompile Include="GPUDrawScanline.cpp" />
<ClCompile Include="GPUDrawScanlineCodeGenerator.cpp" />
@ -1929,6 +1930,7 @@
<ClInclude Include="comptr.h" />
<ClInclude Include="config.h" />
<ClInclude Include="GLLoader.h" />
<ClInclude Include="GLState.h" />
<ClInclude Include="GPU.h" />
<ClInclude Include="GPUDrawingEnvironment.h" />
<ClInclude Include="GPUDrawScanline.h" />

View File

@ -276,6 +276,9 @@
<ClCompile Include="GLLoader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GLState.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GPU.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -350,6 +353,9 @@
<ClInclude Include="GLLoader.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GLState.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="GS.h">
<Filter>Header Files</Filter>
</ClInclude>

View File

@ -777,6 +777,10 @@
RelativePath=".\GLLoader.cpp"
>
</File>
<File
RelativePath=".\GLState.cpp"
>
</File>
<File
RelativePath=".\GPU.cpp"
>
@ -1399,6 +1403,10 @@
RelativePath=".\GLLoader.h"
>
</File>
<File
RelativePath=".\GLState.h"
>
</File>
<File
RelativePath=".\GPU.h"
>