From 64f783410e06627c207aae344aec3f90775f03b6 Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Fri, 2 Aug 2013 16:38:12 +0000 Subject: [PATCH] gsdx ogl: * preliminary work for GL4.4 extensions (ARB_clear_texture & ARB_multi_bind). Disabled until I get a 4.4 driver Note: I plan also to use ARB_buffer_storage * compute texture gl option in the constructor (avoid a couple of switch cases) * redo texture unit management. Unit 0-2 for shaders, Unit 3 for texture operations. MultiBind will allow binding shader inputs without disturbing texture binding points. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5711 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GLLoader.cpp | 30 +++-- plugins/GSdx/GLLoader.h | 8 ++ plugins/GSdx/GSDeviceOGL.cpp | 187 ++++++++++++++++++++++++-------- plugins/GSdx/GSDeviceOGL.h | 9 +- plugins/GSdx/GSRendererOGL.cpp | 18 ++- plugins/GSdx/GSTextureFXOGL.cpp | 4 +- plugins/GSdx/GSTextureOGL.cpp | 127 ++++++++++------------ plugins/GSdx/GSTextureOGL.h | 10 +- plugins/GSdx/GSWnd.cpp | 6 + plugins/GSdx/res/glsl_source.h | 37 ++++--- plugins/GSdx/res/tfx.glsl | 37 ++++--- 11 files changed, 309 insertions(+), 164 deletions(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index ec1162d598..c9e74ffe66 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -103,6 +103,11 @@ PFNGLGETUNIFORMLOCATIONPROC gl_GetUniformLocation = NULL; // GL4.2 PFNGLBINDIMAGETEXTUREPROC gl_BindImageTexture = NULL; PFNGLMEMORYBARRIERPROC gl_MemoryBarrier = NULL; +// GL4.4 +#ifdef GL44 +PFNGLCLEARTEXIMAGEPROC gl_ClearTexImage = NULL; +PFNGLBINDTEXTURESPROC gl_BindTextures = NULL; +#endif #endif namespace GLLoader { @@ -120,6 +125,8 @@ namespace GLLoader { bool found_only_gl30 = false; bool found_GL_ARB_gpu_shader5 = false; bool found_GL_ARB_shader_image_load_store = false; + bool found_GL_ARB_clear_texture = false; + bool found_GL_ARB_multi_bind = false; bool check_gl_version(uint32 major, uint32 minor) { @@ -193,6 +200,11 @@ 
namespace GLLoader { if (ext.compare("GL_ARB_copy_image") == 0) found_GL_ARB_copy_image = true; if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true; if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true; +#ifdef GL44 + if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true; + if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true; +#endif + #ifdef ENABLE_GLES fprintf(stderr, "DEBUG ext: %s\n", ext.c_str()); #endif @@ -200,21 +212,17 @@ namespace GLLoader { } #ifndef ENABLE_GLES - if (!found_GL_ARB_separate_shader_objects) { - fprintf(stderr, "INFO: GL_ARB_separate_shader_objects is not supported\n"); - } - if (!found_GL_ARB_shading_language_420pack) { - fprintf(stderr, "INFO: GL_ARB_shading_language_420pack is not supported\n"); - } + if (!found_GL_ARB_separate_shader_objects) fprintf(stderr, "INFO: GL_ARB_separate_shader_objects is not supported\n"); + if (!found_GL_ARB_shading_language_420pack) fprintf(stderr, "INFO: GL_ARB_shading_language_420pack is not supported\n"); + if (!found_GL_ARB_gpu_shader5) fprintf(stderr, "INFO: GL_ARB_gpu_shader5 is not supported\n"); + if (!found_GL_ARB_shader_image_load_store) fprintf(stderr, "INFO: GL_ARB_shader_image_load_store is not supported\n"); + if (!found_GL_ARB_clear_texture) fprintf(stderr, "INFO: GL_ARB_clear_texture is not supported\n"); + if (!found_GL_ARB_multi_bind) fprintf(stderr, "INFO: GL_ARB_multi_bind is not supported\n"); + if (!found_GL_ARB_texture_storage) { fprintf(stderr, "FATAL: GL_ARB_texture_storage is not supported\n"); return false; } - if (!found_GL_ARB_gpu_shader5) { - fprintf(stderr, "INFO: GL_ARB_gpu_shader5 is not supported\n"); - } - if (!found_GL_ARB_shader_image_load_store) - fprintf(stderr, "INFO: GL_ARB_shader_image_load_store is not supported\n"); if (theApp.GetConfig("override_GL_ARB_shading_language_420pack", -1) != -1) { diff --git a/plugins/GSdx/GLLoader.h 
b/plugins/GSdx/GLLoader.h index ebca6ac5ab..c953efe187 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -101,6 +101,12 @@ extern PFNGLGETUNIFORMLOCATIONPROC gl_GetUniformLocation; // GL4.2 extern PFNGLBINDIMAGETEXTUREPROC gl_BindImageTexture; extern PFNGLMEMORYBARRIERPROC gl_MemoryBarrier; +// GL4.4 +#ifdef GL44 +extern PFNGLCLEARTEXIMAGEPROC gl_ClearTexImage; +extern PFNGLBINDTEXTURESPROC gl_BindTextures; +#endif + #else #define gl_ActiveTexture glActiveTexture #define gl_BlendColor glBlendColor @@ -198,4 +204,6 @@ namespace GLLoader { extern bool found_only_gl30; extern bool found_GL_ARB_gpu_shader5; extern bool found_GL_ARB_shader_image_load_store; + extern bool found_GL_ARB_clear_texture; + extern bool found_GL_ARB_multi_bind; } diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index dfb039f974..fc13d51e1b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -180,6 +180,14 @@ bool GSDeviceOGL::Create(GSWnd* wnd) }; m_vb_sr = new GSVertexBufferStateOGL(sizeof(GSVertexPT1), il_convert, countof(il_convert)); + // **************************************************************** + // Texture unit state + // **************************************************************** + // By default use unit 3 for texture modification + // unit 0-2 will allocated to shader input + gl_ActiveTexture(GL_TEXTURE0 + 3); + + // **************************************************************** // convert // **************************************************************** @@ -339,6 +347,7 @@ void GSDeviceOGL::Flip() void GSDeviceOGL::BeforeDraw() { m_shader->UseProgram(); + //#ifdef ENABLE_OGL_STENCIL_DEBUG // if (m_date.t) // static_cast(m_date.t)->Save(format("/tmp/date_before_%04ld.csv", g_draw_count)); @@ -351,7 +360,7 @@ void GSDeviceOGL::AfterDraw() // if (m_date.t) // static_cast(m_date.t)->Save(format("/tmp/date_after_%04ld.csv", g_draw_count)); //#endif -#if defined(ENABLE_OGL_DEBUG) || 
defined(PRINT_FRAME_NUMBER) +#if defined(ENABLE_OGL_DEBUG) || defined(PRINT_FRAME_NUMBER) || defined(ENABLE_OGL_STENCIL_DEBUG) g_draw_count++; #endif } @@ -381,21 +390,25 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { - glDisable(GL_SCISSOR_TEST); - if (static_cast(t)->IsBackbuffer()) { - OMSetFBO(0); - - // glDrawBuffer(GL_BACK); // this is the default when there is no FB - // 0 will select the first drawbuffer ie GL_BACK - gl_ClearBufferfv(GL_COLOR, 0, c.v); + if (GLLoader::found_GL_ARB_clear_texture) { + static_cast(t)->Clear((const void*)&c); } else { - OMSetFBO(m_fbo); - OMSetWriteBuffer(); - OMAttachRt(t); + glDisable(GL_SCISSOR_TEST); + if (static_cast(t)->IsBackbuffer()) { + OMSetFBO(0); - gl_ClearBufferfv(GL_COLOR, 0, c.v); + // glDrawBuffer(GL_BACK); // this is the default when there is no FB + // 0 will select the first drawbuffer ie GL_BACK + gl_ClearBufferfv(GL_COLOR, 0, c.v); + } else { + OMSetFBO(m_fbo); + OMSetWriteBuffer(); + OMAttachRt(t); + + gl_ClearBufferfv(GL_COLOR, 0, c.v); + } + glEnable(GL_SCISSOR_TEST); } - glEnable(GL_SCISSOR_TEST); } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) @@ -406,46 +419,66 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) void GSDeviceOGL::ClearRenderTarget_ui(GSTexture* t, uint32 c) { - uint32 col[4] = {c, c, c, c}; + if (GLLoader::found_GL_ARB_clear_texture) { + static_cast(t)->Clear((const void*)&c); + } else { + uint32 col[4] = {c, c, c, c}; - glDisable(GL_SCISSOR_TEST); + glDisable(GL_SCISSOR_TEST); - OMSetFBO(m_fbo); - OMSetWriteBuffer(); - OMAttachRt(t); + OMSetFBO(m_fbo); + OMSetWriteBuffer(); + OMAttachRt(t); - gl_ClearBufferuiv(GL_COLOR, 0, col); + gl_ClearBufferuiv(GL_COLOR, 0, col); - glEnable(GL_SCISSOR_TEST); + glEnable(GL_SCISSOR_TEST); + } } void GSDeviceOGL::ClearDepth(GSTexture* t, float c) { - OMSetFBO(m_fbo); - OMSetWriteBuffer(); - OMAttachDs(t); - - glDisable(GL_SCISSOR_TEST); 
- if (m_state.dss != NULL && m_state.dss->IsMaskEnable()) { - gl_ClearBufferfv(GL_DEPTH, 0, &c); + // TODO is it possible with GL44 ClearTexture? + // It is seriously not clear if we can clear only the depth + if (GLLoader::found_GL_ARB_clear_texture) { +#ifdef GL44 + gl_ClearTexImage(static_cast(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_FLOAT, &c); +#endif } else { - glDepthMask(true); - gl_ClearBufferfv(GL_DEPTH, 0, &c); - glDepthMask(false); + OMSetFBO(m_fbo); + OMSetWriteBuffer(); + OMAttachDs(t); + + glDisable(GL_SCISSOR_TEST); + if (m_state.dss != NULL && m_state.dss->IsMaskEnable()) { + gl_ClearBufferfv(GL_DEPTH, 0, &c); + } else { + glDepthMask(true); + gl_ClearBufferfv(GL_DEPTH, 0, &c); + glDepthMask(false); + } + glEnable(GL_SCISSOR_TEST); } - glEnable(GL_SCISSOR_TEST); } void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { - OMSetFBO(m_fbo); - OMSetWriteBuffer(); - OMAttachDs(t); - GLint color = c; + // TODO is it possible with GL44 ClearTexture? + // It is seriously not clear if we can clear only the stencil + if (GLLoader::found_GL_ARB_clear_texture) { +#ifdef GL44 + gl_ClearTexImage(static_cast(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_BYTE, &c); +#endif + } else { + OMSetFBO(m_fbo); + OMSetWriteBuffer(); + OMAttachDs(t); + GLint color = c; - glDisable(GL_SCISSOR_TEST); - gl_ClearBufferiv(GL_STENCIL, 0, &color); - glEnable(GL_SCISSOR_TEST); + glDisable(GL_SCISSOR_TEST); + gl_ClearBufferiv(GL_STENCIL, 0, &color); + glEnable(GL_SCISSOR_TEST); + } } GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav) @@ -494,9 +527,22 @@ void GSDeviceOGL::InitPrimDateTexture(int w, int h) ClearRenderTarget_ui(m_date.t, 0xFFFFFFFF); #ifdef ENABLE_OGL_STENCIL_DEBUG - static_cast(m_date.t)->EnableUnit(6); + gl_ActiveTexture(GL_TEXTURE0 + 5); + glBindTexture(GL_TEXTURE_2D, static_cast(m_date.t)->GetID()); + // Get back to the expected active texture unit + gl_ActiveTexture(GL_TEXTURE0 + 3); #endif + BindDateTexture(); +} + +void GSDeviceOGL::BindDateTexture() +{ + 
// TODO: multibind? + // GLuint textures[1] = {static_cast(m_date.t)->GetID()}; + // gl_BindImageTextures(0, 1, textures); + //gl_BindImageTexture(0, 0, 0, true, 0, GL_READ_WRITE, GL_R32UI); + gl_BindImageTexture(0, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32UI); } @@ -513,6 +559,15 @@ void GSDeviceOGL::RecycleDateTexture() } } +void GSDeviceOGL::Barrier(GLbitfield b) +{ + gl_MemoryBarrier(b); +//#ifdef ENABLE_OGL_STENCIL_DEBUG +// if (m_date.t) +// static_cast(m_date.t)->Save(format("/tmp/barrier_%04ld.csv", g_draw_count)); +//#endif +} + GLuint GSDeviceOGL::CompileVS(VSSelector sel) { std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) @@ -637,7 +692,7 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, static_cast(st_ogl)->GetID(), 0); glReadBuffer(GL_COLOR_ATTACHMENT0); - dt_ogl->EnableUnit(6); + dt_ogl->EnableUnit(); glCopyTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.x, r.y, r.width(), r.height()); gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0); @@ -739,7 +794,7 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // ************************************ PSSetShaderResource(0, st); - PSSetSamplerState(0, linear ? m_convert.ln : m_convert.pt); + PSSetSamplerState(linear ? m_convert.ln : m_convert.pt); m_shader->PS(ps); // ************************************ @@ -864,7 +919,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver // ps PSSetShaderResource(0, rt); - PSSetSamplerState(0, m_convert.pt); + PSSetSamplerState(m_convert.pt); m_shader->PS(m_convert.ps[datm ? 
2 : 3]); // @@ -933,14 +988,52 @@ void GSDeviceOGL::PSSetShaderResource(const int i, GSTexture* sr) { ASSERT(sr); - static_cast(sr)->EnableUnit(i); + if (m_state.tex_unit[i] != sr) { + m_state.tex_unit[i] = sr; + + if (GLLoader::found_GL_ARB_multi_bind) { +#ifdef GL44 + gl_BindTextures(i, 1, textures); +#endif + } else { + gl_ActiveTexture(GL_TEXTURE0 + i); + glBindTexture(GL_TEXTURE_2D, static_cast(sr)->GetID()); + + // Get back to the expected active texture unit + gl_ActiveTexture(GL_TEXTURE0 + 3); + } + } } -void GSDeviceOGL::PSSetSamplerState(const int i, GLuint ss) +void GSDeviceOGL::PSSetShaderResources(GSTexture* tex[2]) { - if (m_state.ps_ss[i] != ss) { - m_state.ps_ss[i] = ss; - gl_BindSampler(i, ss); + // FIXME how to check the state +#ifdef GL44 + if (m_state.tex_unit[0] != tex[0] || m_state.tex_unit[1] != tex[1]) { + GLuint textures[2] = {static_cast(tex[0])->GetID(), static_cast(tex[1])->GetID()}; + gl_BindTextures(0, 2, textures); + } +#endif + + // FIXME without multibind +#if 0 + for (int i = 0; i < count; i++) { + if (m_state.tex_unit[i] != id) { + m_state.tex_unit[i] = id; + gl_ActiveTexture(GL_TEXTURE0 + i); + glBindTexture(GL_TEXTURE_2D, id); + } + } + // Get back to the expected active texture unit + gl_ActiveTexture(GL_TEXTURE0 + 3); +#endif +} + +void GSDeviceOGL::PSSetSamplerState(GLuint ss) +{ + if (m_state.ps_ss != ss) { + m_state.ps_ss = ss; + gl_BindSampler(0, ss); } } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index d1539d4826..7c9ed81303 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -480,7 +480,7 @@ class GSDeviceOGL : public GSDevice struct { GSVertexBufferStateOGL* vb; GSUniformBufferOGL* cb; - GLuint ps_ss[3]; // sampler + GLuint ps_ss; // sampler GSVector2i viewport; GSVector4i scissor; GSDepthStencilOGL* dss; @@ -490,6 +490,7 @@ class GSDeviceOGL : public GSDevice GLenum draw; GSTexture* rt; // render target GSTexture* ds; // Depth-Stencil + GSTexture* tex_unit[2]; } 
m_state; GSShaderOGL* m_shader; @@ -555,6 +556,7 @@ class GSDeviceOGL : public GSDevice GSTexture* CreateOffscreen(int w, int h, int format = 0); void InitPrimDateTexture(int w, int h); void RecycleDateTexture(); + void BindDateTexture(); GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); @@ -577,7 +579,9 @@ class GSDeviceOGL : public GSDevice void SetUniformBuffer(GSUniformBufferOGL* cb); void PSSetShaderResource(const int i, GSTexture* sr); - void PSSetSamplerState(const int i, GLuint ss); + void PSSetShaderResources(GSTexture* tex[2]); + void PSSetSamplerState(GLuint ss); + void PSSetSamplerStates(const int count, const GLuint* samplers); void OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref); void OMSetBlendState(GSBlendStateOGL* bs, float bf); @@ -601,4 +605,5 @@ class GSDeviceOGL : public GSDevice void SetupSampler(PSSelector sel, PSSamplerSelector ssel); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); + void Barrier(GLbitfield b); }; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index c176fd9fef..b770c800de 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -494,9 +494,17 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_ssel.ltf = bilinear && simple_sample; dev->SetupSampler(ps_sel, ps_ssel); - dev->PSSetShaderResource(0, tex->m_texture); - if (tex->m_palette) - dev->PSSetShaderResource(1, tex->m_palette); + if (tex->m_palette) { + if (GLLoader::found_GL_ARB_multi_bind) { + GSTexture* textures[2] = {tex->m_texture, tex->m_palette}; + dev->PSSetShaderResources(textures); + } else { + dev->PSSetShaderResource(1, tex->m_palette); + dev->PSSetShaderResource(0, tex->m_texture); + } + } else { + dev->PSSetShaderResource(0, tex->m_texture); + } } else { @@ -519,7 +527,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupPS(ps_sel, &ps_cb); if 
(advance_DATE) { - // Create an r32ui image that will containt primitive ID + // Create an r32ui image that will contain primitive ID // Note: do it at the beginning because the clean will dirty the state //dev->InitPrimDateTexture(rtsize.x, rtsize.y); @@ -535,7 +543,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupPS(ps_sel, &ps_cb); // Be sure that first pass is finished ! - gl_MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } if(context->TEST.DoFirstPass()) diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 9cd2f9f5fc..3ea359f77b 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -32,6 +32,7 @@ void GSDeviceOGL::CreateTextureFX() m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); m_palette_ss = CreateSampler(false, false, false); + gl_BindSampler(1, m_palette_ss); GSInputLayoutOGL vert_format[] = { @@ -184,8 +185,7 @@ void GSDeviceOGL::SetupSampler(PSSelector sel, PSSamplerSelector ssel) ssel.ltf = 0; } - PSSetSamplerState(0, m_ps_ss[ssel]); - PSSetSamplerState(1, m_palette_ss); + PSSetSamplerState(m_ps_ss[ssel]); } void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 288a1aa0fe..3e672af46c 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -22,8 +22,7 @@ #include "stdafx.h" #include #include "GSTextureOGL.h" -static GLuint g_state_texture_unit = -1; -static GLuint g_state_texture_id[7] = {0, 0, 0, 0, 0, 0, 0}; +static GLuint g_tex3_state = 0; // FIXME: check if it possible to always use those setup by default // glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -62,6 +61,36 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) m_fbo_read = fbo_read; m_texture_id = 0; + // Bunch of constant parameter + 
switch (m_format) { + case GL_R32UI: + m_int_format = GL_RED_INTEGER; + m_int_type = GL_UNSIGNED_INT; + m_int_alignment = 4; + m_int_shift = 2; + break; + case GL_R16UI: + m_int_format = GL_RED_INTEGER; + m_int_type = GL_UNSIGNED_SHORT; + m_int_alignment = 2; + m_int_shift = 1; + break; + case GL_RGBA8: + m_int_format = GL_RGBA; + m_int_type = GL_UNSIGNED_BYTE; + m_int_alignment = 4; + m_int_shift = 2; + break; + case GL_R8: + m_int_format = GL_RED; + m_int_type = GL_UNSIGNED_BYTE; + m_int_alignment = 1; + m_int_shift = 0; + break; + default: + ASSERT(0); + } + // Generate the buffer switch (m_type) { case GSTexture::Offscreen: @@ -90,12 +119,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) switch (m_type) { case GSTexture::Offscreen: // Allocate a pbo with the texture - if (m_format == GL_RGBA8) m_pbo_size = m_size.x * m_size.y * 4; - else if (m_format == GL_R16UI) m_pbo_size = m_size.x * m_size.y * 2; - else { - fprintf(stderr, "wrong texture pixel format :%x\n", m_format); - ASSERT(0); // TODO Later - } + m_pbo_size = (m_size.x * m_size.y) << m_int_shift; gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id); gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW); @@ -104,24 +128,32 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) case GSTexture::DepthStencil: case GSTexture::RenderTarget: case GSTexture::Texture: - EnableUnit(3); + EnableUnit(); gl_TexStorage2D(GL_TEXTURE_2D, 1, m_format, m_size.x, m_size.y); break; default: break; } + } GSTextureOGL::~GSTextureOGL() { /* Unbind the texture from our local state */ - for (uint32 i = 0; i < 7; i++) - if (g_state_texture_id[i] == m_texture_id) - g_state_texture_id[i] = 0; + for (uint32 i = 0; i < 5; i++) + if (g_tex3_state == m_texture_id) + g_tex3_state = 0; gl_DeleteBuffers(1, &m_pbo_id); glDeleteTextures(1, &m_texture_id); } +void GSTextureOGL::Clear(const void *data) +{ +#ifdef GL44 + gl_ClearTexImage(m_texture_id, 0, m_format, 
m_int_type, const void * data); +#endif +} + bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { ASSERT(m_type != GSTexture::DepthStencil && m_type != GSTexture::Offscreen); @@ -129,34 +161,11 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) // FIXME warning order of the y axis // FIXME I'm not confident with GL_UNSIGNED_BYTE type - EnableUnit(4); + EnableUnit(); // pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel - GLenum format = GL_RGBA; - GLenum type = GL_UNSIGNED_BYTE; - switch (m_format) { - case GL_RGBA8: - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch>>2); - format = GL_RGBA; - type = GL_UNSIGNED_BYTE; - break; - case GL_R16UI: - glPixelStorei(GL_UNPACK_ALIGNMENT, 2); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch>>1); - format = GL_RED_INTEGER; - type = GL_UNSIGNED_SHORT; - break; - case GL_R8: - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch); - format = GL_RED; - type = GL_UNSIGNED_BYTE; - break; - default: - fprintf(stderr, "wrong texture pixel format :%x\n", m_format); - ASSERT(0); - } + glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); #ifdef _LINUX if (GLLoader::fglrx_buggy_driver && !GLLoader::in_replayer) { @@ -168,14 +177,16 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) fprintf(stderr, "Box (%d,%d)x(%d,%d)\n", r.x, r.y, r.width(), r.height()); #endif + // FIXME useful? glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return false; } } #endif - glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), format, type, data); + glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); + // FIXME useful? 
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return true; @@ -193,18 +204,13 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) #endif } -void GSTextureOGL::EnableUnit(const uint32 unit) +void GSTextureOGL::EnableUnit() { /* Not a real texture */ ASSERT(!IsBackbuffer()); - if (g_state_texture_unit != unit) { - gl_ActiveTexture(GL_TEXTURE0 + unit); - g_state_texture_unit = unit; - } - - if (g_state_texture_id[unit] != m_texture_id) { - g_state_texture_id[unit] = m_texture_id; + if (g_tex3_state != m_texture_id) { + g_tex3_state = m_texture_id; glBindTexture(GL_TEXTURE_2D, m_texture_id); } } @@ -222,7 +228,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* r) // Can be used on GL_PIXEL_UNPACK_BUFFER or GL_TEXTURE_BUFFER // Bind the texture to the read framebuffer to avoid any disturbance - EnableUnit(5); + EnableUnit(); gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); glReadBuffer(GL_COLOR_ATTACHMENT0); @@ -230,22 +236,9 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* r) // FIXME It might be possible to only read a subrange of the texture based on r object // Load the PBO with the data gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id); - if (m_format == GL_RGBA8) { - glPixelStorei(GL_PACK_ALIGNMENT, 4); - glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, 0); - m.pitch = m_size.x * 4; - } else if (m_format == GL_R16UI) { - glPixelStorei(GL_PACK_ALIGNMENT, 2); - glReadPixels(0, 0, m_size.x, m_size.y, GL_RED_INTEGER, GL_UNSIGNED_SHORT, 0); - m.pitch = m_size.x * 2; - } else if (m_format == GL_R8) { - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glReadPixels(0, 0, m_size.x, m_size.y, GL_RED, GL_UNSIGNED_BYTE, 0); - m.pitch = m_size.x; - } else { - fprintf(stderr, "wrong texture pixel format :%x\n", m_format); - ASSERT(0); - } + glPixelStorei(GL_PACK_ALIGNMENT, m_int_alignment); + glReadPixels(0, 0, 
m_size.x, m_size.y, m_int_format, m_int_type, 0); + m.pitch = m_size.x << m_int_shift; gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Give access from the CPU @@ -433,7 +426,6 @@ bool GSTextureOGL::Save(const string& fn, bool dds) gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0); } else if(m_format == GL_R32UI) { - //EnableUnit(6); gl_ActiveTexture(GL_TEXTURE0 + 6); glBindTexture(GL_TEXTURE_2D, m_texture_id); @@ -446,7 +438,6 @@ bool GSTextureOGL::Save(const string& fn, bool dds) } else { gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - //EnableUnit(6); gl_ActiveTexture(GL_TEXTURE0 + 6); glBindTexture(GL_TEXTURE_2D, m_texture_id); @@ -475,8 +466,8 @@ bool GSTextureOGL::Save(const string& fn, bool dds) free(image); // Restore state - gl_ActiveTexture(GL_TEXTURE0 + g_state_texture_unit); - glBindTexture(GL_TEXTURE_2D, g_state_texture_id[g_state_texture_unit]); + gl_ActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_2D, g_tex3_state); return status; } diff --git a/plugins/GSdx/GSTextureOGL.h b/plugins/GSdx/GSTextureOGL.h index fcb9024c3f..03fe33d34f 100644 --- a/plugins/GSdx/GSTextureOGL.h +++ b/plugins/GSdx/GSTextureOGL.h @@ -31,6 +31,12 @@ class GSTextureOGL : public GSTexture int m_pbo_size; GLuint m_fbo_read; + // internal opengl format/type/alignment + GLenum m_int_format; + GLenum m_int_type; + uint32 m_int_alignment; + uint32 m_int_shift; + public: explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read); virtual ~GSTextureOGL(); @@ -42,7 +48,9 @@ class GSTextureOGL : public GSTexture void Save(const string& fn, const void* image, uint32 pitch); void SaveRaw(const string& fn, const void* image, uint32 pitch); - void EnableUnit(const uint32 unit); + void Clear(const void *data); + + void EnableUnit(); bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); } bool IsDss() { return (m_type == GSTexture::DepthStencil); } diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index b050fa402f..5045673340 100644 --- 
a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -104,5 +104,11 @@ void GSWndGL::PopulateGlFunction() // GL4.2 *(void**)&(gl_BindImageTexture) = GetProcAddress("glBindImageTexture"); *(void**)&(gl_MemoryBarrier) = GetProcAddress("glMemoryBarrier"); +#ifdef GL44 + *(void**)&(gl_ClearTexImage) = GetProcAddress("glCLearTexImage"); + *(void**)&(gl_BindTextures) = GetProcAddress("glBindTextures"); + *(void**)&(gl_BindSamplers) = GetProcAddress("glBindSamplers"); +#endif + #endif } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index d976bfc108..657b0a742a 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -684,7 +684,7 @@ static const char* tfx_glsl = " float gl_PointSize;\n" " float gl_ClipDistance[];\n" "};\n" - "out int gl_PrimitiveID;\n" + "//out int gl_PrimitiveID;\n" "\n" "in SHADER\n" "{\n" @@ -823,7 +823,6 @@ static const char* tfx_glsl = " out_vertex(rb);\n" "\n" " EndPrimitive();\n" - "\n" "}\n" "\n" "#endif\n" @@ -1255,16 +1254,19 @@ static const char* tfx_glsl = " return c;\n" "}\n" "\n" + "#if GL_ES\n" "void ps_main()\n" "{\n" " vec4 c = ps_color();\n" - "\n" - "#if GL_ES\n" - "\n" " c.a *= 2.0;\n" " SV_Target0 = c;\n" + "}\n" + "#endif\n" "\n" - "#else\n" + "#if !GL_ES\n" + "void ps_main()\n" + "{\n" + " vec4 c = ps_color();\n" "\n" " float alpha = c.a * 2.0;\n" "\n" @@ -1279,17 +1281,15 @@ static const char* tfx_glsl = " if(c.a < 0.5) c.a += 0.5;\n" " }\n" "\n" - "#ifndef DISABLE_GL42_image\n" - "\n" " // Get first primitive that will write a failling alpha value\n" - "#if PS_DATE == 1\n" + "#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n" " // DATM == 0\n" " // Pixel with alpha equal to 1 will failed\n" " if (c.a > 127.5f / 255.0f) {\n" " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" " }\n" " //memoryBarrier();\n" - "#elif PS_DATE == 2\n" + "#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n" " // DATM == 1\n" " // Pixel with alpha equal to 0 will 
failed\n" " if (c.a < 127.5f / 255.0f) {\n" @@ -1299,12 +1299,21 @@ static const char* tfx_glsl = "\n" " // TODO\n" " // warning non uniform flow ???\n" - "#if PS_DATE == 3\n" + "#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n" " uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n" " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" " // the bad alpha value so we must keep it.\n" - " if (gl_PrimitiveID > stencil_ceil)\n" - " discard;\n" + "#if 0\n" + " if (stencil_ceil > 0)\n" + " c = vec4(1.0, 0.0, 0.0, 1.0);\n" + " else\n" + " c = vec4(0.0, 1.0, 0.0, 1.0);\n" + "#endif\n" + "\n" + "#if 1\n" + " if (gl_PrimitiveID > stencil_ceil) {\n" + " discard;\n" + " }\n" "#endif\n" "\n" "#endif\n" @@ -1320,8 +1329,8 @@ static const char* tfx_glsl = " SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n" "#endif\n" "\n" - "#endif\n" "}\n" + "#endif // !GL_ES\n" "\n" "#endif\n" ; diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index b02e02b69d..d34b975bf5 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -174,7 +174,7 @@ out gl_PerVertex { float gl_PointSize; float gl_ClipDistance[]; }; -out int gl_PrimitiveID; +//out int gl_PrimitiveID; in SHADER { @@ -313,7 +313,6 @@ void gs_main() out_vertex(rb); EndPrimitive(); - } #endif @@ -745,16 +744,19 @@ vec4 ps_color() return c; } +#if GL_ES void ps_main() { vec4 c = ps_color(); - -#if GL_ES - c.a *= 2.0; SV_Target0 = c; +} +#endif -#else +#if !GL_ES +void ps_main() +{ + vec4 c = ps_color(); float alpha = c.a * 2.0; @@ -769,17 +771,15 @@ void ps_main() if(c.a < 0.5) c.a += 0.5; } -#ifndef DISABLE_GL42_image - // Get first primitive that will write a failling alpha value -#if PS_DATE == 1 +#if PS_DATE == 1 && !defined(DISABLE_GL42_image) // DATM == 0 // Pixel with alpha equal to 1 will failed if (c.a > 127.5f / 255.0f) { imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); } //memoryBarrier(); -#elif PS_DATE == 2 +#elif PS_DATE 
== 2 && !defined(DISABLE_GL42_image) // DATM == 1 // Pixel with alpha equal to 0 will failed if (c.a < 127.5f / 255.0f) { @@ -789,12 +789,21 @@ void ps_main() // TODO // warning non uniform flow ??? -#if PS_DATE == 3 +#if PS_DATE == 3 && !defined(DISABLE_GL42_image) uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)); // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update // the bad alpha value so we must keep it. - if (gl_PrimitiveID > stencil_ceil) - discard; +#if 0 + if (stencil_ceil > 0) + c = vec4(1.0, 0.0, 0.0, 1.0); + else + c = vec4(0.0, 1.0, 0.0, 1.0); +#endif + +#if 1 + if (gl_PrimitiveID > stencil_ceil) { + discard; + } #endif #endif @@ -810,7 +819,7 @@ void ps_main() SV_Target1 = vec4(alpha, alpha, alpha, alpha); #endif -#endif } +#endif // !GL_ES #endif