diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index e54dd4dd85..ec1162d598 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -40,6 +40,7 @@ PFNGLBUFFERDATAPROC gl_BufferData = NULL; PFNGLCHECKFRAMEBUFFERSTATUSPROC gl_CheckFramebufferStatus = NULL; PFNGLCLEARBUFFERFVPROC gl_ClearBufferfv = NULL; PFNGLCLEARBUFFERIVPROC gl_ClearBufferiv = NULL; +PFNGLCLEARBUFFERUIVPROC gl_ClearBufferuiv = NULL; PFNGLCOMPILESHADERPROC gl_CompileShader = NULL; PFNGLCOPYIMAGESUBDATANVPROC gl_CopyImageSubDataNV = NULL; PFNGLCREATEPROGRAMPROC gl_CreateProgram = NULL; @@ -99,12 +100,16 @@ PFNGLPROGRAMUNIFORM1IPROC gl_ProgramUniform1i = NULL; PFNGLGETUNIFORMBLOCKINDEXPROC gl_GetUniformBlockIndex = NULL; PFNGLUNIFORMBLOCKBINDINGPROC gl_UniformBlockBinding = NULL; PFNGLGETUNIFORMLOCATIONPROC gl_GetUniformLocation = NULL; +// GL4.2 +PFNGLBINDIMAGETEXTUREPROC gl_BindImageTexture = NULL; +PFNGLMEMORYBARRIERPROC gl_MemoryBarrier = NULL; #endif namespace GLLoader { bool fglrx_buggy_driver = false; bool nvidia_buggy_driver = false; + bool in_replayer = false; bool found_GL_ARB_separate_shader_objects = false; bool found_GL_ARB_shading_language_420pack = false; @@ -114,11 +119,15 @@ namespace GLLoader { bool found_GL_ARB_copy_image = false; bool found_only_gl30 = false; bool found_GL_ARB_gpu_shader5 = false; + bool found_GL_ARB_shader_image_load_store = false; bool check_gl_version(uint32 major, uint32 minor) { const GLubyte* s = glGetString(GL_VERSION); - if (s == NULL) return false; + if (s == NULL) { + fprintf(stderr, "Error: GLLoader failed to get GL version\n"); + return false; + } const char* vendor = (const char*)glGetString(GL_VENDOR); fprintf(stderr, "Supported Opengl version: %s on GPU: %s. Vendor: %s\n", s, glGetString(GL_RENDERER), vendor); @@ -183,6 +192,7 @@ namespace GLLoader { // Replace previous extensions (when driver will be updated) if (ext.compare("GL_ARB_copy_image") == 0) found_GL_ARB_copy_image = true; if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true; + if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true; #ifdef ENABLE_GLES fprintf(stderr, "DEBUG ext: %s\n", ext.c_str()); #endif @@ -203,6 +213,8 @@ namespace GLLoader { if (!found_GL_ARB_gpu_shader5) { fprintf(stderr, "INFO: GL_ARB_gpu_shader5 is not supported\n"); } + if (!found_GL_ARB_shader_image_load_store) + fprintf(stderr, "INFO: GL_ARB_shader_image_load_store is not supported\n"); if (theApp.GetConfig("override_GL_ARB_shading_language_420pack", -1) != -1) { @@ -213,6 +225,10 @@ namespace GLLoader { found_GL_ARB_separate_shader_objects = !!theApp.GetConfig("override_GL_ARB_separate_shader_objects", -1); fprintf(stderr, "Override GL_ARB_separate_shader_objects detection\n"); } + if (theApp.GetConfig("override_GL_ARB_shader_image_load_store", -1) != -1) { + found_GL_ARB_shader_image_load_store = !!theApp.GetConfig("override_GL_ARB_shader_image_load_store", -1); + fprintf(stderr, "Override GL_ARB_shader_image_load_store detection\n"); + } if (theApp.GetConfig("override_GL_ARB_copy_image", -1) != -1) { // Same extension so override both found_GL_ARB_copy_image = !!theApp.GetConfig("override_GL_ARB_copy_image", -1); diff --git a/plugins/GSdx/GLLoader.h b/plugins/GSdx/GLLoader.h index 73fe4f04ea..ebca6ac5ab 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -38,6 +38,7 @@ extern PFNGLBUFFERDATAPROC gl_BufferData; extern PFNGLCHECKFRAMEBUFFERSTATUSPROC gl_CheckFramebufferStatus; extern PFNGLCLEARBUFFERFVPROC gl_ClearBufferfv; extern PFNGLCLEARBUFFERIVPROC gl_ClearBufferiv; +extern PFNGLCLEARBUFFERUIVPROC gl_ClearBufferuiv; extern PFNGLCOMPILESHADERPROC gl_CompileShader; extern PFNGLCOPYIMAGESUBDATANVPROC gl_CopyImageSubDataNV; extern PFNGLCREATEPROGRAMPROC gl_CreateProgram; @@ -97,6 +98,9 @@ extern PFNGLPROGRAMUNIFORM1IPROC gl_ProgramUniform1i; extern PFNGLGETUNIFORMBLOCKINDEXPROC gl_GetUniformBlockIndex; extern PFNGLUNIFORMBLOCKBINDINGPROC gl_UniformBlockBinding; extern PFNGLGETUNIFORMLOCATIONPROC gl_GetUniformLocation; +// GL4.2 +extern PFNGLBINDIMAGETEXTUREPROC gl_BindImageTexture; +extern PFNGLMEMORYBARRIERPROC gl_MemoryBarrier; #else #define gl_ActiveTexture glActiveTexture #define gl_BlendColor glBlendColor @@ -183,6 +187,7 @@ namespace GLLoader { extern bool fglrx_buggy_driver; extern bool nvidia_buggy_driver; + extern bool in_replayer; extern bool found_GL_ARB_separate_shader_objects; extern bool found_GL_ARB_shading_language_420pack; @@ -192,4 +197,5 @@ namespace GLLoader { extern bool found_geometry_shader; extern bool found_only_gl30; extern bool found_GL_ARB_gpu_shader5; + extern bool found_GL_ARB_shader_image_load_store; } diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 3748d15acc..a1bf5a2761 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -1417,7 +1417,7 @@ inline unsigned long timeGetTime() // Note EXPORT_C GSReplay(char* lpszCmdLine, int renderer) { - + GLLoader::in_replayer = true; // lpszCmdLine: // First parameter is the renderer. @@ -1462,7 +1462,11 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) void* hWnd = NULL; - _GSopen((void**)&hWnd, "", renderer); + int err = _GSopen((void**)&hWnd, "", renderer); + if (err != 0) { + fprintf(stderr, "Error failed to GSopen\n"); + return; + } if (s_gs->m_wnd == NULL) return; uint32 crc; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index bca28ab422..08ef9708d8 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -27,11 +27,6 @@ //#define LOUD_DEBUGGING //#define PRINT_FRAME_NUMBER //#define ONLY_LINES -#if 0 -#ifdef _DEBUG -#define ENABLE_OGL_STENCIL_DEBUG -#endif -#endif static uint32 g_draw_count = 0; static uint32 g_frame_count = 1; @@ -344,10 +339,18 @@ void GSDeviceOGL::Flip() void GSDeviceOGL::BeforeDraw() { m_shader->UseProgram(); +//#ifdef ENABLE_OGL_STENCIL_DEBUG +// if (m_date.t) +// static_cast(m_date.t)->Save(format("/tmp/date_before_%04ld.csv", g_draw_count)); +//#endif } void GSDeviceOGL::AfterDraw() { +//#ifdef ENABLE_OGL_STENCIL_DEBUG +// if (m_date.t) +// static_cast(m_date.t)->Save(format("/tmp/date_after_%04ld.csv", g_draw_count)); +//#endif #if defined(ENABLE_OGL_DEBUG) || defined(PRINT_FRAME_NUMBER) g_draw_count++; #endif @@ -387,7 +390,6 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) gl_ClearBufferfv(GL_COLOR, 0, c.v); } else { OMSetFBO(m_fbo); - // FIXME useful OMSetWriteBuffer(); OMAttachRt(t); @@ -402,10 +404,26 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) ClearRenderTarget(t, color); } +void GSDeviceOGL::ClearRenderTarget_ui(GSTexture* t, uint32 c) +{ + uint32 col[4] = {c, c, c, c}; + + glDisable(GL_SCISSOR_TEST); + + OMSetFBO(m_fbo); + OMSetWriteBuffer(); + OMAttachRt(t); + + gl_ClearBufferuiv(GL_COLOR, 0, col); + + glEnable(GL_SCISSOR_TEST); + + static_cast(t)->Save(format("/tmp/date_init_%04ld.csv", g_draw_count)); +} + void GSDeviceOGL::ClearDepth(GSTexture* t, float c) { OMSetFBO(m_fbo); - // FIXME useful OMSetWriteBuffer(); OMAttachDs(t); @@ -423,7 +441,6 @@ void GSDeviceOGL::ClearDepth(GSTexture* t, float c) void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { OMSetFBO(m_fbo); - // FIXME useful OMSetWriteBuffer(); OMAttachDs(t); GLint color = c; @@ -470,6 +487,34 @@ GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav) return sampler; } +void GSDeviceOGL::InitPrimDateTexture(int w, int h) +{ + // Create a texture to avoid the useless clean@0 + if (m_date.t == NULL) + m_date.t = CreateTexture(w, h, GL_R32UI); + + ClearRenderTarget_ui(m_date.t, 0xFFFFFFFF); + +#ifdef ENABLE_OGL_STENCIL_DEBUG + static_cast(m_date.t)->EnableUnit(6); +#endif + + gl_BindImageTexture(0, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32UI); +} + +void GSDeviceOGL::RecycleDateTexture() +{ + if (m_date.t) { +#ifdef ENABLE_OGL_STENCIL_DEBUG + //static_cast(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", g_draw_count)); +#endif + + // FIXME invalidate data + Recycle(m_date.t); + m_date.t = NULL; + } +} + GLuint GSDeviceOGL::CompileVS(VSSelector sel) { std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 5cb8e8b85a..d1539d4826 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -466,6 +466,7 @@ class GSDeviceOGL : public GSDevice struct { GSDepthStencilOGL* dss; GSBlendStateOGL* bs; + GSTexture* t; } m_date; struct @@ -520,7 +521,6 @@ class GSDeviceOGL : public GSDevice void OMAttachRt(GSTexture* rt); void OMAttachDs(GSTexture* ds); void OMSetFBO(GLuint fbo); - void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0); public: GSDeviceOGL(); @@ -545,6 +545,7 @@ class GSDeviceOGL : public GSDevice void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); + void ClearRenderTarget_ui(GSTexture* t, uint32 c); void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); @@ -552,6 +553,8 @@ class GSDeviceOGL : public GSDevice GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + void InitPrimDateTexture(int w, int h); + void RecycleDateTexture(); GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); @@ -579,7 +582,7 @@ class GSDeviceOGL : public GSDevice void OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref); void OMSetBlendState(GSBlendStateOGL* bs, float bf); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); - + void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0); void CreateTextureFX(); GLuint CompileVS(VSSelector sel); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 7db3c50f13..c176fd9fef 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -31,6 +31,7 @@ GSRendererOGL::GSRendererOGL() m_fba = !!theApp.GetConfig("fba", 1); UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0) && !!theApp.GetConfig("UserHacks", 0); UserHacks_AlphaStencil = !!theApp.GetConfig("UserHacks_AlphaStencil", 0) && !!theApp.GetConfig("UserHacks", 0); + UserHacks_DateGL4 = !!theApp.GetConfig("UserHacks_DateGL4", 0); m_pixelcenter = GSVector2(-0.5f, -0.5f); UserHacks_TCOffset = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_TCOffset", 0) : 0; @@ -217,6 +218,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + bool advance_DATE = false; ASSERT(m_dev != NULL); @@ -243,6 +245,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } + // Create an r32ui image that will containt primitive ID + dev->InitPrimDateTexture(rtsize.x, rtsize.y); } // @@ -298,6 +302,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + // TODO + //if (UserHacks_DateGL4 && DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) { + if (UserHacks_DateGL4 && DATE) { + //if (!(context->FBA.FBA && context->TEST.DATM == 1)) + advance_DATE = true; + } + // vs GSDeviceOGL::VSSelector vs_sel; @@ -381,14 +392,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if(DATE) { - if(dev->HasStencil()) - { - om_dssel.date = 1; - } - else - { + om_dssel.date = 1; + if (advance_DATE) ps_sel.date = 1 + context->TEST.DATM; - } } if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) @@ -512,7 +518,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupGS(gs_sel); dev->SetupPS(ps_sel, &ps_cb); - // draw + if (advance_DATE) { + // Create an r32ui image that will containt primitive ID + // Note: do it at the beginning because the clean will dirty the state + //dev->InitPrimDateTexture(rtsize.x, rtsize.y); + + // Don't write anything on the color buffer + dev->OMSetWriteBuffer(GL_NONE); + // Compute primitiveID max that pass the date test + dev->DrawIndexedPrimitive(); + + // Ask PS to discard shader above the primitiveID max + dev->OMSetWriteBuffer(); + + ps_sel.date = 3; + dev->SetupPS(ps_sel, &ps_cb); + + // Be sure that first pass is finished ! + gl_MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + } if(context->TEST.DoFirstPass()) { @@ -586,6 +610,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } } + if (advance_DATE) + dev->RecycleDateTexture(); dev->EndScene(); diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index e265ded94c..7c8dd3679b 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -39,6 +39,7 @@ class GSRendererOGL : public GSRendererHW bool m_fba; bool UserHacks_AlphaHack; bool UserHacks_AlphaStencil; + bool UserHacks_DateGL4; unsigned int UserHacks_TCOffset; float UserHacks_TCO_x, UserHacks_TCO_y; diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index 0aa1105ad6..66b76c47b1 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -264,14 +264,16 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co if (GLLoader::found_only_gl30) { header = "#version 130\n"; } else { - header = "#version 330\n"; + header = "#version 330 core\n"; } if (m_glsl420) { + // Need GL version 420 header += "#extension GL_ARB_shading_language_420pack: require\n"; } else { header += "#define DISABLE_GL42\n"; } if (m_sso) { + // Need GL version 410 header += "#extension GL_ARB_separate_shader_objects : require\n"; } else { header += "#define DISABLE_SSO\n"; @@ -285,11 +287,18 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co #ifdef ENABLE_OGL_STENCIL_DEBUG header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n"; #endif + if (GLLoader::found_GL_ARB_shader_image_load_store) + // Need GL version 420 + header += "#extension GL_ARB_shader_image_load_store: require\n"; + else + header += "#define DISABLE_GL42_image\n"; #else header = "#version 300 es\n"; + // Disable full GL features header += "#define DISABLE_SSO\n"; header += "#define DISABLE_GL42\n"; + header += "#define DISABLE_GL42_image\n"; #endif // Allow to puts several shader in 1 files diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 3ddcb0d3dd..288a1aa0fe 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -144,7 +144,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) case GL_R16UI: glPixelStorei(GL_UNPACK_ALIGNMENT, 2); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch>>1); - format = GL_RED; + format = GL_RED_INTEGER; type = GL_UNSIGNED_SHORT; break; case GL_R8: @@ -159,7 +159,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) } #ifdef _LINUX - if (GLLoader::fglrx_buggy_driver) { + if (GLLoader::fglrx_buggy_driver && !GLLoader::in_replayer) { // FIXME : it crash on colin mcrae rally 3 (others game too) when the texture is small //if ((pitch >> 2) == 32 || r.width() < 32 || r.height() < 32) { if ((r.width() < 32) || (pitch == 128 && r.width() == 32)) { @@ -388,6 +388,30 @@ void GSTextureOGL::Save(const string& fn, const void* image, uint32 pitch) fclose(fp); } +void GSTextureOGL::SaveRaw(const string& fn, const void* image, uint32 pitch) +{ + // Build a raw CSV file + FILE* fp = fopen(fn.c_str(), "w"); + + uint32* data = (uint32*)image; + + for(int h = m_size.y; h > 0; h--) { + for (int w = m_size.x; w > 0; w--, data += 1) { + if (*data == 0xffffffff) + fprintf(fp, ""); + else { + fprintf(fp, "%x", *data); + } + if ( w > 1) + fprintf(fp, ","); + } + fprintf(fp, "\n"); + } + + fclose(fp); +} + + bool GSTextureOGL::Save(const string& fn, bool dds) { // Collect the texture data @@ -408,10 +432,24 @@ bool GSTextureOGL::Save(const string& fn, bool dds) glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image); gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } else if(m_format == GL_R32UI) { + //EnableUnit(6); + gl_ActiveTexture(GL_TEXTURE0 + 6); + glBindTexture(GL_TEXTURE_2D, m_texture_id); + + glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, image); + SaveRaw(fn, image, pitch); + + // Not supported in Save function + status = false; + } else { gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); - EnableUnit(6); + //EnableUnit(6); + gl_ActiveTexture(GL_TEXTURE0 + 6); + glBindTexture(GL_TEXTURE_2D, m_texture_id); + gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); glReadBuffer(GL_COLOR_ATTACHMENT0); @@ -436,6 +474,10 @@ bool GSTextureOGL::Save(const string& fn, bool dds) if (status) Save(fn, image, pitch); free(image); + // Restore state + gl_ActiveTexture(GL_TEXTURE0 + g_state_texture_unit); + glBindTexture(GL_TEXTURE_2D, g_state_texture_id[g_state_texture_unit]); + return status; } diff --git a/plugins/GSdx/GSTextureOGL.h b/plugins/GSdx/GSTextureOGL.h index b5997fc7b6..fcb9024c3f 100644 --- a/plugins/GSdx/GSTextureOGL.h +++ b/plugins/GSdx/GSTextureOGL.h @@ -40,6 +40,7 @@ class GSTextureOGL : public GSTexture void Unmap(); bool Save(const string& fn, bool dds = false); void Save(const string& fn, const void* image, uint32 pitch); + void SaveRaw(const string& fn, const void* image, uint32 pitch); void EnableUnit(const uint32 unit); diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index d93fb2bbe6..b050fa402f 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -41,6 +41,7 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_CheckFramebufferStatus) = GetProcAddress("glCheckFramebufferStatus"); *(void**)&(gl_ClearBufferfv) = GetProcAddress("glClearBufferfv"); *(void**)&(gl_ClearBufferiv) = GetProcAddress("glClearBufferiv"); + *(void**)&(gl_ClearBufferuiv) = GetProcAddress("glClearBufferuiv"); *(void**)&(gl_CompileShader) = GetProcAddress("glCompileShader"); *(void**)&(gl_CopyImageSubDataNV) = GetProcAddress("glCopyImageSubDataNV"); *(void**)&(gl_CreateProgram) = GetProcAddress("glCreateProgram"); @@ -100,5 +101,8 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_GetUniformBlockIndex) = GetProcAddress("glGetUniformBlockIndex"); *(void**)&(gl_UniformBlockBinding) = GetProcAddress("glUniformBlockBinding"); *(void**)&(gl_GetUniformLocation) = GetProcAddress("glGetUniformLocation"); + // GL4.2 + *(void**)&(gl_BindImageTexture) = GetProcAddress("glBindImageTexture"); + *(void**)&(gl_MemoryBarrier) = GetProcAddress("glMemoryBarrier"); #endif } diff --git a/plugins/GSdx/GSWndOGL.cpp b/plugins/GSdx/GSWndOGL.cpp index f7438a4375..2e37ce9002 100644 --- a/plugins/GSdx/GSWndOGL.cpp +++ b/plugins/GSdx/GSWndOGL.cpp @@ -183,6 +183,9 @@ bool GSWndOGL::Create(const string& title, int w, int h) m_NativeWindow = XCreateSimpleWindow(m_NativeDisplay, DefaultRootWindow(m_NativeDisplay), 0, 0, w, h, 0, 0, 0); XMapWindow (m_NativeDisplay, m_NativeWindow); + if (m_NativeWindow == 0) + throw GSDXRecoverableError(); + CreateContext(3, 3); AttachContext(); @@ -191,9 +194,6 @@ bool GSWndOGL::Create(const string& title, int w, int h) PopulateGlFunction(); - if (m_NativeWindow == 0) - throw GSDXRecoverableError(); - return true; } diff --git a/plugins/GSdx/config.h b/plugins/GSdx/config.h index 6fcc5e6253..19eaf76732 100644 --- a/plugins/GSdx/config.h +++ b/plugins/GSdx/config.h @@ -40,3 +40,6 @@ #ifdef _DEBUG #define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states. #endif + +// Output stencil to a color buffer +#define ENABLE_OGL_STENCIL_DEBUG diff --git a/plugins/GSdx/res/convert.glsl b/plugins/GSdx/res/convert.glsl index 233463b3d5..908107cef4 100644 --- a/plugins/GSdx/res/convert.glsl +++ b/plugins/GSdx/res/convert.glsl @@ -182,10 +182,11 @@ void ps_main6() // diagonal #endif // Used for DATE (stencil) +// DATM == 1 #ifdef ps_main2 void ps_main2() { - if((sample_c().a - 127.5f / 255.0f) < 0.0f) // >= 0x80 pass + if(sample_c().a < 127.5f / 255.0f) // >= 0x80 pass discard; #ifdef ENABLE_OGL_STENCIL_DEBUG @@ -195,10 +196,11 @@ void ps_main2() #endif // Used for DATE (stencil) +// DATM == 0 #ifdef ps_main3 void ps_main3() { - if((127.5f / 255.0f - sample_c().a) < 0.0f) // < 0x80 pass (== 0x80 should not pass) + if(127.5f / 255.0f < sample_c().a) // < 0x80 pass (== 0x80 should not pass) discard; #ifdef ENABLE_OGL_STENCIL_DEBUG diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 383829b55e..d976bfc108 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -207,10 +207,11 @@ static const char* convert_glsl = "#endif\n" "\n" "// Used for DATE (stencil)\n" + "// DATM == 1\n" "#ifdef ps_main2\n" "void ps_main2()\n" "{\n" - " if((sample_c().a - 127.5f / 255.0f) < 0.0f) // >= 0x80 pass\n" + " if(sample_c().a < 127.5f / 255.0f) // >= 0x80 pass\n" " discard;\n" "\n" "#ifdef ENABLE_OGL_STENCIL_DEBUG\n" @@ -220,10 +221,11 @@ static const char* convert_glsl = "#endif\n" "\n" "// Used for DATE (stencil)\n" + "// DATM == 0\n" "#ifdef ps_main3\n" "void ps_main3()\n" "{\n" - " if((127.5f / 255.0f - sample_c().a) < 0.0f) // < 0x80 pass (== 0x80 should not pass)\n" + " if(127.5f / 255.0f < sample_c().a) // < 0x80 pass (== 0x80 should not pass)\n" " discard;\n" "\n" "#ifdef ENABLE_OGL_STENCIL_DEBUG\n" @@ -671,16 +673,18 @@ static const char* tfx_glsl = "\n" "#ifdef GEOMETRY_SHADER\n" "in gl_PerVertex {\n" - " vec4 gl_Position;\n" + " invariant vec4 gl_Position;\n" " float gl_PointSize;\n" " float gl_ClipDistance[];\n" "} gl_in[];\n" + "//in int gl_PrimitiveIDIn;\n" "\n" "out gl_PerVertex {\n" " vec4 gl_Position;\n" " float gl_PointSize;\n" " float gl_ClipDistance[];\n" "};\n" + "out int gl_PrimitiveID;\n" "\n" "in SHADER\n" "{\n" @@ -701,6 +705,7 @@ static const char* tfx_glsl = " GSout.t = v.t;\n" " GSout.tp = v.tp;\n" " GSout.c = v.c;\n" + " gl_PrimitiveID = gl_PrimitiveIDIn;\n" " EmitVertex();\n" "}\n" "\n" @@ -709,6 +714,7 @@ static const char* tfx_glsl = " GSout.t = t;\n" " GSout.tp = tp;\n" " GSout.c = c;\n" + " gl_PrimitiveID = gl_PrimitiveIDIn;\n" " EmitVertex();\n" "}\n" "\n" @@ -874,6 +880,23 @@ static const char* tfx_glsl = "layout(binding = 2) uniform sampler2D RTCopySampler;\n" "#endif\n" "\n" + "#ifndef DISABLE_GL42_image\n" + "#if PS_DATE > 0\n" + "// FIXME how to declare memory access\n" + "layout(r32ui, binding = 0) coherent uniform uimage2D img_prim_min;\n" + "#endif\n" + "#else\n" + "// use basic stencil\n" + "#endif\n" + "\n" + "#ifndef DISABLE_GL42_image\n" + "#if PS_DATE > 0\n" + "// origin_upper_left\n" + "layout(pixel_center_integer) in vec4 gl_FragCoord;\n" + "//in int gl_PrimitiveID;\n" + "#endif\n" + "#endif\n" + "\n" "#ifdef DISABLE_GL42\n" "layout(std140) uniform cb21\n" "#else\n" @@ -1202,7 +1225,7 @@ static const char* tfx_glsl = "\n" "vec4 ps_color()\n" "{\n" - " datst();\n" + " //datst();\n" "\n" " vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n" "\n" @@ -1256,12 +1279,50 @@ static const char* tfx_glsl = " if(c.a < 0.5) c.a += 0.5;\n" " }\n" "\n" - " SV_Target0 = c;\n" - " SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n" + "#ifndef DISABLE_GL42_image\n" + "\n" + " // Get first primitive that will write a failling alpha value\n" + "#if PS_DATE == 1\n" + " // DATM == 0\n" + " // Pixel with alpha equal to 1 will failed\n" + " if (c.a > 127.5f / 255.0f) {\n" + " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" + " }\n" + " //memoryBarrier();\n" + "#elif PS_DATE == 2\n" + " // DATM == 1\n" + " // Pixel with alpha equal to 0 will failed\n" + " if (c.a < 127.5f / 255.0f) {\n" + " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" + " }\n" + "#endif\n" + "\n" + " // TODO\n" + " // warning non uniform flow ???\n" + "#if PS_DATE == 3\n" + " uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n" + " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" + " // the bad alpha value so we must keep it.\n" + " if (gl_PrimitiveID > stencil_ceil)\n" + " discard;\n" + "#endif\n" "\n" "#endif\n" "\n" + "\n" + "#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)\n" + " // Don't write anything on the framebuffer\n" + " // Note: you can't use discard because it will also drop\n" + " // image operation\n" + " // Note2: output will be disabled too in opengl\n" + "#else\n" + " SV_Target0 = c;\n" + " SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n" + "#endif\n" + "\n" + "#endif\n" "}\n" + "\n" "#endif\n" ; diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index bdfcaf3b64..b02e02b69d 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -163,16 +163,18 @@ void vs_main() #ifdef GEOMETRY_SHADER in gl_PerVertex { - vec4 gl_Position; + invariant vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[]; } gl_in[]; +//in int gl_PrimitiveIDIn; out gl_PerVertex { vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[]; }; +out int gl_PrimitiveID; in SHADER { @@ -193,6 +195,7 @@ void out_vertex(in vertex v) GSout.t = v.t; GSout.tp = v.tp; GSout.c = v.c; + gl_PrimitiveID = gl_PrimitiveIDIn; EmitVertex(); } @@ -201,6 +204,7 @@ void out_vertex_elem(in vec4 t, in vec4 tp, in vec4 c) GSout.t = t; GSout.tp = tp; GSout.c = c; + gl_PrimitiveID = gl_PrimitiveIDIn; EmitVertex(); } @@ -366,6 +370,23 @@ layout(binding = 1) uniform sampler2D PaletteSampler; layout(binding = 2) uniform sampler2D RTCopySampler; #endif +#ifndef DISABLE_GL42_image +#if PS_DATE > 0 +// FIXME how to declare memory access +layout(r32ui, binding = 0) coherent uniform uimage2D img_prim_min; +#endif +#else +// use basic stencil +#endif + +#ifndef DISABLE_GL42_image +#if PS_DATE > 0 +// origin_upper_left +layout(pixel_center_integer) in vec4 gl_FragCoord; +//in int gl_PrimitiveID; +#endif +#endif + #ifdef DISABLE_GL42 layout(std140) uniform cb21 #else @@ -694,7 +715,7 @@ vec4 fog(vec4 c, float f) vec4 ps_color() { - datst(); + //datst(); vec4 t = sample_color(PSin_t.xy, PSin_t.w); @@ -748,10 +769,48 @@ void ps_main() if(c.a < 0.5) c.a += 0.5; } +#ifndef DISABLE_GL42_image + + // Get first primitive that will write a failling alpha value +#if PS_DATE == 1 + // DATM == 0 + // Pixel with alpha equal to 1 will failed + if (c.a > 127.5f / 255.0f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + } + //memoryBarrier(); +#elif PS_DATE == 2 + // DATM == 1 + // Pixel with alpha equal to 0 will failed + if (c.a < 127.5f / 255.0f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + } +#endif + + // TODO + // warning non uniform flow ??? +#if PS_DATE == 3 + uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)); + // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update + // the bad alpha value so we must keep it. + if (gl_PrimitiveID > stencil_ceil) + discard; +#endif + +#endif + + +#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image) + // Don't write anything on the framebuffer + // Note: you can't use discard because it will also drop + // image operation + // Note2: output will be disabled too in opengl +#else SV_Target0 = c; SV_Target1 = vec4(alpha, alpha, alpha, alpha); - #endif +#endif } + #endif