From e80b002929edced29e3839fab5ab91964e328d4b Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Thu, 24 Oct 2013 20:54:27 +0000 Subject: [PATCH] gsdx ogl: Flush various pending work * Try to use more subroutines on VS & PS; unfortunately this hits a driver crash! * Call Attach/DetachContext through GSDevice so I can unmap the currently mapped buffers * Implement the GLSL part of GL_ARB_bindless_texture; again this hits another driver crash! * Various fixes for GL_ARB_buffer_storage. A basic benchmark shows an improvement only in the 'cold' case; I guess it will improve smoothness * Try to fix GL_ARB_clear_texture, no success so far. It seems the extension is limited to basic textures (i.e. no depth/stencil) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5752 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GLLoader.cpp | 5 +- plugins/GSdx/GLState.cpp | 2 + plugins/GSdx/GLState.h | 1 + plugins/GSdx/GS.cpp | 10 +- plugins/GSdx/GSDevice.h | 6 +- plugins/GSdx/GSDeviceOGL.cpp | 62 ++++-- plugins/GSdx/GSDeviceOGL.h | 24 ++- plugins/GSdx/GSRendererOGL.cpp | 3 +- plugins/GSdx/GSShaderOGL.cpp | 49 ++++- plugins/GSdx/GSShaderOGL.h | 9 +- plugins/GSdx/GSState.cpp | 4 +- plugins/GSdx/GSTextureFXOGL.cpp | 39 +++- plugins/GSdx/GSTextureOGL.cpp | 38 ++-- plugins/GSdx/GSWnd.cpp | 2 +- plugins/GSdx/linux_replay.cpp | 2 +- plugins/GSdx/res/convert.glsl | 4 + plugins/GSdx/res/fxaa.fx | 4 + plugins/GSdx/res/glsl_source.h | 314 ++++++++++++++++++++++++++++--- plugins/GSdx/res/interlace.glsl | 4 + plugins/GSdx/res/merge.glsl | 4 + plugins/GSdx/res/shadeboost.glsl | 4 + plugins/GSdx/res/tfx.glsl | 294 ++++++++++++++++++++++++++--- 22 files changed, 762 insertions(+), 122 deletions(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 4f7fc3cb1e..3114187e89 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -248,9 +248,12 @@ namespace GLLoader { #endif if (ext.compare("GL_ARB_explicit_uniform_location") == 0) found_GL_ARB_explicit_uniform_location = true; #ifdef GL44 // Need 
to debug the code first + // Need to check the clean (in particular of depth/stencil texture) if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true; - if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true; + // FIXME unattach context case + perf if (ext.compare("GL_ARB_buffer_storage") == 0) found_GL_ARB_buffer_storage = true; + // OK but no apitrace support + if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true; #endif #ifdef GLBINDLESS // Need to debug the code first if (ext.compare("GL_ARB_bindless_texture") == 0) found_GL_ARB_bindless_texture = true; diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index 61f1501f46..448fd63c39 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -65,6 +65,7 @@ namespace GLState { GLuint vs = 0; GLuint program = 0; bool dirty_prog = false; + bool dirty_subroutine_vs = false; bool dirty_subroutine_ps = false; #if 0 struct { @@ -119,6 +120,7 @@ namespace GLState { vs = 0; program = 0; dirty_prog = false; + dirty_subroutine_vs = false; dirty_subroutine_ps = false; dirty_ressources = false; } diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index c1dee66ecf..681f863cde 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -66,6 +66,7 @@ namespace GLState { extern GLuint vs; extern GLuint program; // monolith program (when sso isn't supported) extern bool dirty_prog; + extern bool dirty_subroutine_vs; extern bool dirty_subroutine_ps; extern bool dirty_ressources; diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index f98a930232..55a7ab43f1 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -541,13 +541,13 @@ EXPORT_C GSreadFIFO(uint8* mem) #ifdef ENABLE_OGL_DEBUG if (theApp.GetConfig("renderer", 0) / 3 == 4) fprintf(stderr, "Disable FIFO1 on opengl\n"); #endif - s_gs->m_wnd->AttachContext(); + s_gs->m_dev->AttachContext(); #endif s_gs->ReadFIFO(mem, 1); #ifdef ENABLE_OGL_MT_HACK - 
s_gs->m_wnd->DetachContext(); + s_gs->m_dev->DetachContext(); #endif } catch (GSDXRecoverableError) @@ -562,13 +562,13 @@ EXPORT_C GSreadFIFO2(uint8* mem, uint32 size) #ifdef ENABLE_OGL_MT_HACK // FIXME called from EE core thread not MTGS which cause // invalidate data for opengl - s_gs->m_wnd->AttachContext(); + s_gs->m_dev->AttachContext(); #endif s_gs->ReadFIFO(mem, size); #ifdef ENABLE_OGL_MT_HACK - s_gs->m_wnd->DetachContext(); + s_gs->m_dev->DetachContext(); #endif } catch (GSDXRecoverableError) @@ -642,7 +642,7 @@ EXPORT_C GSvsync(int field) #endif #ifdef ENABLE_OGL_MT_HACK - s_gs->m_wnd->AttachContext(); + s_gs->m_dev->AttachContext(); #endif s_gs->VSync(field); } diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index e8e4ff5e3a..f6b7333945 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -140,6 +140,10 @@ public: virtual void PSSetShaderResource(int i, GSTexture* sr) {} virtual void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) {} + // Used for opengl multithread hack + virtual void AttachContext() {} + virtual void DetachContext() {} + GSTexture* GetCurrent(); void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c); @@ -179,4 +183,4 @@ struct GSAdapter #ifdef _LINUX // TODO #endif -}; \ No newline at end of file +}; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 3eee5ea78a..253da3f388 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -361,10 +361,31 @@ void GSDeviceOGL::Flip() #endif } +void GSDeviceOGL::AttachContext() +{ + if (m_window) + m_window->AttachContext(); +} + +void GSDeviceOGL::DetachContext() +{ + // Must be done before we detach the context! 
+ if (GLLoader::found_GL_ARB_buffer_storage) + PboPool::UnmapAll(); + + if (m_window) + m_window->DetachContext(); + +} + void GSDeviceOGL::BeforeDraw() { m_shader->UseProgram(); +#ifdef _DEBUG + ASSERT(gl_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); +#endif + //#ifdef ENABLE_OGL_STENCIL_DEBUG // if (m_date.t) // static_cast(m_date.t)->Save(format("/tmp/date_before_%04ld.csv", g_draw_count)); @@ -408,7 +429,16 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { if (GLLoader::found_GL_ARB_clear_texture) { - static_cast(t)->Clear((const void*)&c); + if (static_cast(t)->IsBackbuffer()) { + glDisable(GL_SCISSOR_TEST); + OMSetFBO(0); + // glDrawBuffer(GL_BACK); // this is the default when there is no FB + // 0 will select the first drawbuffer ie GL_BACK + gl_ClearBufferfv(GL_COLOR, 0, c.v); + glEnable(GL_SCISSOR_TEST); + } else { + static_cast(t)->Clear((const void*)&c); + } } else { glDisable(GL_SCISSOR_TEST); if (static_cast(t)->IsBackbuffer()) { @@ -455,10 +485,20 @@ void GSDeviceOGL::ClearRenderTarget_ui(GSTexture* t, uint32 c) void GSDeviceOGL::ClearDepth(GSTexture* t, float c) { - // TODO is it possible with GL44 ClearTexture? - // It is seriously not clear if we can clear only the depth - if (GLLoader::found_GL_ARB_clear_texture) { - gl_ClearTexImage(static_cast(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_FLOAT, &c); + // TODO is it possible with GL44 ClearTexture? no the API is garbage! 
+ // Anyway, stencil can be cleared to 0 (it will be only used for date) + if (0 && GLLoader::found_GL_ARB_clear_texture) { + static_cast(t)->EnableUnit(); + // Yes a very nice API to mix float and integer + struct clear { + float depth; + GLuint stencil; + } clear; + + clear.depth = c; + clear.stencil = 0; + + gl_ClearTexImage(static_cast(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, &clear); } else { OMSetFBO(m_fbo); OMSetWriteBuffer(); @@ -478,9 +518,9 @@ void GSDeviceOGL::ClearDepth(GSTexture* t, float c) void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { - // TODO is it possible with GL44 ClearTexture? - // It is seriously not clear if we can clear only the stencil + // TODO is it possible with GL44 ClearTexture? no the API is garbage! if (GLLoader::found_GL_ARB_clear_texture) { + static_cast(t)->EnableUnit(); gl_ClearTexImage(static_cast(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_BYTE, &c); } else { OMSetFBO(m_fbo); @@ -558,10 +598,10 @@ void GSDeviceOGL::BindDateTexture() { // TODO: multibind? 
// GLuint textures[1] = {static_cast(m_date.t)->GetID()}; - // gl_BindImageTextures(0, 1, textures); - //gl_BindImageTexture(0, 0, 0, true, 0, GL_READ_WRITE, GL_R32I); + // gl_BindImageTextures(2, 1, textures); + //gl_BindImageTexture(2, 0, 0, true, 0, GL_READ_WRITE, GL_R32I); - gl_BindImageTexture(0, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I); + gl_BindImageTexture(2, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I); } void GSDeviceOGL::RecycleDateTexture() @@ -1196,7 +1236,7 @@ void GSDeviceOGL::DebugOutputToFile(unsigned int source, unsigned int type, unsi fprintf(f,"Type:%s\tID:%d\tSeverity:%s\tMessage:%s\n", debType, g_draw_count, debSev,message); fclose(f); } - //if (sev_counter > 2) assert(0); + ASSERT(sev_counter < 3); #endif } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index c8de474617..ec578ccb15 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -260,9 +260,10 @@ class GSDeviceOGL : public GSDevice struct { uint32 bppz:2; + uint32 logz:1; + // Next param will be handle by subroutine uint32 tme:1; uint32 fst:1; - uint32 logz:1; }; uint32 key; @@ -333,24 +334,26 @@ class GSDeviceOGL : public GSDevice struct { uint32 fst:1; - uint32 wms:2; - uint32 wmt:2; uint32 fmt:3; uint32 aem:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 atst:3; uint32 fog:1; uint32 clr1:1; uint32 fba:1; uint32 aout:1; - uint32 ltf:1; - uint32 colclip:2; uint32 date:2; uint32 spritehack:1; uint32 tcoffsethack:1; uint32 point_sampler:1; uint32 iip:1; + // Next param will be handle by subroutine + uint32 colclip:2; + uint32 atst:3; + + uint32 tfx:3; + uint32 tcc:1; + uint32 wms:2; + uint32 wmt:2; + uint32 ltf:1; }; uint32 key; @@ -538,7 +541,7 @@ class GSDeviceOGL : public GSDevice GSDeviceOGL(); virtual ~GSDeviceOGL(); - void CheckDebugLog(); + static void CheckDebugLog(); static void DebugOutputToFile(unsigned int source, unsigned int type, unsigned int id, unsigned int severity, const char* 
message); bool HasStencil() { return true; } @@ -548,6 +551,9 @@ class GSDeviceOGL : public GSDevice bool Reset(int w, int h); void Flip(); void SetVSync(bool enable); + // Used for opengl multithread hack + void AttachContext(); + void DetachContext(); void DrawPrimitive(); void DrawIndexedPrimitive(); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index d71fae51c3..01e360edef 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -423,7 +423,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.spritehack = tex->m_spritehack_t; // FIXME the ati is currently disabled on the shader. I need to find a .gs to test that we got same // bug on opengl - ps_sel.point_sampler = !(bilinear && simple_sample); + // FIXME for the moment disable it on subroutine (it will kill my perf for nothings) + ps_sel.point_sampler = !(bilinear && simple_sample) && !GLLoader::found_GL_ARB_shader_subroutine; int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index 1ec5c4e800..9bc7b2f79c 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -25,9 +25,11 @@ GSShaderOGL::GSShaderOGL(bool debug) : m_debug_shader(debug), - m_sub_count(0) + m_vs_sub_count(0), + m_ps_sub_count(0) { + memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0])); memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0])); m_single_prog.clear(); @@ -50,12 +52,15 @@ GSShaderOGL::~GSShaderOGL() m_single_prog.clear(); } -void GSShaderOGL::VS(GLuint s) +void GSShaderOGL::VS(GLuint s, GLuint sub_count) { if (GLState::vs != s) { + m_vs_sub_count = sub_count; + GLState::vs = s; GLState::dirty_prog = true; + GLState::dirty_subroutine_vs = true; #ifndef ENABLE_GLES if (GLLoader::found_GL_ARB_separate_shader_objects) gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s); @@ -63,11 +68,23 @@ void 
GSShaderOGL::VS(GLuint s) } } +void GSShaderOGL::VS_subroutine(GLuint *sub) +{ + if (!(m_vs_sub[0] == sub[0])) { + m_vs_sub[0] = sub[0]; + GLState::dirty_subroutine_vs = true; + } +} + void GSShaderOGL::PS_subroutine(GLuint *sub) { - if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1])) { + // FIXME could be more efficient with GSvector + if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) { m_ps_sub[0] = sub[0]; m_ps_sub[1] = sub[1]; + m_ps_sub[2] = sub[2]; + m_ps_sub[3] = sub[3]; + m_ps_sub[4] = sub[4]; GLState::dirty_subroutine_ps = true; } } @@ -85,7 +102,7 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count) { if (GLState::ps != s) { - m_sub_count = sub_count; + m_ps_sub_count = sub_count; GLState::ps = s; GLState::dirty_prog = true; @@ -185,10 +202,14 @@ void GSShaderOGL::SetupUniform() void GSShaderOGL::SetupSubroutineUniform() { if (!GLLoader::found_GL_ARB_shader_subroutine) return; - if (m_sub_count == 0) return; - if (GLState::dirty_subroutine_ps) { - gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_sub_count, m_ps_sub); + if (GLState::dirty_subroutine_vs && m_vs_sub_count) { + gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub); + GLState::dirty_subroutine_vs = false; + } + + if (GLState::dirty_subroutine_ps && m_ps_sub_count) { + gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub); GLState::dirty_subroutine_ps = false; } } @@ -280,6 +301,7 @@ void GSShaderOGL::UseProgram() { if (GLState::dirty_prog) { if (!GLLoader::found_GL_ARB_separate_shader_objects) { + GLState::dirty_subroutine_vs = true; GLState::dirty_subroutine_ps = true; GLState::dirty_ressources = true; @@ -355,17 +377,26 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co // Need GL version 400 header += "#define SUBROUTINE_GL40 1\n"; header += "#extension GL_ARB_shader_subroutine: require\n"; + } + if 
(GLLoader::found_GL_ARB_explicit_uniform_location) { // Need GL version 430 header += "#extension GL_ARB_explicit_uniform_location: require\n"; } #ifdef ENABLE_OGL_STENCIL_DEBUG header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n"; #endif - if (GLLoader::found_GL_ARB_shader_image_load_store) + if (GLLoader::found_GL_ARB_shader_image_load_store) { // Need GL version 420 header += "#extension GL_ARB_shader_image_load_store: require\n"; - else + } else { header += "#define DISABLE_GL42_image\n"; + } + if (GLLoader::found_GL_ARB_bindless_texture && GLLoader::found_GL_ARB_explicit_uniform_location) { + // Future opengl 5? + header += "#extension GL_ARB_bindless_texture: require\n"; + header += "#define ENABLE_BINDLESS_TEX\n"; + } + #else header = "#version 300 es\n"; diff --git a/plugins/GSdx/GSShaderOGL.h b/plugins/GSdx/GSShaderOGL.h index 111026c63b..d30c2ca95f 100644 --- a/plugins/GSdx/GSShaderOGL.h +++ b/plugins/GSdx/GSShaderOGL.h @@ -25,9 +25,11 @@ class GSShaderOGL { GLuint m_pipeline; hash_map m_single_prog; const bool m_debug_shader; - GLuint m_sub_count; + GLuint m_vs_sub_count; + GLuint m_ps_sub_count; - GLuint m_ps_sub[2]; + GLuint m_vs_sub[1]; + GLuint m_ps_sub[5]; void SetupSubroutineUniform(); void SetupUniform(); @@ -51,7 +53,8 @@ class GSShaderOGL { void PS(GLuint s, GLuint sub_count = 0); void PS_subroutine(GLuint *sub); void PS_ressources(GLuint64 handle[2]); - void VS(GLuint s); + void VS(GLuint s, GLuint sub_count = 0); + void VS_subroutine(GLuint *sub); void UseProgram(); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 5bfd287e41..b506d6fc8c 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -1287,7 +1287,7 @@ void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r) case 1: // local -> host m_tr.Init(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY); #ifdef ENABLE_OGL_MT_HACK - s_gs->m_wnd->DetachContext(); + s_gs->m_dev->DetachContext(); #endif break; case 2: // local -> local @@ -1794,7 +1794,7 @@ template void 
GSState::Transfer(const uint8* mem, uint32 size) { GSPerfMonAutoTimer pmat(&m_perfmon); #ifdef ENABLE_OGL_MT_HACK - s_gs->m_wnd->AttachContext(); + s_gs->m_dev->AttachContext(); #endif const uint8* start = mem; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 1ed4520568..98589c8aa7 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -136,9 +136,16 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* void GSDeviceOGL::SetupVS(VSSelector sel) { - GLuint vs = m_vs[sel]; + if (GLLoader::found_GL_ARB_shader_subroutine) { + GLuint sub[1]; + sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0; + m_shader->VS_subroutine(sub); + // Handle by subroutine useless now + sel.tme = 0; + sel.fst = 0; + } - m_shader->VS(vs); + m_shader->VS(m_vs[sel], 1); } void GSDeviceOGL::SetupGS(bool enable) @@ -152,11 +159,35 @@ void GSDeviceOGL::SetupGS(bool enable) void GSDeviceOGL::SetupPS(PSSelector sel) { if (GLLoader::found_GL_ARB_shader_subroutine) { - GLuint sub[2] = {sel.atst, (uint32)sel.colclip + 8}; + GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4; + + GLuint colclip = 8 + (uint32)sel.colclip; + + GLuint clamp = + (sel.wms == 2 && sel.wmt == 2) ? 20 : + (sel.wms == 2) ? 21 : + (sel.wmt == 2) ? 22 : 23; + + GLuint wrap = + (sel.wms == 2 && sel.wmt == 2) ? 24 : + (sel.wms == 3 && sel.wmt == 3) ? 25 : + (sel.wms == 2 && sel.wmt == 3) ? 26 : + (sel.wms == 3 && sel.wmt == 2) ? 27 : + (sel.wms == 2) ? 28 : + (sel.wmt == 3) ? 29 : + (sel.wms == 3) ? 30 : + (sel.wmt == 2) ? 
31 : 32; + + GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap}; + m_shader->PS_subroutine(sub); // Handle by subroutine useless now sel.atst = 0; sel.colclip = 0; + sel.tfx = 0; + sel.tcc = 0; + // sel.wms = 0; + // sel.wmt = 0; } // ************************************************************* @@ -175,7 +206,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel) // ************************************************************* // Dynamic // ************************************************************* - m_shader->PS(ps, 2); + m_shader->PS(ps, 3); } void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel) diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index d0c72224bd..677a6884a3 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -24,10 +24,7 @@ #include "GSTextureOGL.h" #include "GLState.h" -// Flush need bind/unbind -// Barrier might sync much more -#define BARRIER_INSTEAD_FLUSH - +// FIXME OGL4: investigate, only 1 unpack buffer always bound namespace PboPool { GLuint m_pool[PBO_POOL_SIZE]; @@ -46,11 +43,12 @@ namespace PboPool { if (GLLoader::found_GL_ARB_buffer_storage) { gl_BufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT | GL_CLIENT_STORAGE_BIT); } else { - gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW); - m_offset[m_current_pbo] = 0; - m_map[m_current_pbo] = NULL; + gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_COPY); } + m_offset[m_current_pbo] = 0; + m_map[m_current_pbo] = NULL; + NextPbo(); } UnbindPbo(); @@ -60,11 +58,7 @@ namespace PboPool { if (m_map[m_current_pbo] != NULL) return; // FIXME I'm not sure it is allowed to map another buffer after we get a pointer -#ifdef BARRIER_INSTEAD_FLUSH GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT; -#else - GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | 
GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT; -#endif for (size_t i = 0; i < countof(m_pool); i++) { BindPbo(); m_map[m_current_pbo] = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, flags); @@ -107,10 +101,15 @@ namespace PboPool { m_offset[m_current_pbo] = 0; } + // Note: it still need it because texsubimage will access currently bound buffer + // Pbo ready let's get a pointer + BindPbo(); + return m_map[m_current_pbo] + m_offset[m_current_pbo]; } } + // FIXME: unmap buffer when the context is dettached (not sure it is required actually) void UnmapAll() { if (m_map[m_current_pbo] == NULL) return; @@ -125,14 +124,7 @@ namespace PboPool { void Unmap() { if (GLLoader::found_GL_ARB_buffer_storage) { - // GL4.4 do a glMemoryBarrier? or glFlushMappedBufferRange? -#ifdef BARRIER_INSTEAD_FLUSH gl_MemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); -#else - BindPbo(); - gl_FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size); - UnbindPbo(); -#endif } else { gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER); } @@ -276,7 +268,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) m_pbo_size = (m_size.x * m_size.y) << m_int_shift; gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id); - gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW); + gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_READ); gl_BindBuffer(GL_PIXEL_PACK_BUFFER, 0); case GSTexture::DepthStencil: @@ -310,7 +302,8 @@ GSTextureOGL::~GSTextureOGL() void GSTextureOGL::Clear(const void *data) { - gl_ClearTexImage(m_texture_id, 0, m_format, m_int_type, data); + EnableUnit(); + gl_ClearTexImage(m_texture_id, 0, m_int_format, m_int_type, data); } bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) @@ -340,8 +333,9 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, 
m_int_type, (const void*)PboPool::Offset()); - if (!GLLoader::found_GL_ARB_buffer_storage) - PboPool::UnbindPbo(); + // FIXME OGL4: investigate, only 1 unpack buffer always bound + //if (!GLLoader::found_GL_ARB_buffer_storage) + PboPool::UnbindPbo(); PboPool::EndTransfer(); diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index fa98a1626f..4cbf40a3d5 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -110,7 +110,7 @@ void GSWndGL::PopulateGlFunction() // GL4.3 *(void**)&(gl_CopyImageSubData) = GetProcAddress("glCopyImageSubData", true); // GL4.4 - *(void**)&(gl_ClearTexImage) = GetProcAddress("glCLearTexImage", true); + *(void**)&(gl_ClearTexImage) = GetProcAddress("glClearTexImage", true); *(void**)&(gl_BindTextures) = GetProcAddress("glBindTextures", true); *(void**)&(gl_BufferStorage) = GetProcAddress("glBufferStorage", true); // GL_ARB_bindless_texture (GL5?) diff --git a/plugins/GSdx/linux_replay.cpp b/plugins/GSdx/linux_replay.cpp index d6de2f73ed..6f69d8c45a 100644 --- a/plugins/GSdx/linux_replay.cpp +++ b/plugins/GSdx/linux_replay.cpp @@ -36,7 +36,7 @@ int main ( int argc, char *argv[] ) void *handle = dlopen(argv[1], RTLD_LAZY|RTLD_GLOBAL); if (handle == NULL) { - fprintf(stderr, "Failed to open plugin %s\n", argv[1]); + fprintf(stderr, "Failed to dlopen plugin %s\n", argv[1]); help(); } diff --git a/plugins/GSdx/res/convert.glsl b/plugins/GSdx/res/convert.glsl index 908107cef4..dcd001b330 100644 --- a/plugins/GSdx/res/convert.glsl +++ b/plugins/GSdx/res/convert.glsl @@ -96,11 +96,15 @@ layout(location = 0) out uint SV_Target1; layout(location = 0) out vec4 SV_Target0; #endif +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; #else layout(binding = 0) uniform sampler2D TextureSampler; #endif +#endif vec4 sample_c() { diff --git a/plugins/GSdx/res/fxaa.fx b/plugins/GSdx/res/fxaa.fx index e65beda2d0..15d4721e08 100644 
--- a/plugins/GSdx/res/fxaa.fx +++ b/plugins/GSdx/res/fxaa.fx @@ -81,11 +81,15 @@ layout(std140, binding = 13) uniform cb13 vec4 _rcpFrameOpt; }; +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; #else layout(binding = 0) uniform sampler2D TextureSampler; #endif +#endif #if !GL_ES && __VERSION__ > 140 diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 20c5037154..0fe67af3db 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -121,11 +121,15 @@ static const char* convert_glsl = "layout(location = 0) out vec4 SV_Target0;\n" "#endif\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + "layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" + "#endif\n" "\n" "vec4 sample_c()\n" "{\n" @@ -296,11 +300,15 @@ static const char* interlace_glsl = " float hH;\n" "};\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + "layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" + "#endif\n" "\n" "// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n" "void ps_main0()\n" @@ -389,11 +397,15 @@ static const char* merge_glsl = " vec4 BGColor;\n" "};\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + "layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" + "#endif\n" "\n" "void ps_main0()\n" "{\n" @@ -465,11 +477,15 @@ static const char* shadeboost_glsl = " vec4 BGColor;\n" "};\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + 
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" + "#endif\n" "\n" "// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% \n" "vec4 ContrastSaturationBrightness(vec4 color)\n" @@ -617,6 +633,60 @@ static const char* tfx_glsl = "\n" "const float exp_min32 = exp2(-32.0f);\n" "\n" + "#ifdef SUBROUTINE_GL40\n" + "// Function pointer type\n" + "subroutine void TextureCoordType(void);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n" + "\n" + "layout(index = 0) subroutine(TextureCoordType)\n" + "void tme_0()\n" + "{\n" + " VSout_t.xy = vec2(0.0f, 0.0f);\n" + " VSout_t.w = 1.0f;\n" + "}\n" + "\n" + "layout(index = 1) subroutine(TextureCoordType)\n" + "void tme_1_fst_0()\n" + "{\n" + " VSout_t.xy = i_st;\n" + " VSout_t.w = i_q;\n" + "}\n" + "\n" + "layout(index = 2) subroutine(TextureCoordType)\n" + "void tme_1_fst_1()\n" + "{\n" + " VSout_t.xy = vec2(i_uv) * TextureScale;\n" + " VSout_t.w = 1.0f;\n" + "}\n" + "\n" + "#else\n" + "\n" + "void texture_coord()\n" + "{\n" + " if(VS_TME != 0)\n" + " {\n" + " if(VS_FST != 0)\n" + " {\n" + " VSout_t.xy = vec2(i_uv) * TextureScale;\n" + " VSout_t.w = 1.0f;\n" + " }\n" + " else\n" + " {\n" + " VSout_t.xy = i_st;\n" + " VSout_t.w = i_q;\n" + " }\n" + " }\n" + " else\n" + " {\n" + " VSout_t.xy = vec2(0.0f, 0.0f);\n" + " VSout_t.w = 1.0f;\n" + " }\n" + "}\n" + "\n" + "#endif\n" + "\n" "void vs_main()\n" "{\n" " uint z;\n" @@ -642,24 +712,7 @@ static const char* tfx_glsl = "\n" " gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n" "\n" - " if(VS_TME != 0)\n" - " {\n" - " if(VS_FST != 0)\n" - " {\n" - " VSout_t.xy = vec2(i_uv) * TextureScale;\n" - " VSout_t.w = 1.0f;\n" - " }\n" - " else\n" - " {\n" - " VSout_t.xy = i_st;\n" - " 
VSout_t.w = i_q;\n" - " }\n" - " }\n" - " else\n" - " {\n" - " VSout_t.xy = vec2(0.0f, 0.0f);\n" - " VSout_t.w = 1.0f;\n" - " }\n" + " texture_coord();\n" "\n" " VSout_c = i_c;\n" " VSout_fc = i_c;\n" @@ -804,20 +857,23 @@ static const char* tfx_glsl = "layout(location = 0, index = 1) out vec4 SV_Target1;\n" "#endif\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + "layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "uniform sampler2D PaletteSampler;\n" - "//uniform sampler2D RTCopySampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "layout(binding = 1) uniform sampler2D PaletteSampler;\n" - "//layout(binding = 2) uniform sampler2D RTCopySampler;\n" + "#endif\n" "#endif\n" "\n" "#ifndef DISABLE_GL42_image\n" "#if PS_DATE > 0\n" "// FIXME how to declare memory access\n" - "layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min;\n" + "layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;\n" "#endif\n" "#else\n" "// use basic stencil\n" @@ -878,6 +934,87 @@ static const char* tfx_glsl = "}\n" "#endif\n" "\n" + "// FIXME crash nvidia\n" + "#if 0\n" + "// Function pointer type\n" + "subroutine vec4 WrapType(vec4 uv);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 4) subroutine uniform WrapType wrapuv;\n" + "\n" + "layout(index = 24) subroutine(WrapType)\n" + "vec4 wrapuv_wms_wmt_2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 25) subroutine(WrapType)\n" + "vec4 wrapuv_wms_wmt3(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 26) subroutine(WrapType)\n" + "vec4 wrapuv_wms2_wmt3(vec4 
uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" + " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 27) subroutine(WrapType)\n" + "vec4 wrapuv_wms3_wmt2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" + " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 28) subroutine(WrapType)\n" + "vec4 wrapuv_wms2_wmtx(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 29) subroutine(WrapType)\n" + "vec4 wrapuv_wmsx_wmt3(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 30) subroutine(WrapType)\n" + "vec4 wrapuv_wms3_wmtx(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 31) subroutine(WrapType)\n" + "vec4 wrapuv_wmsx_wmt2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 32) subroutine(WrapType)\n" + "vec4 wrapuv_dummy(vec4 uv)\n" + "{\n" + " return uv;\n" + "}\n" + "\n" + "#else\n" "vec4 wrapuv(vec4 uv)\n" "{\n" " vec4 uv_out = uv;\n" @@ -915,7 +1052,45 @@ static const char* tfx_glsl = "\n" " return uv_out;\n" "}\n" + "#endif\n" "\n" + "// FIXME crash nvidia\n" + "#if 0\n" + "// Function pointer type\n" + "subroutine vec2 ClampType(vec2 uv);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 3) subroutine uniform ClampType clampuv;\n" + "\n" + "layout(index = 20) 
subroutine(ClampType)\n" + "vec2 clampuv_wms2_wmt2(vec2 uv)\n" + "{\n" + " return clamp(uv, MinF, MinMax.zw);\n" + "}\n" + "\n" + "layout(index = 21) subroutine(ClampType)\n" + "vec2 clampuv_wms2(vec2 uv)\n" + "{\n" + " vec2 uv_out = uv;\n" + " uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 22) subroutine(ClampType)\n" + "vec2 clampuv_wmt2(vec2 uv)\n" + "{\n" + " vec2 uv_out = uv;\n" + " uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 23) subroutine(ClampType)\n" + "vec2 clampuv_dummy(vec2 uv)\n" + "{\n" + " return uv;\n" + "}\n" + "\n" + "#else\n" "vec2 clampuv(vec2 uv)\n" "{\n" " vec2 uv_out = uv;\n" @@ -935,6 +1110,7 @@ static const char* tfx_glsl = "\n" " return uv_out;\n" "}\n" + "#endif\n" "\n" "mat4 sample_4c(vec4 uv)\n" "{\n" @@ -1043,6 +1219,86 @@ static const char* tfx_glsl = " return t;\n" "}\n" "\n" + "#ifdef SUBROUTINE_GL40\n" + "// Function pointer type\n" + "subroutine vec4 TfxType(vec4 t, vec4 c);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 2) subroutine uniform TfxType tfx;\n" + "\n" + "layout(index = 11) subroutine(TfxType)\n" + "vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 12) subroutine(TfxType)\n" + "vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = t.rgb;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 13) subroutine(TfxType)\n" + "vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 14) subroutine(TfxType)\n" + "vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 15) subroutine(TfxType)\n" + "vec4 
tfx_0_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out = c * t * 255.0f / 128.0f;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 16) subroutine(TfxType)\n" + "vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out = t;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 17) subroutine(TfxType)\n" + "vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " c_out.a += t.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 18) subroutine(TfxType)\n" + "vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " c_out.a = t.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 19) subroutine(TfxType)\n" + "vec4 tfx_dummy(vec4 t, vec4 c)\n" + "{\n" + " return c;\n" + "}\n" + "\n" + "#else\n" "vec4 tfx(vec4 t, vec4 c)\n" "{\n" " vec4 c_out = c;\n" @@ -1087,8 +1343,10 @@ static const char* tfx_glsl = " }\n" " }\n" "\n" - " return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f));\n" + " return c_out;\n" "}\n" + "#endif\n" + "\n" "\n" "#if 0\n" "void datst()\n" @@ -1105,7 +1363,6 @@ static const char* tfx_glsl = "}\n" "#endif\n" "\n" - "// Note layout stuff might require gl4.3\n" "#ifdef SUBROUTINE_GL40\n" "// Function pointer type\n" "subroutine void AlphaTestType(vec4 c);\n" @@ -1113,7 +1370,6 @@ static const char* tfx_glsl = "// a function pointer variable\n" "layout(location = 0) subroutine uniform AlphaTestType atst;\n" "\n" - "// The function attached to AlphaTestType\n" "layout(index = 0) subroutine(AlphaTestType)\n" "void atest_never(vec4 c)\n" "{\n" @@ -1284,10 +1540,12 @@ static const char* tfx_glsl = "{\n" " vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n" "\n" + " vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);\n" + " vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);\n" "#if PS_IIP == 1\n" - " vec4 c = tfx(t, PSin_c);\n" + " vec4 c = clamp(tfx(t, 
PSin_c), zero, one);\n" "#else\n" - " vec4 c = tfx(t, PSin_fc);\n" + " vec4 c = clamp(tfx(t, PSin_fc), zero, one);\n" "#endif\n" "\n" " atst(c);\n" @@ -1457,11 +1715,15 @@ static const char* fxaa_fx = " vec4 _rcpFrameOpt;\n" "};\n" "\n" + "#ifdef ENABLE_BINDLESS_TEX\n" + "layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n" + "#else\n" "#ifdef DISABLE_GL42\n" "uniform sampler2D TextureSampler;\n" "#else\n" "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" + "#endif\n" "\n" "#if !GL_ES && __VERSION__ > 140\n" "\n" diff --git a/plugins/GSdx/res/interlace.glsl b/plugins/GSdx/res/interlace.glsl index b152b85863..18979a1c43 100644 --- a/plugins/GSdx/res/interlace.glsl +++ b/plugins/GSdx/res/interlace.glsl @@ -45,11 +45,15 @@ layout(std140, binding = 11) uniform cb11 float hH; }; +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; #else layout(binding = 0) uniform sampler2D TextureSampler; #endif +#endif // TODO ensure that clip (discard) is < 0 and not <= 0 ??? 
void ps_main0() diff --git a/plugins/GSdx/res/merge.glsl b/plugins/GSdx/res/merge.glsl index fdcb94f77c..1a20359cfa 100644 --- a/plugins/GSdx/res/merge.glsl +++ b/plugins/GSdx/res/merge.glsl @@ -44,11 +44,15 @@ layout(std140, binding = 10) uniform cb10 vec4 BGColor; }; +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; #else layout(binding = 0) uniform sampler2D TextureSampler; #endif +#endif void ps_main0() { diff --git a/plugins/GSdx/res/shadeboost.glsl b/plugins/GSdx/res/shadeboost.glsl index 48e8112af5..cd11c0de51 100644 --- a/plugins/GSdx/res/shadeboost.glsl +++ b/plugins/GSdx/res/shadeboost.glsl @@ -50,11 +50,15 @@ layout(std140, binding = 12) uniform cb12 vec4 BGColor; }; +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; #else layout(binding = 0) uniform sampler2D TextureSampler; #endif +#endif // For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% vec4 ContrastSaturationBrightness(vec4 color) diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index f2cedfdf1d..7a44a6d919 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -107,6 +107,60 @@ layout(std140, binding = 20) uniform cb20 const float exp_min32 = exp2(-32.0f); +#ifdef SUBROUTINE_GL40 +// Function pointer type +subroutine void TextureCoordType(void); + +// a function pointer variable +layout(location = 0) subroutine uniform TextureCoordType texture_coord; + +layout(index = 0) subroutine(TextureCoordType) +void tme_0() +{ + VSout_t.xy = vec2(0.0f, 0.0f); + VSout_t.w = 1.0f; +} + +layout(index = 1) subroutine(TextureCoordType) +void tme_1_fst_0() +{ + VSout_t.xy = i_st; + VSout_t.w = i_q; +} + +layout(index = 2) subroutine(TextureCoordType) +void tme_1_fst_1() +{ + VSout_t.xy = vec2(i_uv) * TextureScale; + VSout_t.w = 1.0f; +} + +#else + +void 
texture_coord() +{ + if(VS_TME != 0) + { + if(VS_FST != 0) + { + VSout_t.xy = vec2(i_uv) * TextureScale; + VSout_t.w = 1.0f; + } + else + { + VSout_t.xy = i_st; + VSout_t.w = i_q; + } + } + else + { + VSout_t.xy = vec2(0.0f, 0.0f); + VSout_t.w = 1.0f; + } +} + +#endif + void vs_main() { uint z; @@ -132,24 +186,7 @@ void vs_main() gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position - if(VS_TME != 0) - { - if(VS_FST != 0) - { - VSout_t.xy = vec2(i_uv) * TextureScale; - VSout_t.w = 1.0f; - } - else - { - VSout_t.xy = i_st; - VSout_t.w = i_q; - } - } - else - { - VSout_t.xy = vec2(0.0f, 0.0f); - VSout_t.w = 1.0f; - } + texture_coord(); VSout_c = i_c; VSout_fc = i_c; @@ -294,20 +331,23 @@ layout(location = 0, index = 0) out vec4 SV_Target0; layout(location = 0, index = 1) out vec4 SV_Target1; #endif +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler; +#else #ifdef DISABLE_GL42 uniform sampler2D TextureSampler; uniform sampler2D PaletteSampler; -//uniform sampler2D RTCopySampler; #else layout(binding = 0) uniform sampler2D TextureSampler; layout(binding = 1) uniform sampler2D PaletteSampler; -//layout(binding = 2) uniform sampler2D RTCopySampler; +#endif #endif #ifndef DISABLE_GL42_image #if PS_DATE > 0 // FIXME how to declare memory access -layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min; +layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min; #endif #else // use basic stencil @@ -368,6 +408,87 @@ vec4 sample_rt(vec2 uv) } #endif +// FIXME crash nvidia +#if 0 +// Function pointer type +subroutine vec4 WrapType(vec4 uv); + +// a function pointer variable +layout(location = 4) subroutine uniform WrapType wrapuv; + +layout(index = 24) subroutine(WrapType) +vec4 wrapuv_wms_wmt_2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); + return 
uv_out; +} + +layout(index = 25) subroutine(WrapType) +vec4 wrapuv_wms_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; + return uv_out; +} + +layout(index = 26) subroutine(WrapType) +vec4 wrapuv_wms2_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + return uv_out; +} + +layout(index = 27) subroutine(WrapType) +vec4 wrapuv_wms3_wmt2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return uv_out; +} + +layout(index = 28) subroutine(WrapType) +vec4 wrapuv_wms2_wmtx(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + return uv_out; +} + +layout(index = 29) subroutine(WrapType) +vec4 wrapuv_wmsx_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + return uv_out; +} + +layout(index = 30) subroutine(WrapType) +vec4 wrapuv_wms3_wmtx(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + return uv_out; +} + +layout(index = 31) subroutine(WrapType) +vec4 wrapuv_wmsx_wmt2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return uv_out; +} + +layout(index = 32) subroutine(WrapType) +vec4 wrapuv_dummy(vec4 uv) +{ + return uv; +} + +#else vec4 wrapuv(vec4 uv) { vec4 uv_out = uv; @@ -405,7 +526,45 @@ vec4 wrapuv(vec4 uv) return uv_out; } +#endif +// FIXME crash nvidia +#if 0 +// Function pointer type +subroutine vec2 ClampType(vec2 uv); + +// a function pointer variable +layout(location = 3) subroutine uniform ClampType clampuv; + +layout(index = 20) subroutine(ClampType) +vec2 clampuv_wms2_wmt2(vec2 uv) +{ + return clamp(uv, MinF, MinMax.zw); 
+} + +layout(index = 21) subroutine(ClampType) +vec2 clampuv_wms2(vec2 uv) +{ + vec2 uv_out = uv; + uv_out.x = clamp(uv.x, MinF.x, MinMax.z); + return uv_out; +} + +layout(index = 22) subroutine(ClampType) +vec2 clampuv_wmt2(vec2 uv) +{ + vec2 uv_out = uv; + uv_out.y = clamp(uv.y, MinF.y, MinMax.w); + return uv_out; +} + +layout(index = 23) subroutine(ClampType) +vec2 clampuv_dummy(vec2 uv) +{ + return uv; +} + +#else vec2 clampuv(vec2 uv) { vec2 uv_out = uv; @@ -425,6 +584,7 @@ vec2 clampuv(vec2 uv) return uv_out; } +#endif mat4 sample_4c(vec4 uv) { @@ -533,6 +693,86 @@ vec4 sample_color(vec2 st, float q) return t; } +#ifdef SUBROUTINE_GL40 +// Function pointer type +subroutine vec4 TfxType(vec4 t, vec4 c); + +// a function pointer variable +layout(location = 2) subroutine uniform TfxType tfx; + +layout(index = 11) subroutine(TfxType) +vec4 tfx_0_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f; + return c_out; +} + +layout(index = 12) subroutine(TfxType) +vec4 tfx_1_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = t.rgb; + return c_out; +} + +layout(index = 13) subroutine(TfxType) +vec4 tfx_2_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + return c_out; +} + +layout(index = 14) subroutine(TfxType) +vec4 tfx_3_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + return c_out; +} + +layout(index = 15) subroutine(TfxType) +vec4 tfx_0_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out = c * t * 255.0f / 128.0f; + return c_out; +} + +layout(index = 16) subroutine(TfxType) +vec4 tfx_1_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out = t; + return c_out; +} + +layout(index = 17) subroutine(TfxType) +vec4 tfx_2_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + c_out.a += t.a; + return c_out; +} + +layout(index = 18) subroutine(TfxType) +vec4 tfx_3_tcc_1(vec4 t, vec4 c) +{ + vec4 
c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + c_out.a = t.a; + return c_out; +} + +layout(index = 19) subroutine(TfxType) +vec4 tfx_dummy(vec4 t, vec4 c) +{ + return c; +} + +#else vec4 tfx(vec4 t, vec4 c) { vec4 c_out = c; @@ -577,8 +817,10 @@ vec4 tfx(vec4 t, vec4 c) } } - return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f)); + return c_out; } +#endif + #if 0 void datst() @@ -595,7 +837,6 @@ void datst() } #endif -// Note layout stuff might require gl4.3 #ifdef SUBROUTINE_GL40 // Function pointer type subroutine void AlphaTestType(vec4 c); @@ -603,7 +844,6 @@ subroutine void AlphaTestType(vec4 c); // a function pointer variable layout(location = 0) subroutine uniform AlphaTestType atst; -// The function attached to AlphaTestType layout(index = 0) subroutine(AlphaTestType) void atest_never(vec4 c) { @@ -774,10 +1014,12 @@ vec4 ps_color() { vec4 t = sample_color(PSin_t.xy, PSin_t.w); + vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f); + vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f); #if PS_IIP == 1 - vec4 c = tfx(t, PSin_c); + vec4 c = clamp(tfx(t, PSin_c), zero, one); #else - vec4 c = tfx(t, PSin_fc); + vec4 c = clamp(tfx(t, PSin_fc), zero, one); #endif atst(c);