From c207632e4974ccfdb4576c7487864fb2390489ef Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 24 Apr 2015 20:53:59 +0200 Subject: [PATCH] gsdx-ogl: improve date performance for GL45 If there is no overlap, it is allowed to directly read from the render target. On SotC testcase with 6x scaling: 30fps -> 40fps Note: it requires GL_ARB_texture_barrier extension so be sure to have a recent driver Note2: it requires a lot of testing too Open question: in case of complex date (written alpha), will it be faster to split the draw call into multiple calls with no primitive overlap? --- plugins/GSdx/GLState.cpp | 12 +++--- plugins/GSdx/GLState.h | 4 +- plugins/GSdx/GSDeviceOGL.cpp | 1 - plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSRendererOGL.cpp | 65 +++++++++++++++++++++++++------ plugins/GSdx/GSRendererOGL.h | 2 + plugins/GSdx/res/glsl/tfx_fs.glsl | 4 +- plugins/GSdx/res/glsl_source.h | 4 +- 8 files changed, 69 insertions(+), 25 deletions(-) diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index 3f2f5e19e4..c333ed7eab 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -54,8 +54,8 @@ namespace GLState { GLuint rt = 0; GLuint ds = 0; - GLuint tex_unit[2] = {0, 0}; - GLuint64 tex_handle[2] = { 0, 0}; + GLuint tex_unit[4] = {0, 0, 0, 0}; + GLuint64 tex_handle[4] = { 0, 0, 0, 0}; bool dirty_ressources = false; GLuint ps = 0; @@ -106,10 +106,10 @@ namespace GLState { rt = 0; ds = 0; - tex_unit[0] = 0; - tex_unit[1] = 0; - tex_handle[0] = 0; - tex_handle[1] = 0; + for (int i = 0; i < 4; i++) { + tex_unit[i] = 0; + tex_handle[i] = 0; + } ps = 0; gs = 0; diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index ed4d775848..b377cbd293 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -56,8 +56,8 @@ namespace GLState { extern GLuint rt; // render target extern GLuint ds; // Depth-Stencil - extern GLuint tex_unit[2]; // shader input texture - extern GLuint64 tex_handle[2]; // shader input texture + extern 
GLuint tex_unit[4]; // shader input texture + extern GLuint64 tex_handle[4]; // shader input texture extern GLuint ps; extern GLuint gs; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index d81d9cba92..494f79a336 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -579,7 +579,6 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt) #ifndef ENABLE_GLES gl_BindImageTexture(2, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I); #endif - gl_BindTextureUnit(3, static_cast(rt)->GetID()); } void GSDeviceOGL::RecycleDateTexture() diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 165ab5426d..64e29dbcd9 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -372,7 +372,7 @@ class GSDeviceOGL : public GSDevice uint32 clr1:1; uint32 fba:1; uint32 aout:1; - uint32 date:2; + uint32 date:3; uint32 spritehack:1; uint32 tcoffsethack:1; uint32 point_sampler:1; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index e060dd263b..8f949bdd52 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -151,6 +151,39 @@ void GSRendererOGL::SetupIA() dev->IASetPrimitiveTopology(t); } +bool GSRendererOGL::PrimitiveOverlap() +{ + if (m_vertex.next < 4) + return false; + + if (m_vt.m_primclass != GS_SPRITE_CLASS) + return true; + + // Check intersection of sprite primitive only + size_t count = m_vertex.next; + GSVertex* v = &m_vertex.buff[0]; + + for(size_t i = 0; i < count; i += 2) { + // Very bad code + GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y); + for (size_t j = i+2; j < count; j += 2) { + GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y); + GSVector4i inter = vi.rintersect(vj); + if (!inter.rempty()) { + //fprintf(stderr, "Overlap found between %d and %d (draw of %d vertices)\n", i, j, count); + //vi.print(); + //vj.print(); + //inter.print(); + //exit(0); + return true; + } + } + 
} + + //fprintf(stderr, "Yes, code can be optimized (draw of %d vertices)\n", count); + return false; +} + void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; @@ -160,7 +193,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour const GSVector2& rtscale = rt->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - bool advance_DATE = false; + bool DATE_GL42 = false; + bool DATE_GL45 = false; ASSERT(m_dev != NULL); @@ -208,14 +242,21 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); - if (DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) { - advance_DATE = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil; + if (DATE) { + if (gl_TextureBarrier && !PrimitiveOverlap()) { + DATE_GL45 = true; + DATE = false; + } else if (om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) { + DATE_GL42 = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil; + } } // DATE - if(DATE) - { + if (DATE_GL45) { + gl_TextureBarrier(); + dev->PSSetShaderResource(3, rt); + } else if (DATE) { // TODO: do I need to clamp the value (if yes how? rintersect with rt?) GSVector4 si = GSVector4(rtscale.x, rtscale.y); GSVector4 o = GSVector4(-1.0f, 1.0f); // Round value @@ -227,8 +268,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Must be done here to avoid any GL state pertubation (clear function...) 
// Create an r32ui image that will containt primitive ID - if (advance_DATE) { + if (DATE_GL42) { dev->InitPrimDateTexture(rt); + dev->PSSetShaderResource(3, rt); } else { GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); @@ -349,9 +391,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // GS_SPRITE_CLASS are already flat (either by CPU or the GS) ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP; - if(DATE) - { - if (advance_DATE) + if (DATE_GL45) { + ps_sel.date = 5 + context->TEST.DATM; + } else if(DATE) { + if (DATE_GL42) ps_sel.date = 1 + context->TEST.DATM; else om_dssel.date = 1; @@ -497,7 +540,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupOM(om_dssel, om_bsel, afix); dev->SetupCB(&vs_cb, &ps_cb, ps_sel.sprite ? &gs_cb : NULL); - if (advance_DATE) { + if (DATE_GL42) { // Create an r32i image that will contain primitive ID // Note: do it at the beginning because the clean will dirty the FBO state //dev->InitPrimDateTexture(rtsize.x, rtsize.y); @@ -598,7 +641,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } } - if (advance_DATE) + if (DATE_GL42) dev->RecycleDateTexture(); dev->EndScene(); diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 47b8ef85e0..74680c5ff8 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -56,4 +56,6 @@ class GSRendererOGL : public GSRendererHW void UpdateFBA(GSTexture* rt); void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); + + bool PrimitiveOverlap(); }; diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index ac2518096c..3ecdb53c81 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -438,13 +438,13 @@ void ps_main() #if !pGL_ES void ps_main() { -#if PS_DATE == 1 && !defined(DISABLE_GL42_image) +#if (PS_DATE & 3) == 1 && 
!defined(DISABLE_GL42_image) // DATM == 0 // Pixel with alpha equal to 1 will failed float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass) discard; -#elif PS_DATE == 2 && !defined(DISABLE_GL42_image) +#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image) // DATM == 1 // Pixel with alpha equal to 0 will failed float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 8828c7e138..6e10b6c853 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1189,13 +1189,13 @@ static const char* tfx_fs_all_glsl = "#if !pGL_ES\n" "void ps_main()\n" "{\n" - "#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n" + "#if (PS_DATE & 3) == 1 && !defined(DISABLE_GL42_image)\n" " // DATM == 0\n" " // Pixel with alpha equal to 1 will failed\n" " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" " if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)\n" " discard;\n" - "#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n" + "#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)\n" " // DATM == 1\n" " // Pixel with alpha equal to 0 will failed\n" " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"