gsdx-ogl: DATE with texture barrier

Much faster for small batches that write the alpha value. The code can
be enabled with the accurate_date option.

Here is a summary of all DATE possibilities:
1/ no overlap of primitives
    => texture barrier (pro: no stencil setup and a single draw call)

2/ alpha written
    => small batch => texture barrier (primitive by primitive). Done in N draw calls, one per primitive.
    (based on GL_ARB_texture_barrier)

    => bigger batch => compute the first good primitive, slow but only 2 draw calls.
    (based on GL_ARB_shader_image_load_store)

    => Otherwise there is the UserHacks_AlphaStencil but it is a hack!

3/ otherwise (e.g. alpha not written, or accurate_date disabled)
    => full setup of stencil (2 draw calls)
This commit is contained in:
Gregory Hainaut 2015-05-09 19:54:01 +02:00
parent b89f0cfa4c
commit 5565544ba6
4 changed files with 30 additions and 9 deletions

View File

@ -504,13 +504,6 @@ namespace GLLoader {
theApp.SetConfig("accurate_blend", 0); theApp.SetConfig("accurate_blend", 0);
} }
} }
if (!found_GL_ARB_shader_image_load_store) {
if (theApp.GetConfig("accurate_date", 0)) {
fprintf(stderr, "Error GL_ARB_shader_image_load_store is not supported by your driver so you can't enable accurate_date! Sorry.\n");
theApp.SetConfig("accurate_date", 0);
}
}
fprintf(stderr, "\n"); fprintf(stderr, "\n");

View File

@ -198,6 +198,9 @@ void GSRendererOGL::SendDraw(bool require_barrier)
ASSERT(m_vt.m_primclass != GS_LINE_CLASS); ASSERT(m_vt.m_primclass != GS_LINE_CLASS);
ASSERT(GLLoader::found_geometry_shader); ASSERT(GLLoader::found_geometry_shader);
GL_INS("Special Draw");
// FIXME: Investigate: do a dynamic check to pack as many primitives as possibles
size_t nb_vertex = (m_vt.m_primclass == GS_TRIANGLE_CLASS) ? 3 : 2; size_t nb_vertex = (m_vt.m_primclass == GS_TRIANGLE_CLASS) ? 3 : 2;
for (size_t p = 0; p < m_index.tail; p += nb_vertex) { for (size_t p = 0; p < m_index.tail; p += nb_vertex) {
@ -278,8 +281,17 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) { if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) {
DATE_GL45 = true; DATE_GL45 = true;
DATE = false; DATE = false;
} else if (om_csel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) { } else if (m_accurate_date && !UserHacks_AlphaStencil &&
DATE_GL42 = m_accurate_date && GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil; om_csel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
// texture barrier will split the draw call into n draw call. It is very efficient for
// few primitive draws. Otherwise it sucks.
if (GLLoader::found_GL_ARB_texture_barrier && (m_index.tail < 100)) {
require_barrier = true;
DATE_GL45 = true;
DATE = false;
} else {
DATE_GL42 = GLLoader::found_GL_ARB_shader_image_load_store;
}
} }
} }

View File

@ -419,16 +419,24 @@ void ps_main()
// Pixel with alpha equal to 1 will failed // Pixel with alpha equal to 1 will failed
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
if ((127.5f / 255.0f) < rt_a) { // < 0x80 pass (== 0x80 should not pass) if ((127.5f / 255.0f) < rt_a) { // < 0x80 pass (== 0x80 should not pass)
#if PS_DATE >= 5
discard;
#else
imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));
return; return;
#endif
} }
#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image) #elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)
// DATM == 1 // DATM == 1
// Pixel with alpha equal to 0 will failed // Pixel with alpha equal to 0 will failed
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
if(rt_a < (127.5f / 255.0f)) { // >= 0x80 pass if(rt_a < (127.5f / 255.0f)) { // >= 0x80 pass
#if PS_DATE >= 5
discard;
#else
imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));
return; return;
#endif
} }
#endif #endif

View File

@ -1133,16 +1133,24 @@ static const char* tfx_fs_all_glsl =
" // Pixel with alpha equal to 1 will failed\n" " // Pixel with alpha equal to 1 will failed\n"
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
" if ((127.5f / 255.0f) < rt_a) { // < 0x80 pass (== 0x80 should not pass)\n" " if ((127.5f / 255.0f) < rt_a) { // < 0x80 pass (== 0x80 should not pass)\n"
"#if PS_DATE >= 5\n"
" discard;\n"
"#else\n"
" imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n" " imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n"
" return;\n" " return;\n"
"#endif\n"
" }\n" " }\n"
"#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)\n" "#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)\n"
" // DATM == 1\n" " // DATM == 1\n"
" // Pixel with alpha equal to 0 will failed\n" " // Pixel with alpha equal to 0 will failed\n"
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
" if(rt_a < (127.5f / 255.0f)) { // >= 0x80 pass\n" " if(rt_a < (127.5f / 255.0f)) { // >= 0x80 pass\n"
"#if PS_DATE >= 5\n"
" discard;\n"
"#else\n"
" imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n" " imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n"
" return;\n" " return;\n"
"#endif\n"
" }\n" " }\n"
"#endif\n" "#endif\n"
"\n" "\n"