From 0f603a98d5eccc644b0b8af1d17a0e8479652832 Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Sat, 10 Aug 2013 19:43:59 +0000 Subject: [PATCH] gsdx ogl: Test the ARB_shader_subroutine GL4.0 extension The idea was to replace shader program switches with function pointer calls inside shaders, at least for parameters that often change between draw calls. So far I have only ported atst and colclip. Unfortunately the code is "slower" (on GSdx standalone). For the moment keep the code but disabled. If I understand correctly, the validation of the program is done in the "driver thread" but the additional calls are done in the overloaded MTGS thread. Apitrace profiling shows faster GPU draw calls. Another possibility is that the driver still needs to validate the draw call because of other state changes. Here are some stats on colin3 (90 frames): without subroutine: UseProgram 125246 with subroutine: UseProgram 2906, subroutine 125945 => 3605 extra calls overhead (not all parameters are ported to subroutine) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5715 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GLLoader.cpp | 19 ++++- plugins/GSdx/GLLoader.h | 3 + plugins/GSdx/GLState.cpp | 4 + plugins/GSdx/GLState.h | 2 + plugins/GSdx/GSDeviceOGL.cpp | 79 ++++++++++-------- plugins/GSdx/GSDeviceOGL.h | 8 +- plugins/GSdx/GSRendererOGL.cpp | 67 ++++++++------- plugins/GSdx/GSShaderOGL.cpp | 123 +++++++++++++++++---------- plugins/GSdx/GSShaderOGL.h | 10 ++- plugins/GSdx/GSTextureFXOGL.cpp | 34 +++++--- plugins/GSdx/GSTextureOGL.cpp | 34 +------- plugins/GSdx/GSWnd.cpp | 2 + plugins/GSdx/res/glsl_source.h | 143 ++++++++++++++++++++++++++++---- plugins/GSdx/res/tfx.glsl | 143 ++++++++++++++++++++++++++++---- 14 files changed, 479 insertions(+), 192 deletions(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 46436c6c83..70eac560bc 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -81,7 +81,9 @@ PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages = 
NULL; PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer = NULL; PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer = NULL; PFNGLBUFFERSUBDATAPROC gl_BufferSubData = NULL; -// GL 4.1 +// GL4.0 +PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL; +// GL4.1 PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL; PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL; PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NULL; @@ -122,9 +124,10 @@ namespace GLLoader { bool found_GL_ARB_clear_texture = false; // Don't know if GL3 hardawe can support it bool found_GL_ARB_buffer_storage = false; // GL4 hardware - bool found_GL_ARB_copy_image = false; + bool found_GL_ARB_copy_image = false; // Not sure actually bool found_GL_ARB_gpu_shader5 = false; bool found_GL_ARB_shader_image_load_store = false; + bool found_GL_ARB_shader_subroutine = false; // Mandatory for FULL GL (but optional for GLES) bool found_GL_ARB_multi_bind = false; // Not yet. Wait Mesa & AMD drivers @@ -221,6 +224,17 @@ namespace GLLoader { if (ext.compare("GL_ARB_copy_image") == 0) found_GL_ARB_copy_image = true; if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true; if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true; +#if 0 + // Strangely it doesn't provide the speed boost as expected. + // Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on + // colin mcrae 3 by 2100 uniform, but code is slower! + // + // Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls + // are done on the overloaded main threads. 
+ // Apitrace profiling shows faster GPU draw times + + if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true; +#endif #ifdef GL44 // Need to debug the code first if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true; if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true; @@ -242,6 +256,7 @@ namespace GLLoader { status &= status_and_override(found_GL_ARB_shader_image_load_store,"GL_ARB_shader_image_load_store"); status &= status_and_override(found_GL_ARB_clear_texture,"GL_ARB_clear_texture"); status &= status_and_override(found_GL_ARB_buffer_storage,"GL_ARB_buffer_storage"); + status &= status_and_override(found_GL_ARB_shader_subroutine,"GL_ARB_shader_subroutine"); status &= status_and_override(found_GL_ARB_texture_storage, "GL_ARB_texture_storage", true); status &= status_and_override(found_GL_ARB_shading_language_420pack,"GL_ARB_shading_language_420pack"); diff --git a/plugins/GSdx/GLLoader.h b/plugins/GSdx/GLLoader.h index 6d1cb477e7..4248145734 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -134,6 +134,8 @@ extern PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages; extern PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer; extern PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer; extern PFNGLBUFFERSUBDATAPROC gl_BufferSubData; +// GL4.0 +extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv; // GL4.1 extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline; extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines; @@ -254,4 +256,5 @@ namespace GLLoader { extern bool found_GL_ARB_clear_texture; extern bool found_GL_ARB_multi_bind; extern bool found_GL_ARB_buffer_storage; + extern bool found_GL_ARB_shader_subroutine; } diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index a85143425b..766efc3af5 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -62,6 +62,8 @@ namespace GLState { GLuint gs = 0; GLuint vs = 
0; GLuint program = 0; + bool dirty_prog = false; + bool dirty_subroutine_ps = false; #if 0 struct { GSVertexBufferStateOGL* vb; @@ -112,5 +114,7 @@ namespace GLState { gs = 0; vs = 0; program = 0; + dirty_prog = false; + dirty_subroutine_ps = false; } } diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index 29f9270109..7d0a215ded 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -64,6 +64,8 @@ namespace GLState { extern GLuint gs; extern GLuint vs; extern GLuint program; // monolith program (when sso isn't supported) + extern bool dirty_prog; + extern bool dirty_subroutine_ps; extern void Clear(); } diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index d32d465c96..bb99c9b19c 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -168,7 +168,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd) // **************************************************************** // Various object // **************************************************************** - m_shader = new GSShaderOGL(!!theApp.GetConfig("debug_ogl_shader", 1), GLLoader::found_GL_ARB_separate_shader_objects, GLLoader::found_GL_ARB_shading_language_420pack); + m_shader = new GSShaderOGL(!!theApp.GetConfig("debug_ogl_shader", 1)); gl_GenFramebuffers(1, &m_fbo); gl_GenFramebuffers(1, &m_fbo_read); @@ -572,6 +572,7 @@ void GSDeviceOGL::Barrier(GLbitfield b) //#endif } +/* Note: must be here because tfx_glsl is static */ GLuint GSDeviceOGL::CompileVS(VSSelector sel) { std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) @@ -582,6 +583,7 @@ GLuint GSDeviceOGL::CompileVS(VSSelector sel) return m_shader->Compile("tfx.glsl", "vs_main", GL_VERTEX_SHADER, tfx_glsl, macro); } +/* Note: must be here because tfx_glsl is static */ GLuint GSDeviceOGL::CompileGS(GSSelector sel) { // Easy case @@ -598,6 +600,7 @@ GLuint GSDeviceOGL::CompileGS(GSSelector sel) #endif } +/* Note: must be here because tfx_glsl is static */ GLuint 
GSDeviceOGL::CompilePS(PSSelector sel) { std::string macro = format("#define PS_FST %d\n", sel.fst) @@ -720,6 +723,14 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, GSVector2i ds = dt->GetSize(); + // WARNING: setup of the program must be done first. So you can setup + // 1/ subroutine uniform + // 2/ bindless texture uniform + // 3/ others uniform? + m_shader->VS(m_convert.vs); + m_shader->GS(0); + m_shader->PS(ps); + // ************************************ // om // ************************************ @@ -764,32 +775,17 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(flip_sr.x, flip_sr.w)}, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(flip_sr.z, flip_sr.w)}, }; - //fprintf(stderr, "A:%fx%f B:%fx%f\n", left, top, bottom, right); - //fprintf(stderr, "SR: %f %f %f %f\n", sr.x, sr.y, sr.z, sr.w); IASetVertexState(m_vb_sr); IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); // ************************************ - // vs + // Texture // ************************************ - m_shader->VS(m_convert.vs); - - // ************************************ - // gs - // ************************************ - - m_shader->GS(0); - - // ************************************ - // ps - // ************************************ - - PSSetShaderResource(0, static_cast(st)->GetID()); + PSSetShaderResource(static_cast(st)->GetID()); PSSetSamplerState(linear ? m_convert.ln : m_convert.pt); - m_shader->PS(ps); // ************************************ // Draw @@ -886,6 +882,14 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver ClearStencil(ds, 0); + // WARNING: setup of the program must be done first. So you can setup + // 1/ subroutine uniform + // 2/ bindless texture uniform + // 3/ others uniform? + m_shader->VS(m_convert.vs); + m_shader->GS(0); + m_shader->PS(m_convert.ps[datm ? 
2 : 3]); + // om OMSetDepthStencilState(m_date.dss, 1); @@ -898,19 +902,11 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); - // vs - m_shader->VS(m_convert.vs); + // Texture - // gs - - m_shader->GS(0); - - // ps - - PSSetShaderResource(0, static_cast(rt)->GetID()); + PSSetShaderResource(static_cast(rt)->GetID()); PSSetSamplerState(m_convert.pt); - m_shader->PS(m_convert.ps[datm ? 2 : 3]); // @@ -966,16 +962,16 @@ void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology) m_state.vb->SetTopology(topology); } -void GSDeviceOGL::PSSetShaderResource(const int i, GLuint sr) +void GSDeviceOGL::PSSetShaderResource(GLuint sr) { - if (GLState::tex_unit[i] != sr) { - GLState::tex_unit[i] = sr; + if (GLState::tex_unit[0] != sr) { + GLState::tex_unit[0] = sr; if (GLLoader::found_GL_ARB_multi_bind) { GLuint textures[1] = {sr}; - gl_BindTextures(i, 1, textures); + gl_BindTextures(0, 1, textures); } else { - gl_ActiveTexture(GL_TEXTURE0 + i); + gl_ActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, sr); // Get back to the expected active texture unit @@ -987,8 +983,21 @@ void GSDeviceOGL::PSSetShaderResource(const int i, GLuint sr) void GSDeviceOGL::PSSetShaderResources(GLuint tex[2]) { if (GLState::tex_unit[0] != tex[0] || GLState::tex_unit[1] != tex[1]) { - GLuint textures[2] = {tex[0], tex[1]}; - gl_BindTextures(0, 2, textures); + GLState::tex_unit[0] = tex[0]; + GLState::tex_unit[1] = tex[1]; + + if (GLLoader::found_GL_ARB_multi_bind) { + gl_BindTextures(0, 2, tex); + } else { + gl_ActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, tex[0]); + + gl_ActiveTexture(GL_TEXTURE0 + 1); + glBindTexture(GL_TEXTURE_2D, tex[1]); + + // Get back to the expected active texture unit + gl_ActiveTexture(GL_TEXTURE0 + 3); + } } } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index ab2e596e61..2b0987f1a5 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ 
b/plugins/GSdx/GSDeviceOGL.h @@ -606,10 +606,9 @@ class GSDeviceOGL : public GSDevice void IASetIndexBuffer(const void* index, size_t count); void IASetVertexState(GSVertexBufferStateOGL* vb = NULL); - void PSSetShaderResource(const int i, GLuint sr); + void PSSetShaderResource(GLuint sr); void PSSetShaderResources(GLuint tex[2]); void PSSetSamplerState(GLuint ss); - void PSSetSamplerStates(const int count, const GLuint* samplers); void OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref); void OMSetBlendState(GSBlendStateOGL* bs, float bf); @@ -627,9 +626,10 @@ class GSDeviceOGL : public GSDevice void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); - void SetupVS(VSSelector sel, const VSConstantBuffer* cb); + void SetupVS(VSSelector sel); void SetupGS(GSSelector sel); - void SetupPS(PSSelector sel, const PSConstantBuffer* cb); + void SetupPS(PSSelector sel); + void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb); void SetupSampler(PSSelector sel, PSSamplerSelector ssel); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 11b5ac3a8e..cb8aae54f6 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -163,6 +163,7 @@ void GSRendererOGL::SetupIA() dev->IASetVertexState(); if(UserHacks_WildHack && !isPackedUV_HackFlag) { + // FIXME: why not put it on the Vertex shader if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next)) { GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); @@ -441,6 +442,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_dssel.alpha_stencil = 1; } + // By default don't use texture + ps_sel.tfx = 4; + if(tex) { const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM]; @@ -456,6 +460,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, 
GSTextureCache::Sour ps_sel.tcc = context->TEX0.TCC; ps_sel.ltf = bilinear && !simple_sample; ps_sel.spritehack = tex->m_spritehack_t; + // FIXME the ati is currently disabled on the shader. I need to find a .gs to test that we got same + // bug on opengl ps_sel.point_sampler = !(bilinear && simple_sample); int w = tex->m_texture->GetWidth(); @@ -491,23 +497,30 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = bilinear && simple_sample; - - dev->SetupSampler(ps_sel, ps_ssel); - if (tex->m_palette) { - if (GLLoader::found_GL_ARB_multi_bind) { - GLuint textures[2] = {static_cast(tex->m_texture)->GetID(), static_cast(tex->m_palette)->GetID()}; - dev->PSSetShaderResources(textures); - } else { - dev->PSSetShaderResource(1, static_cast(tex->m_palette)->GetID()); - dev->PSSetShaderResource(0, static_cast(tex->m_texture)->GetID()); - } - } else { - dev->PSSetShaderResource(0, static_cast(tex->m_texture)->GetID()); - } } - else - { - ps_sel.tfx = 4; + + // WARNING: setup of the program must be done first. So you can setup + // 1/ subroutine uniform + // 2/ bindless texture uniform + // 3/ others uniform? 
+ dev->SetupVS(vs_sel); + dev->SetupGS(gs_sel); + dev->SetupPS(ps_sel); + + // Note: bindless texture will use uniform so it must be done after the program setup + if(tex) { + if (tex->m_palette) { + // 2 textures (main + palette) + dev->SetupSampler(ps_sel, ps_ssel); + + GLuint textures[2] = {static_cast(tex->m_texture)->GetID(), static_cast(tex->m_palette)->GetID()}; + dev->PSSetShaderResources(textures); + } else if (tex->m_texture) { + // Only main texture + dev->SetupSampler(ps_sel, ps_ssel); + + dev->PSSetShaderResource(static_cast(tex->m_texture)->GetID()); + } } // rs @@ -521,9 +534,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour SetupIA(); dev->SetupOM(om_dssel, om_bsel, afix); - dev->SetupVS(vs_sel, &vs_cb); - dev->SetupGS(gs_sel); - dev->SetupPS(ps_sel, &ps_cb); + dev->SetupCB(&vs_cb, &ps_cb); if (advance_DATE) { // Create an r32ui image that will contain primitive ID @@ -539,7 +550,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->OMSetWriteBuffer(); ps_sel.date = 3; - dev->SetupPS(ps_sel, &ps_cb); + dev->SetupPS(ps_sel); // Be sure that first pass is finished ! 
dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); @@ -558,7 +569,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_selneg.colclip = 2; dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb); + dev->SetupPS(ps_selneg); dev->DrawIndexedPrimitive(); dev->SetupOM(om_dssel, om_bsel, afix); @@ -573,7 +584,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.atst = iatst[ps_sel.atst]; - dev->SetupPS(ps_sel, &ps_cb); + dev->SetupPS(ps_sel); bool z = om_dssel.zwe; bool r = om_bsel.wr; @@ -583,11 +594,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour switch(context->TEST.AFAIL) { - case AFAIL_KEEP: z = r = g = b = a = false; break; // none - case AFAIL_FB_ONLY: z = false; break; // rgba - case AFAIL_ZB_ONLY: r = g = b = a = false; break; // z - case AFAIL_RGB_ONLY: z = a = false; break; // rgb - default: __assume(0); + case AFAIL_KEEP: z = r = g = b = a = false; break; // none + case AFAIL_FB_ONLY: z = false; break; // rgba + case AFAIL_ZB_ONLY: r = g = b = a = false; break; // z + case AFAIL_RGB_ONLY: z = a = false; break; // rgb + default: __assume(0); } if(z || r || g || b || a) @@ -611,7 +622,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_selneg.colclip = 2; dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb); + dev->SetupPS(ps_selneg); dev->DrawIndexedPrimitive(); } diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index e217de63aa..61a298f500 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -23,14 +23,16 @@ #include "GSShaderOGL.h" #include "GLState.h" -GSShaderOGL::GSShaderOGL(bool debug, bool sso, bool glsl420) : +GSShaderOGL::GSShaderOGL(bool debug) : m_debug_shader(debug), - m_sso(sso), - m_glsl420(glsl420) + m_sub_count(0) { + + memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(GLuint)); + m_single_prog.clear(); 
#ifndef ENABLE_GLES - if (sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { gl_GenProgramPipelines(1, &m_pipeline); gl_BindProgramPipeline(m_pipeline); } @@ -40,7 +42,7 @@ GSShaderOGL::GSShaderOGL(bool debug, bool sso, bool glsl420) : GSShaderOGL::~GSShaderOGL() { #ifndef ENABLE_GLES - if (m_sso) + if (GLLoader::found_GL_ARB_separate_shader_objects) gl_DeleteProgramPipelines(1, &m_pipeline); #endif @@ -53,21 +55,35 @@ void GSShaderOGL::VS(GLuint s) if (GLState::vs != s) { GLState::vs = s; + GLState::dirty_prog = true; #ifndef ENABLE_GLES - if (m_sso) + if (GLLoader::found_GL_ARB_separate_shader_objects) gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s); #endif } } -void GSShaderOGL::PS(GLuint s) +void GSShaderOGL::PS_subroutine(GLuint *sub) +{ + if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1])) { + m_ps_sub[0] = sub[0]; + m_ps_sub[1] = sub[1]; + GLState::dirty_subroutine_ps = true; + } +} + +void GSShaderOGL::PS(GLuint s, GLuint sub_count) { if (GLState::ps != s) { + m_sub_count = sub_count; + GLState::ps = s; + GLState::dirty_prog = true; #ifndef ENABLE_GLES - if (m_sso) + if (GLLoader::found_GL_ARB_separate_shader_objects) { gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s); + } #endif } } @@ -77,8 +93,9 @@ void GSShaderOGL::GS(GLuint s) if (GLState::gs != s) { GLState::gs = s; + GLState::dirty_prog = true; #ifndef ENABLE_GLES - if (m_sso) + if (GLLoader::found_GL_ARB_separate_shader_objects) gl_UseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, s); #endif } @@ -97,7 +114,7 @@ void GSShaderOGL::SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding) { GLint loc = gl_GetUniformLocation(prog, name); if (loc != -1) { - if (m_sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { #ifndef ENABLE_GLES gl_ProgramUniform1i(prog, loc, binding); #endif @@ -109,9 +126,9 @@ void GSShaderOGL::SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding) void GSShaderOGL::SetupUniform() { - if (m_glsl420) return; + if 
(GLLoader::found_GL_ARB_shading_language_420pack) return; - if (m_sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { SetUniformBinding(GLState::vs, "cb20", 20); SetUniformBinding(GLState::ps, "cb21", 21); @@ -138,6 +155,17 @@ void GSShaderOGL::SetupUniform() } } +void GSShaderOGL::SetSubroutineUniform() +{ + if (!GLLoader::found_GL_ARB_shader_subroutine) return; + if (m_sub_count == 0) return; + + if (GLState::dirty_subroutine_ps || GLState::dirty_prog) + gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_sub_count, m_ps_sub); + + GLState::dirty_subroutine_ps = false; +} + bool GSShaderOGL::ValidateShader(GLuint s) { if (!m_debug_shader) return true; @@ -223,37 +251,44 @@ GLuint GSShaderOGL::LinkNewProgram() void GSShaderOGL::UseProgram() { - hash_map::iterator it; - if (!m_sso) { - // Note: shader are integer lookup pointer. They start from 1 and incr - // every time you create a new shader OR a new program. - // Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. - // We migth be able to pack the value in a 32bits int - // I would need to check the behavior on Nvidia (pause/resume). - uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps; - it = m_single_prog.find(sel); - if (it == m_single_prog.end()) { - GLState::program = LinkNewProgram(); - m_single_prog[sel] = GLState::program; + if (GLState::dirty_prog) { + if (!GLLoader::found_GL_ARB_separate_shader_objects) { + hash_map::iterator it; + // Note: shader are integer lookup pointer. They start from 1 and incr + // every time you create a new shader OR a new program. + // Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128. + // We migth be able to pack the value in a 32bits int + // I would need to check the behavior on Nvidia (pause/resume). 
+ uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps; + it = m_single_prog.find(sel); + if (it == m_single_prog.end()) { + GLState::program = LinkNewProgram(); + m_single_prog[sel] = GLState::program; - ValidateProgram(GLState::program); + ValidateProgram(GLState::program); - gl_UseProgram(GLState::program); - // warning it must be done after the "setup" of the program - SetupUniform(); - } else { - GLuint prog = it->second; - if (prog != GLState::program) { - GLState::program = prog; gl_UseProgram(GLState::program); + + // warning it must be done after the "setup" of the program + SetupUniform(); + } else { + GLuint prog = it->second; + if (prog != GLState::program) { + GLState::program = prog; + gl_UseProgram(GLState::program); + } } + + } else { + ValidatePipeline(m_pipeline); + + SetupUniform(); } - - } else { - ValidatePipeline(m_pipeline); - - SetupUniform(); } + + SetSubroutineUniform(); + + GLState::dirty_prog = false; } std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro) @@ -266,13 +301,13 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co } else { header = "#version 330 core\n"; } - if (m_glsl420) { + if (GLLoader::found_GL_ARB_shading_language_420pack) { // Need GL version 420 header += "#extension GL_ARB_shading_language_420pack: require\n"; } else { header += "#define DISABLE_GL42\n"; } - if (m_sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { // Need GL version 410 header += "#extension GL_ARB_separate_shader_objects : require\n"; } else { @@ -284,6 +319,10 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co // Need version 140 header += "#extension GL_ARB_uniform_buffer_object : require\n"; } + if (GLLoader::found_GL_ARB_shader_subroutine) { + // Need GL version 400 + header += "#define SUBROUTINE_GL40 1\n"; + } #ifdef ENABLE_OGL_STENCIL_DEBUG header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n"; 
#endif @@ -351,7 +390,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent sources[0] = header.append(glsl_h_code).c_str(); #endif - if (m_sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { #ifndef ENABLE_GLES program = gl_CreateShaderProgramv(type, shader_nb, sources); #endif @@ -362,7 +401,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent } bool status; - if (m_sso) + if (GLLoader::found_GL_ARB_separate_shader_objects) status = ValidateProgram(program); else status = ValidateShader(program); @@ -378,7 +417,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent void GSShaderOGL::Delete(GLuint s) { - if (m_sso) { + if (GLLoader::found_GL_ARB_separate_shader_objects) { gl_DeleteProgram(s); } else { gl_DeleteShader(s); diff --git a/plugins/GSdx/GSShaderOGL.h b/plugins/GSdx/GSShaderOGL.h index de5920e244..7423a41b72 100644 --- a/plugins/GSdx/GSShaderOGL.h +++ b/plugins/GSdx/GSShaderOGL.h @@ -25,10 +25,11 @@ class GSShaderOGL { GLuint m_pipeline; hash_map m_single_prog; const bool m_debug_shader; - const bool m_sso; - const bool m_glsl420; + GLuint m_sub_count; + GLuint m_ps_sub[2]; + void SetSubroutineUniform(); void SetupUniform(); void SetUniformBinding(GLuint prog, GLchar* name, GLuint binding); void SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding); @@ -41,11 +42,12 @@ class GSShaderOGL { GLuint LinkNewProgram(); public: - GSShaderOGL(bool debug, bool sso, bool glsl420); + GSShaderOGL(bool debug); ~GSShaderOGL(); void GS(GLuint s); - void PS(GLuint s); + void PS(GLuint s, GLuint sub_count = 0); + void PS_subroutine(GLuint *sub); void VS(GLuint s); void UseProgram(); diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 3cf9431c2d..b58392a02c 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -133,14 +133,22 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 
afix) return bs; } -void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb) +void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) +{ + if(m_vs_cb_cache.Update(vs_cb)) { + m_vs_cb->upload(vs_cb); + } + + if(m_ps_cb_cache.Update(ps_cb)) { + m_ps_cb->upload(ps_cb); + } + +} + +void GSDeviceOGL::SetupVS(VSSelector sel) { GLuint vs = m_vs[sel]; - if(m_vs_cb_cache.Update(cb)) { - m_vs_cb->upload(cb); - } - m_shader->VS(vs); } @@ -151,8 +159,16 @@ void GSDeviceOGL::SetupGS(GSSelector sel) m_shader->GS(gs); } -void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb) +void GSDeviceOGL::SetupPS(PSSelector sel) { + if (GLLoader::found_GL_ARB_shader_subroutine) { + GLuint sub[2] = {sel.atst, (uint32)sel.colclip + 8}; + m_shader->PS_subroutine(sub); + // Handle by subroutine useless now + sel.atst = 0; + sel.colclip = 0; + } + // ************************************************************* // Static // ************************************************************* @@ -169,11 +185,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb) // ************************************************************* // Dynamic // ************************************************************* - if(m_ps_cb_cache.Update(cb)) { - m_ps_cb->upload(cb); - } - - m_shader->PS(ps); + m_shader->PS(ps, 2); } void GSDeviceOGL::SetupSampler(PSSelector sel, PSSamplerSelector ssel) diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 9f41dc768f..61fdc29d3b 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -177,7 +177,6 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read) break; default: break; } - } GSTextureOGL::~GSTextureOGL() @@ -210,7 +209,6 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) EnableUnit(); -#if 1 PboPool::BindPbo(); glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); @@ -230,47 +228,19 @@ bool 
GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) PboPool::UnbindPbo(); return true; -#else + +#if 0 // pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment); glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift); -#ifdef _LINUX - if (GLLoader::fglrx_buggy_driver && !GLLoader::in_replayer) { - // FIXME : it crash on colin mcrae rally 3 (others game too) when the texture is small - //if ((pitch >> 2) == 32 || r.width() < 32 || r.height() < 32) { - if ((r.width() < 32) || (pitch == 128 && r.width() == 32)) { -#ifdef ENABLE_OGL_DEBUG - fprintf(stderr, "Skip Texture %dx%d with a pitch of %d pixel. Type %x\n", m_size.x, m_size.y, pitch >>2, m_format); - fprintf(stderr, "Box (%d,%d)x(%d,%d)\n", r.x, r.y, r.width(), r.height()); -#endif - - // FIXME useful? - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior - return false; - } - } -#endif - glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data); // FIXME useful? 
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior return true; -#if 0 - if(m_dev && m_texture) - { - D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1}; - - m_ctx->UpdateSubresource(m_texture, 0, &box, data, pitch, 0); - - return true; - } - - return false; -#endif #endif } diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index 7bcddf9ed9..d61b2aea90 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -82,6 +82,8 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_VertexAttribIPointer) = GetProcAddress("glVertexAttribIPointer"); *(void**)&(gl_VertexAttribPointer) = GetProcAddress("glVertexAttribPointer"); *(void**)&(gl_BufferSubData) = GetProcAddress("glBufferSubData"); + // GL4.0 + *(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv"); // GL4.1 *(void**)&(gl_BindProgramPipeline) = GetProcAddress("glBindProgramPipeline"); *(void**)&(gl_DeleteProgramPipelines) = GetProcAddress("glDeleteProgramPipelines"); diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 657b0a742a..7faf30caed 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1166,6 +1166,77 @@ static const char* tfx_glsl = "#endif\n" "}\n" "\n" + "// Note layout stuff might require gl4.3\n" + "#ifdef SUBROUTINE_GL40\n" + "// Function pointer type\n" + "subroutine void AlphaTestType(vec4 c);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 0) subroutine uniform AlphaTestType atst;\n" + "\n" + "// The function attached to AlphaTestType\n" + "layout(index = 0) subroutine(AlphaTestType)\n" + "void atest_never(vec4 c)\n" + "{\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 1) subroutine(AlphaTestType)\n" + "void atest_always(vec4 c)\n" + "{\n" + " // Nothing to do\n" + "}\n" + "\n" + "layout(index = 2) subroutine(AlphaTestType)\n" + "void atest_l(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if (PS_SPRITEHACK == 0)\n" + " if ((AREF 
- a - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 3) subroutine(AlphaTestType)\n" + "void atest_le(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((AREF - a + 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 4) subroutine(AlphaTestType)\n" + "void atest_e(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((0.5f - abs(a - AREF)) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 5) subroutine(AlphaTestType)\n" + "void atest_ge(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((a-AREF + 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 6) subroutine(AlphaTestType)\n" + "void atest_g(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((a-AREF - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 7) subroutine(AlphaTestType)\n" + "void atest_ne(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "#else\n" "void atst(vec4 c)\n" "{\n" " float a = trunc(c.a * 255.0 + 0.01);\n" @@ -1210,16 +1281,64 @@ static const char* tfx_glsl = " discard;\n" " }\n" "}\n" + "#endif\n" "\n" - "vec4 fog(vec4 c, float f)\n" + "// Note layout stuff might require gl4.3\n" + "#ifdef SUBROUTINE_GL40\n" + "// Function pointer type\n" + "subroutine void ColClipType(inout vec4 c);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 1) subroutine uniform ColClipType colclip;\n" + "\n" + "layout(index = 8) subroutine(ColClipType)\n" + "void colclip_0(inout vec4 c)\n" + "{\n" + " // nothing to do\n" + "}\n" + "\n" + "layout(index = 9) subroutine(ColClipType)\n" + "void colclip_1(inout vec4 c)\n" + "{\n" + " // FIXME !!!!\n" + " //c.rgb *= c.rgb < 128./255;\n" + " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" + " c.rgb *= vec3(factor);\n" + "}\n" + "\n" + "layout(index = 10) subroutine(ColClipType)\n" + "void 
colclip_2(inout vec4 c)\n" + "{\n" + " c.rgb = 256.0f/255.0f - c.rgb;\n" + " // FIXME !!!!\n" + " //c.rgb *= c.rgb < 128./255;\n" + " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" + " c.rgb *= vec3(factor);\n" + "}\n" + "\n" + "#else\n" + "void colclip(inout vec4 c)\n" + "{\n" + " if (PS_COLCLIP == 2)\n" + " {\n" + " c.rgb = 256.0f/255.0f - c.rgb;\n" + " }\n" + " if (PS_COLCLIP > 0)\n" + " {\n" + " // FIXME !!!!\n" + " //c.rgb *= c.rgb < 128./255;\n" + " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" + " c.rgb *= vec3(factor);\n" + " }\n" + "}\n" + "#endif\n" + "\n" + "void fog(inout vec4 c, float f)\n" "{\n" - " vec4 c_out = c;\n" " if(PS_FOG != 0)\n" " {\n" - " c_out.rgb = mix(FogColor, c.rgb, f);\n" + " c.rgb = mix(FogColor, c.rgb, f);\n" " }\n" - "\n" - " return c_out;\n" "}\n" "\n" "vec4 ps_color()\n" @@ -1232,19 +1351,9 @@ static const char* tfx_glsl = "\n" " atst(c);\n" "\n" - " c = fog(c, PSin_t.z);\n" + " fog(c, PSin_t.z);\n" "\n" - " if (PS_COLCLIP == 2)\n" - " {\n" - " c.rgb = 256.0f/255.0f - c.rgb;\n" - " }\n" - " if (PS_COLCLIP > 0)\n" - " {\n" - " // FIXME !!!!\n" - " //c.rgb *= c.rgb < 128./255;\n" - " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" - " c.rgb *= vec3(factor);\n" - " }\n" + " colclip(c);\n" "\n" " if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" " {\n" diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index d34b975bf5..5a11a5d6e8 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -656,6 +656,77 @@ void datst() #endif } +// Note layout stuff might require gl4.3 +#ifdef SUBROUTINE_GL40 +// Function pointer type +subroutine void AlphaTestType(vec4 c); + +// a function pointer variable +layout(location = 0) subroutine uniform AlphaTestType atst; + +// The function attached to AlphaTestType +layout(index = 0) subroutine(AlphaTestType) +void atest_never(vec4 c) +{ + discard; +} + +layout(index = 1)
subroutine(AlphaTestType) +void atest_always(vec4 c) +{ + // Nothing to do +} + +layout(index = 2) subroutine(AlphaTestType) +void atest_l(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if (PS_SPRITEHACK == 0) + if ((AREF - a - 0.5f) < 0.0f) + discard; +} + +layout(index = 3) subroutine(AlphaTestType) +void atest_le(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((AREF - a + 0.5f) < 0.0f) + discard; +} + +layout(index = 4) subroutine(AlphaTestType) +void atest_e(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((0.5f - abs(a - AREF)) < 0.0f) + discard; +} + +layout(index = 5) subroutine(AlphaTestType) +void atest_ge(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((a-AREF + 0.5f) < 0.0f) + discard; +} + +layout(index = 6) subroutine(AlphaTestType) +void atest_g(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((a-AREF - 0.5f) < 0.0f) + discard; +} + +layout(index = 7) subroutine(AlphaTestType) +void atest_ne(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((abs(a - AREF) - 0.5f) < 0.0f) + discard; +} + +#else void atst(vec4 c) { float a = trunc(c.a * 255.0 + 0.01); @@ -700,16 +771,64 @@ void atst(vec4 c) discard; } } +#endif -vec4 fog(vec4 c, float f) +// Note layout stuff might require gl4.3 +#ifdef SUBROUTINE_GL40 +// Function pointer type +subroutine void ColClipType(inout vec4 c); + +// a function pointer variable +layout(location = 1) subroutine uniform ColClipType colclip; + +layout(index = 8) subroutine(ColClipType) +void colclip_0(inout vec4 c) +{ + // nothing to do +} + +layout(index = 9) subroutine(ColClipType) +void colclip_1(inout vec4 c) +{ + // FIXME !!!! + //c.rgb *= c.rgb < 128./255; + bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); + c.rgb *= vec3(factor); +} + +layout(index = 10) subroutine(ColClipType) +void colclip_2(inout vec4 c) +{ + c.rgb = 256.0f/255.0f - c.rgb; + // FIXME !!!!
+ //c.rgb *= c.rgb < 128./255; + bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); + c.rgb *= vec3(factor); +} + +#else +void colclip(inout vec4 c) +{ + if (PS_COLCLIP == 2) + { + c.rgb = 256.0f/255.0f - c.rgb; + } + if (PS_COLCLIP > 0) + { + // FIXME !!!! + //c.rgb *= c.rgb < 128./255; + bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); + c.rgb *= vec3(factor); + } +} +#endif + +void fog(inout vec4 c, float f) { - vec4 c_out = c; if(PS_FOG != 0) { - c_out.rgb = mix(FogColor, c.rgb, f); + c.rgb = mix(FogColor, c.rgb, f); } - - return c_out; } vec4 ps_color() @@ -722,19 +841,9 @@ vec4 ps_color() atst(c); - c = fog(c, PSin_t.z); + fog(c, PSin_t.z); - if (PS_COLCLIP == 2) - { - c.rgb = 256.0f/255.0f - c.rgb; - } - if (PS_COLCLIP > 0) - { - // FIXME !!!! - //c.rgb *= c.rgb < 128./255; - bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); - c.rgb *= vec3(factor); - } + colclip(c); if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes {