From ca1edbf2cb6cf35b63c2f41c0be0fb01717f9caf Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Wed, 26 Jun 2013 20:09:07 +0000 Subject: [PATCH] gsdx ogl: * Separate state and shader compilation into separate functions * replace various hash_maps with basic arrays * Compact VertexScale and offset into a single vec4 * add the new option "ogl_vertex_subdata": subdata is faster on FGLRX, tests are welcome on Nvidia drivers 0 => use map/unmap 1 => use subdata replay: add "linux_replay" option and compute some nice stats (mean, standard deviation) cmake: recreate shader headers at build time git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5682 96395faa-99c1-11dd-bbfe-3dabce05a288 --- linux_various/glsl2h.pl | 4 +- plugins/GSdx/CMakeLists.txt | 12 ++ plugins/GSdx/GS.cpp | 30 +++- plugins/GSdx/GSDeviceOGL.cpp | 28 +-- plugins/GSdx/GSDeviceOGL.h | 46 +++-- plugins/GSdx/GSRendererOGL.cpp | 53 ++---- plugins/GSdx/GSTextureFXOGL.cpp | 308 +++++++++++++++----------------- plugins/GSdx/GSVertexArrayOGL.h | 58 +++--- plugins/GSdx/res/tfx.glsl | 45 ++--- plugins/GSdx/res/tfx.h | 45 ++--- 10 files changed, 308 insertions(+), 321 deletions(-) diff --git a/linux_various/glsl2h.pl b/linux_various/glsl2h.pl index 510bdae3d2..869d826b11 100755 --- a/linux_various/glsl2h.pl +++ b/linux_various/glsl2h.pl @@ -3,9 +3,11 @@ use strict; use warnings; use File::Spec; +use File::Basename; +use Cwd 'abs_path'; my @res = qw/convert interlace merge shadeboost tfx/; -my $path = File::Spec->catdir("plugins", "GSdx", "res"); +my $path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res"); foreach my $r (@res) { glsl2h($path, $r, "glsl"); diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index f8aeec43d5..1c8d9c7f05 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -177,8 +177,20 @@ set(GSdxHeaders xbyak/xbyak_util.h ) +set(GSdxHeaders + res/convert.h + res/fxaa.h + res/interlace.h + res/merge.h + res/shaderboost.h + res/tfx.h + ) 
+ include_directories(.) +# Generate Glsl header file +add_custom_command(OUTPUT res/convert.h res/fxaa.h res/interlace.h res/merge.h res/shaderboost.h res/tfx.h COMMAND perl ${PROJECT_SOURCE_DIR}/linux_various/glsl2h.pl) + add_library(${Output} SHARED ${GSdxSources} ${GSdxHeaders}) target_link_libraries(${Output} ${X11_LIBRARIES}) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 6a55621883..707030af6e 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -1424,6 +1424,9 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) return; } + vector stats; + stats.clear(); + if(FILE* fp = fopen(lpszCmdLine, "rb")) { //Console console("GSdx", true); @@ -1522,11 +1525,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) //while(IsWindowVisible(hWnd)) //FIXME map? - int finished = 2; + int finished = theApp.GetConfig("linux_replay", 1); + unsigned long frame_number = 0; while(finished > 0) { + frame_number = 0; unsigned long start = timeGetTime(); - unsigned long frame_number = 0; for(auto i = packets.begin(); i != packets.end(); i++) { Packet* p = *i; @@ -1571,10 +1575,30 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) fprintf(stderr, "The %ld frames of the scene was render on %ldms\n", frame_number, end - start); fprintf(stderr, "A means of %fms by frame\n", (float)(end - start)/(float)frame_number); + stats.push_back((float)(end - start)); + + sleep(1); finished--; } + // Print some nice stats + float n = (float)theApp.GetConfig("linux_replay", 1); + float mean = 0; + float sd = 0; + for (auto i = stats.begin(); i != stats.end(); i++) { + mean += *i; + } + mean = mean/n; + for (auto i = stats.begin(); i != stats.end(); i++) { + sd += pow((*i)-mean, 2); + } + sd = sqrt(sd/n); + + fprintf(stderr, "\n\nMean: %fms\n", mean); + fprintf(stderr, "Standard deviation: %fms\n", sd); + fprintf(stderr, "Mean by frame: %fms (%ffps)\n", mean/(float)frame_number, 1000.0f*frame_number/mean); + fprintf(stderr, "Standard deviatin by frame: %fms\n", 
sd/(float)frame_number); for(auto i = packets.begin(); i != packets.end(); i++) { @@ -1589,6 +1613,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) GSshutdown(); fclose(fp); + } else { + fprintf(stderr, "failed to open %s\n", lpszCmdLine); } } #endif diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 2f86ea6ed6..538b1dbec0 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -58,6 +58,7 @@ GSDeviceOGL::GSDeviceOGL() , m_vb_sr(NULL) { m_msaa = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_MSAA", 0) : 0; + m_debug_shader = !!theApp.GetConfig("debug_ogl_shader", 1); memset(&m_merge_obj, 0, sizeof(m_merge_obj)); memset(&m_interlace, 0, sizeof(m_interlace)); @@ -134,24 +135,22 @@ GSDeviceOGL::~GSDeviceOGL() delete m_vb; if (GLLoader::found_GL_ARB_separate_shader_objects) { - for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteProgram(it->second); - for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteProgram(it->second); + for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteProgram(m_vs[key]); + for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteProgram(m_gs[key]); for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteProgram(it->second); } else { - for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteShader(it->second); - for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteShader(it->second); + for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteShader(m_vs[key]); + for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteShader(m_gs[key]); for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteShader(it->second); for (auto it = m_single_prog.begin(); it != m_single_prog.end() ; it++) gl_DeleteProgram(it->second); m_single_prog.clear(); } - for (auto it = m_ps_ss.begin(); it != m_ps_ss.end() ; it++) gl_DeleteSamplers(1, &it->second); - m_vs.clear(); - m_gs.clear(); + 
gl_DeleteSamplers(PSSamplerSelector::size(), m_ps_ss); + + for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) delete m_om_dss[key]; m_ps.clear(); - m_ps_ss.clear(); - m_om_dss.clear(); m_om_bs.clear(); } @@ -248,8 +247,8 @@ bool GSDeviceOGL::Create(GSWnd* wnd) hr = m_dev->CreateBlendState(&bsd, &m_convert.bs); #endif - CreateSampler(m_convert.ln, true, false, false); - CreateSampler(m_convert.pt, false, false, false); + m_convert.ln = CreateSampler(true, false, false); + m_convert.pt = CreateSampler(false, false, false); m_convert.dss = new GSDepthStencilOGL(); m_convert.bs = new GSBlendStateOGL(); @@ -625,8 +624,9 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) glEnable(GL_SCISSOR_TEST); } -void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav) +GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav) { + GLuint sampler; gl_GenSamplers(1, &sampler); if (bilinear) { gl_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -657,6 +657,8 @@ void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool t gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); // FIXME: need ogl extension sd.MaxAnisotropy = 16; + + return sampler; } GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) @@ -1363,7 +1365,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st free(header_str); free(sources_array); - if (theApp.GetConfig("debug_ogl_shader", 1) == 1) { + if (m_debug_shader) { GLint log_length = 0; GLint status = false; if (GLLoader::found_GL_ARB_separate_shader_objects) { diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 8c29ba4af1..2a7f5dcf1b 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -240,14 +240,12 @@ class GSDeviceOGL : public GSDevice public: __aligned(struct, 32) 
VSConstantBuffer { - GSVector4 VertexScale; - GSVector4 VertexOffset; + GSVector4 Vertex_Scale_Offset; GSVector4 TextureScale; VSConstantBuffer() { - VertexScale = GSVector4::zero(); - VertexOffset = GSVector4::zero(); + Vertex_Scale_Offset = GSVector4::zero(); TextureScale = GSVector4::zero(); } @@ -258,13 +256,11 @@ class GSDeviceOGL : public GSDevice GSVector4i b0 = b[0]; GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue()) + if(!((a[0] == b0) & (a[1] == b1)).alltrue()) { a[0] = b0; a[1] = b1; - a[2] = b2; return true; } @@ -283,7 +279,6 @@ class GSDeviceOGL : public GSDevice uint32 tme:1; uint32 fst:1; uint32 logz:1; - //uint32 rtcopy:1; }; uint32 key; @@ -292,6 +287,9 @@ class GSDeviceOGL : public GSDevice operator uint32() {return key & 0x3f;} VSSelector() : key(0) {} + VSSelector(uint32 k) : key(k) {} + + static uint32 size() { return 1 << 5; } }; __aligned(struct, 32) PSConstantBuffer @@ -327,7 +325,8 @@ class GSDeviceOGL : public GSDevice GSVector4i b4 = b[4]; GSVector4i b5 = b[5]; - if(!((a[0] == b0) /*& (a[1] == b1)*/ & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) // if WH matches HalfTexel does too + // if WH matches both HalfTexel and TC_OffsetHack do too + if(!((a[0] == b0) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) { a[0] = b0; a[1] = b1; @@ -359,6 +358,9 @@ class GSDeviceOGL : public GSDevice operator uint32() {return key & 0x7;} GSSelector() : key(0) {} + GSSelector(uint32 k) : key(k) {} + + static uint32 size() { return 1 << 3; } }; struct PSSelector @@ -413,6 +415,9 @@ class GSDeviceOGL : public GSDevice operator uint32() {return key & 0x7;} PSSamplerSelector() : key(0) {} + PSSamplerSelector(uint32 k) : key(k) {} + + static uint32 size() { return 1 << 3; } }; struct OMDepthStencilSelector @@ -434,6 +439,9 @@ class GSDeviceOGL : public GSDevice operator uint32() {return key & 0x3f;} OMDepthStencilSelector() : key(0) {} + 
OMDepthStencilSelector(uint32 k) : key(k) {} + + static uint32 size() { return 1 << 6; } }; struct OMBlendSelector @@ -490,6 +498,8 @@ class GSDeviceOGL : public GSDevice GSVertexBufferStateOGL* m_vb; // vb_state for HW renderer GSVertexBufferStateOGL* m_vb_sr; // vb_state for StretchRect + bool m_debug_shader; + struct { GLuint ps[2]; // program object GSUniformBufferOGL* cb; // uniform buffer object @@ -552,11 +562,11 @@ class GSDeviceOGL : public GSDevice GLenum draw; } m_state; - hash_map m_vs; - hash_map m_gs; + GLuint m_vs[1<<5]; + GLuint m_gs[1<<3]; + GLuint m_ps_ss[1<<3]; + GSDepthStencilOGL* m_om_dss[1<<6]; hash_map m_ps; - hash_map m_ps_ss; - hash_map m_om_dss; hash_map m_om_bs; GLuint m_palette_ss; @@ -603,7 +613,6 @@ class GSDeviceOGL : public GSDevice void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - void CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav); GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); @@ -648,6 +657,15 @@ class GSDeviceOGL : public GSDevice void CreateTextureFX(); + GLuint CompileVS(VSSelector sel); + GLuint CompileGS(GSSelector sel); + GLuint CompilePS(PSSelector sel); + GLuint CreateSampler(bool bilinear, bool tau, bool tav); + GLuint CreateSampler(PSSamplerSelector sel); + GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); + GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix); + + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 6dc4236f4f..06627fc5d3 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -157,21 +157,23 @@ void GSRendererOGL::SetupIA() 
dev->IASetVertexState(); - if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next)) - { - GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); - - if(UserHacks_WildHack && !isPackedUV_HackFlag) + if(UserHacks_WildHack && !isPackedUV_HackFlag) { + if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next)) { + GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + GSVertex* RESTRICT d = (GSVertex*)ptr; for(unsigned int i = 0; i < m_vertex.next; i++) { if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF; } - } - dev->IAUnmapVertexBuffer(); + dev->IAUnmapVertexBuffer(); + } + } else { + // By default use the common path (in case it can be made faster) + dev->IASetVertexBuffer(m_vertex.buff, m_vertex.next); } dev->IASetIndexBuffer(m_index.buff, m_index.tail); @@ -202,7 +204,6 @@ void GSRendererOGL::SetupIA() dev->IASetPrimitiveTopology(t); } - void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { GSDrawingEnvironment& env = m_env; @@ -213,8 +214,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - //OGL GSTexture* rtcopy = NULL; - ASSERT(m_dev != NULL); GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; @@ -232,32 +231,14 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSVertexPT1 vertices[] = { -#if 0 - {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, - {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, - {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, - {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, -#else {GSVector4(dst.x, dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, {GSVector4(dst.z, dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, {GSVector4(dst.x, dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, {GSVector4(dst.z, dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, -#endif }; - 
//fprintf(stderr, "DATE A:%fx%f B:%fx%f\n", dst.x, -dst.y, dst.z, -dst.w); - //fprintf(stderr, "DATE SR: %f %f %f %f\n", src.x, src.y, src.z, src.w); - //fprintf(stderr, "DATE offset: %f\n", o.x); dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); } - else - { - //OGL rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); - - //OGL // I'll use VertexTrace when I consider it more trustworthy - - //OGL dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); - } } // @@ -320,7 +301,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; vs_sel.logz = m_logz ? 1 : 0; - //OGL vs_sel.rtcopy = !!rtcopy; // The real GS appears to do no masking based on the Z buffer format and writing larger Z values // than the buffer supports seems to be an error condition on the real GS, causing it to crash. @@ -363,8 +343,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour float sy = 2.0f * rtscale.y / (rtsize.y << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; - float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; + float ox2 = -1.0f / rtsize.x; + float oy2 = -1.0f / rtsize.y; //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, //because DX10 and DX9 have a different pixel center.) 
@@ -374,16 +354,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if(rt->LikelyOffset) { - // DX9 has pixelcenter set to 0.0, so give it some value here - - if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } - ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; } - vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); - vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); + // Note: DX does y *= -1.0 + vs_cb.Vertex_Scale_Offset = GSVector4(sx, sy, ox * sx + ox2 + 1, oy * sy + oy2 + 1); // END of FIXME // gs @@ -519,7 +495,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->OMSetRenderTargets(rt, ds, &scissor); dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL); dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL); - //OGL dev->PSSetShaderResource(2, rtcopy); uint8 afix = context->ALPHA.FIX; @@ -607,7 +582,5 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->EndScene(); - //OGL dev->Recycle(rtcopy); - if(om_dssel.fba) UpdateFBA(rt); } diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 3be6c82640..239c1629b5 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -33,7 +33,7 @@ void GSDeviceOGL::CreateTextureFX() m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer)); m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer)); - CreateSampler(m_palette_ss, false, false, false); + m_palette_ss = CreateSampler(false, false, false); GSInputLayoutOGL vert_format[] = { @@ -56,77 +56,158 @@ void GSDeviceOGL::CreateTextureFX() // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution - GSDeviceOGL::GSSelector gs_sel; - for (uint32 key = 0; key < (1 << 3); key++) { - gs_sel.key = key; - SetupGS(gs_sel); + for (uint32 
key = 0; key < GSSelector::size(); key++) + m_gs[key] = CompileGS(GSSelector(key)); + + for (uint32 key = 0; key < VSSelector::size(); key++) + m_vs[key] = CompileVS(VSSelector(key)); + + for (uint32 key = 0; key < PSSamplerSelector::size(); key++) + m_ps_ss[key] = CreateSampler(PSSamplerSelector(key)); + + for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) + m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); + +} + +GLuint GSDeviceOGL::CompileVS(VSSelector sel) +{ + GLuint vs; + std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) + + format("#define VS_LOGZ %d\n", sel.logz) + + format("#define VS_TME %d\n", sel.tme) + + format("#define VS_FST %d\n", sel.fst); + + CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &vs, tfx_glsl, macro); + + return vs; +} + +GLuint GSDeviceOGL::CompileGS(GSSelector sel) +{ + GLuint gs; + // Easy case + if(! (sel.prim > 0 && (sel.iip == 0 || sel.prim == 3))) + return 0; + + std::string macro = format("#define GS_IIP %d\n", sel.iip) + + format("#define GS_PRIM %d\n", sel.prim); + + CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &gs, tfx_glsl, macro); + + return gs; +} + +GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel) +{ + return CreateSampler(sel.ltf, sel.tau, sel.tav); +} + +GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) +{ + GSDepthStencilOGL* dss = new GSDepthStencilOGL(); + + if (dssel.date) + { + dss->EnableStencil(); + dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? 
GL_ZERO : GL_KEEP); } - GSDeviceOGL::VSSelector vs_sel; - for (uint32 key = 0; key < (1 << 5); key++) { - vs_sel.key = key; - SetupVS(vs_sel, NULL); + + if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) + { + static const GLenum ztst[] = + { + GL_NEVER, + GL_ALWAYS, + GL_GEQUAL, + GL_GREATER + }; + dss->EnableDepth(); + dss->SetDepth(ztst[dssel.ztst], dssel.zwe); } - // Use sane reset value - GSSetShader(0); - VSSetShader(0); + + return dss; +} + +GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix) +{ + GSBlendStateOGL* bs = new GSBlendStateOGL(); + + if(bsel.abe) + { + int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; + + bs->EnableBlend(); + bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); + + if(m_blendMapD3D9[i].bogus == 1) + { + if (bsel.a == 0) + bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst); + else + bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE); + + const string afixstr = format("%d >> 7", afix); + const char *col[3] = {"Cs", "Cd", "0"}; + const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; + + // FIXME, need to investigate OGL capabilities. 
Maybe for OGL5 ;) + fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); + } + + // Not very good but I don't wanna write another 81 row table + if(bsel.negative) bs->RevertOp(); + } + + bs->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa); + + return bs; +} + +GLuint GSDeviceOGL::CompilePS(PSSelector sel) +{ + GLuint ps; + + std::string macro = format("#define PS_FST %d\n", sel.fst) + + format("#define PS_WMS %d\n", sel.wms) + + format("#define PS_WMT %d\n", sel.wmt) + + format("#define PS_FMT %d\n", sel.fmt) + + format("#define PS_AEM %d\n", sel.aem) + + format("#define PS_TFX %d\n", sel.tfx) + + format("#define PS_TCC %d\n", sel.tcc) + + format("#define PS_ATST %d\n", sel.atst) + + format("#define PS_FOG %d\n", sel.fog) + + format("#define PS_CLR1 %d\n", sel.clr1) + + format("#define PS_FBA %d\n", sel.fba) + + format("#define PS_AOUT %d\n", sel.aout) + + format("#define PS_LTF %d\n", sel.ltf) + + format("#define PS_COLCLIP %d\n", sel.colclip) + + format("#define PS_DATE %d\n", sel.date) + + format("#define PS_SPRITEHACK %d\n", sel.spritehack) + + format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack) + + format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler); + + CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &ps, tfx_glsl, macro); + + return ps; } void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb) { - // ************************************************************* - // Static - // ************************************************************* - auto i = m_vs.find(sel); + GLuint vs = m_vs[sel]; - if(i == m_vs.end()) - { - std::string macro = format("#define VS_BPPZ %d\n", sel.bppz) - + format("#define VS_LOGZ %d\n", sel.logz) - + format("#define VS_TME %d\n", sel.tme) - + format("#define VS_FST %d\n", sel.fst); - - GLuint vs; - CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &vs, tfx_glsl, macro); - - m_vs[sel] = vs; - i = m_vs.find(sel); 
- } - - // ************************************************************* - // Dynamic - // ************************************************************* - if(cb != NULL && m_vs_cb_cache.Update(cb)) { + if(m_vs_cb_cache.Update(cb)) { SetUniformBuffer(m_vs_cb); m_vs_cb->upload(cb); } - VSSetShader(i->second); + VSSetShader(vs); } void GSDeviceOGL::SetupGS(GSSelector sel) { - // ************************************************************* - // Static - // ************************************************************* - GLuint gs = 0; - if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) - { - auto i = m_gs.find(sel); + GLuint gs = m_gs[sel]; - if(i == m_gs.end()) { - std::string macro = format("#define GS_IIP %d\n", sel.iip) - + format("#define GS_PRIM %d\n", sel.prim); - - CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &gs, tfx_glsl, macro); - - m_gs[sel] = gs; - } else { - gs = i->second; - } - } - // ************************************************************* - // Dynamic - // ************************************************************* GSSetShader(gs); } @@ -138,29 +219,8 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS GLuint ps; auto i = m_ps.find(sel); - if (i == m_ps.end()) - { - std::string macro = format("#define PS_FST %d\n", sel.fst) - + format("#define PS_WMS %d\n", sel.wms) - + format("#define PS_WMT %d\n", sel.wmt) - + format("#define PS_FMT %d\n", sel.fmt) - + format("#define PS_AEM %d\n", sel.aem) - + format("#define PS_TFX %d\n", sel.tfx) - + format("#define PS_TCC %d\n", sel.tcc) - + format("#define PS_ATST %d\n", sel.atst) - + format("#define PS_FOG %d\n", sel.fog) - + format("#define PS_CLR1 %d\n", sel.clr1) - + format("#define PS_FBA %d\n", sel.fba) - + format("#define PS_AOUT %d\n", sel.aout) - + format("#define PS_LTF %d\n", sel.ltf) - + format("#define PS_COLCLIP %d\n", sel.colclip) - + format("#define PS_DATE %d\n", sel.date) - + format("#define PS_SPRITEHACK %d\n", sel.spritehack) - + 
format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack) - + format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler); - - CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &ps, tfx_glsl, macro); - + if (i == m_ps.end()) { + ps = CompilePS(sel); m_ps[sel] = ps; } else { ps = i->second; @@ -183,21 +243,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS ssel.ltf = 0; } - auto i = m_ps_ss.find(ssel); - - if(i != m_ps_ss.end()) - { - ss0 = i->second; - } - else - { - // ************************************************************* - // Static - // ************************************************************* - CreateSampler(ss0, ssel.ltf, ssel.tau, ssel.tav); - - m_ps_ss[ssel] = ss0; - } + ss0 = m_ps_ss[ssel]; if(sel.fmt >= 3) { @@ -211,86 +257,26 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) { - auto i = m_om_dss.find(dssel); + GSDepthStencilOGL* dss = m_om_dss[dssel]; - // ************************************************************* - // Static - // ************************************************************* - if (i == m_om_dss.end()) - { - GSDepthStencilOGL* dss = new GSDepthStencilOGL(); - - if (dssel.date) - { - dss->EnableStencil(); - dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? 
GL_ZERO : GL_KEEP); - } - - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) - { - static const GLenum ztst[] = - { - GL_NEVER, - GL_ALWAYS, - GL_GEQUAL, - GL_GREATER - }; - dss->EnableDepth(); - dss->SetDepth(ztst[dssel.ztst], dssel.zwe); - } - - m_om_dss[dssel] = dss; - i = m_om_dss.find(dssel); - } - - // ************************************************************* - // Dynamic - // ************************************************************* - OMSetDepthStencilState(i->second, 1); + OMSetDepthStencilState(dss, 1); // ************************************************************* // Static // ************************************************************* auto j = m_om_bs.find(bsel); + GSBlendStateOGL* bs; if(j == m_om_bs.end()) { - GSBlendStateOGL* bs = new GSBlendStateOGL(); - - if(bsel.abe) - { - int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; - - bs->EnableBlend(); - bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); - - if(m_blendMapD3D9[i].bogus == 1) - { - if (bsel.a == 0) - bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst); - else - bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE); - - const string afixstr = format("%d >> 7", afix); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; - - // FIXME, need to investigate OGL capabilities. 
Maybe for OGL5 ;) - fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); - } - - // Not very good but I don't wanna write another 81 row table - if(bsel.negative) bs->RevertOp(); - } - - bs->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa); - + bs = CreateBlend(bsel, afix); m_om_bs[bsel] = bs; - j = m_om_bs.find(bsel); + } else { + bs = j->second; } // ************************************************************* // Dynamic // ************************************************************* - OMSetBlendState(j->second, (float)(int)afix / 0x80); + OMSetBlendState(bs, (float)(int)afix / 0x80); } diff --git a/plugins/GSdx/GSVertexArrayOGL.h b/plugins/GSdx/GSVertexArrayOGL.h index 8c1f83ad57..1b7fb65058 100644 --- a/plugins/GSdx/GSVertexArrayOGL.h +++ b/plugins/GSdx/GSVertexArrayOGL.h @@ -31,13 +31,13 @@ struct GSInputLayoutOGL { }; class GSBufferOGL { - size_t m_stride; + const size_t m_stride; size_t m_start; size_t m_count; size_t m_limit; - GLenum m_target; + const GLenum m_target; GLuint m_buffer; - size_t m_default_size; + const bool m_sub_data_config; public: GSBufferOGL(GLenum target, size_t stride) : @@ -46,15 +46,16 @@ class GSBufferOGL { , m_count(0) , m_limit(0) , m_target(target) + , m_sub_data_config((bool)theApp.GetConfig("ogl_vertex_subdata", 1)) { gl_GenBuffers(1, &m_buffer); // Opengl works best with 1-4MB buffer. 
- m_default_size = 2 * 1024 * 1024 / m_stride; + m_limit = 2 * 1024 * 1024 / m_stride; } ~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); } - void allocate() { allocate(m_default_size); } + void allocate() { allocate(m_limit); } void allocate(size_t new_limit) { @@ -68,9 +69,26 @@ class GSBufferOGL { gl_BindBuffer(m_target, m_buffer); } - void upload(const void* src, uint32 count) + void subdata_upload(const void* src, uint32 count) + { + m_count = count; + + // Current GPU buffer is really too small need to allocate a new one + if (m_count > m_limit) { + allocate(std::max(m_count * 3 / 2, m_limit)); + + } else if (m_count > (m_limit - m_start) ) { + // Not enough left free room. Just go back at the beginning + m_start = 0; + // Orphan the buffer to avoid synchronization + allocate(m_limit); + } + + gl_BufferSubData(m_target, m_stride * m_start, m_stride * m_count, src); + } + + void map_upload(const void* src, uint32 count) { - // Upload the data to the buffer void* dst; if (Map(&dst, count)) { // FIXME which one to use @@ -80,14 +98,16 @@ class GSBufferOGL { } } + void upload(const void* src, uint32 count) + { + if (m_sub_data_config) { + subdata_upload(src, count); + } else { + map_upload(src, count); + } + } + bool Map(void** pointer, uint32 count ) { -#ifdef ENABLE_OGL_DEBUG - GLint b_size = -1; - gl_GetBufferParameteriv(m_target, GL_BUFFER_SIZE, &b_size); - - if (b_size <= 0) return false; -#endif - m_count = count; // Note: For an explanation of the map flag @@ -96,7 +116,7 @@ class GSBufferOGL { // Current GPU buffer is really too small need to allocate a new one if (m_count > m_limit) { - allocate(std::max(m_count * 3 / 2, m_default_size)); + allocate(std::max(m_count * 3 / 2, m_limit)); } else if (m_count > (m_limit - m_start) ) { // Not enough left free room. 
Just go back at the beginning @@ -113,13 +133,7 @@ class GSBufferOGL { // Upload the data to the buffer *pointer = (uint8*) gl_MapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags); - //fprintf(stderr, "Map %x from %d to %d\n", *pointer, m_start, m_start+m_count); -#ifdef ENABLE_OGL_DEBUG - if (*pointer == NULL) { - fprintf(stderr, "CRITICAL ERROR map failed for vb!!!\n"); - return false; - } -#endif + return true; } diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index 6e2c0d09ae..82939e2ca4 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -46,7 +46,6 @@ struct vertex { - //vec4 p; vec4 t; vec4 tp; vec4 c; @@ -69,17 +68,14 @@ layout(location = 0) out vertex VSout; #define VSout_c (VSout.c) #else #ifdef DISABLE_SSO -//out vec4 SHADERp; out vec4 SHADERt; out vec4 SHADERtp; out vec4 SHADERc; #else -//layout(location = 0) out vec4 SHADERp; layout(location = 0) out vec4 SHADERt; layout(location = 1) out vec4 SHADERtp; layout(location = 2) out vec4 SHADERc; #endif -//#define VSout_p SHADERp #define VSout_t SHADERt #define VSout_tp SHADERtp #define VSout_c SHADERc @@ -99,11 +95,13 @@ layout(std140) uniform cb20 layout(std140, binding = 20) uniform cb20 #endif { - vec4 VertexScale; - vec4 VertexOffset; + vec2 VertexScale; + vec2 VertexOffset; vec2 TextureScale; }; +const float exp_min32 = exp2(-32); + void vs_main() { uint z; @@ -119,35 +117,25 @@ void vs_main() // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 - vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); - vec4 final_p = p * VertexScale - VertexOffset; - // FIXME - // FLIP vertically - final_p.y *= -1.0f; + vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f); + p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f); if(VS_LOGZ == 1) { - final_p.z = log2(1.0f + float(z)) / 32.0f; + p.z = 
log2(1.0f + float(z)) / 32.0f; } - //VSout_p = final_p; - gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position -#if VS_RTCOPY - VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5; -#endif - + gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position if(VS_TME != 0) { if(VS_FST != 0) { - //VSout_t.xy = i_t * TextureScale; VSout_t.xy = i_uv * TextureScale; VSout_t.w = 1.0f; } else { - //VSout_t.xy = i_t; VSout_t.xy = i_st; VSout_t.w = i_q; } @@ -188,7 +176,7 @@ layout(points, max_vertices = 1) out; void gs_main() { for(int i = 0; i < gl_in.length(); i++) { - gl_Position = gl_in[i].gl_Position; // FIXME is it useful + gl_Position = gl_in[i].gl_Position; GSout = GSin[i]; EmitVertex(); } @@ -202,7 +190,7 @@ layout(line_strip, max_vertices = 2) out; void gs_main() { for(int i = 0; i < gl_in.length(); i++) { - gl_Position = gl_in[i].gl_Position; // FIXME is it useful + gl_Position = gl_in[i].gl_Position; GSout = GSin[i]; #if GS_IIP == 0 if (i == 0) @@ -220,7 +208,7 @@ layout(triangle_strip, max_vertices = 3) out; void gs_main() { for(int i = 0; i < gl_in.length(); i++) { - gl_Position = gl_in[i].gl_Position; // FIXME is it useful + gl_Position = gl_in[i].gl_Position; GSout = GSin[i]; #if GS_IIP == 0 if (i == 0 || i == 1) @@ -299,23 +287,19 @@ void gs_main() #ifdef FRAGMENT_SHADER #if __VERSION__ > 140 && !(defined(NO_STRUCT)) layout(location = 0) in vertex PSin; -//#define PSin_p (PSin.p) #define PSin_t (PSin.t) #define PSin_tp (PSin.tp) #define PSin_c (PSin.c) #else #ifdef DISABLE_SSO -in vec4 SHADERp; in vec4 SHADERt; in vec4 SHADERtp; in vec4 SHADERc; #else -//layout(location = 0) in vec4 SHADERp; layout(location = 0) in vec4 SHADERt; layout(location = 1) in vec4 SHADERtp; layout(location = 2) in vec4 SHADERc; #endif -//#define PSin_p SHADERp #define PSin_t SHADERt #define PSin_tp SHADERtp #define PSin_c SHADERc @@ -365,10 +349,7 @@ vec4 sample_c(vec2 uv) uv = (trunc(uv * 
WH.zw) + vec2(0.5, 0.5)) / WH.zw; } - // FIXME I'm not sure it is a good solution to flip texture return texture(TextureSampler, uv); - //FIXME another way to FLIP vertically - //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) ); } vec4 sample_p(float u) @@ -698,12 +679,8 @@ vec4 ps_color() void ps_main() { - //FIXME vec4 c = ps_color(); - // FIXME: I'm not sure about the value of others field - // output.c1 = c.a * 2; // used for alpha blending - float alpha = c.a * 2; if(PS_AOUT != 0) // 16 bit output diff --git a/plugins/GSdx/res/tfx.h b/plugins/GSdx/res/tfx.h index 876c58f6c5..398ed01bdd 100644 --- a/plugins/GSdx/res/tfx.h +++ b/plugins/GSdx/res/tfx.h @@ -74,7 +74,6 @@ static const char* tfx_glsl = "\n" "struct vertex\n" "{\n" - " //vec4 p;\n" " vec4 t;\n" " vec4 tp;\n" " vec4 c;\n" @@ -97,17 +96,14 @@ static const char* tfx_glsl = "#define VSout_c (VSout.c)\n" "#else\n" "#ifdef DISABLE_SSO\n" - "//out vec4 SHADERp;\n" "out vec4 SHADERt;\n" "out vec4 SHADERtp;\n" "out vec4 SHADERc;\n" "#else\n" - "//layout(location = 0) out vec4 SHADERp;\n" "layout(location = 0) out vec4 SHADERt;\n" "layout(location = 1) out vec4 SHADERtp;\n" "layout(location = 2) out vec4 SHADERc;\n" "#endif\n" - "//#define VSout_p SHADERp\n" "#define VSout_t SHADERt\n" "#define VSout_tp SHADERtp\n" "#define VSout_c SHADERc\n" @@ -127,11 +123,13 @@ static const char* tfx_glsl = "layout(std140, binding = 20) uniform cb20\n" "#endif\n" "{\n" - " vec4 VertexScale;\n" - " vec4 VertexOffset;\n" + " vec2 VertexScale;\n" + " vec2 VertexOffset;\n" " vec2 TextureScale;\n" "};\n" "\n" + "const float exp_min32 = exp2(-32);\n" + "\n" "void vs_main()\n" "{\n" " uint z;\n" @@ -147,35 +145,25 @@ static const char* tfx_glsl = " // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n" " // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n" "\n" - " vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); \n" - " 
vec4 final_p = p * VertexScale - VertexOffset;\n" - " // FIXME\n" - " // FLIP vertically\n" - " final_p.y *= -1.0f;\n" + " vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f);\n" + " p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f);\n" "\n" " if(VS_LOGZ == 1)\n" " {\n" - " final_p.z = log2(1.0f + float(z)) / 32.0f;\n" + " p.z = log2(1.0f + float(z)) / 32.0f;\n" " }\n" "\n" - " //VSout_p = final_p;\n" - " gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n" - "#if VS_RTCOPY\n" - " VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;\n" - "#endif\n" - "\n" + " gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n" "\n" " if(VS_TME != 0)\n" " {\n" " if(VS_FST != 0)\n" " {\n" - " //VSout_t.xy = i_t * TextureScale;\n" " VSout_t.xy = i_uv * TextureScale;\n" " VSout_t.w = 1.0f;\n" " }\n" " else\n" " {\n" - " //VSout_t.xy = i_t;\n" " VSout_t.xy = i_st;\n" " VSout_t.w = i_q;\n" " }\n" @@ -216,7 +204,7 @@ static const char* tfx_glsl = "void gs_main()\n" "{\n" " for(int i = 0; i < gl_in.length(); i++) {\n" - " gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n" + " gl_Position = gl_in[i].gl_Position;\n" " GSout = GSin[i];\n" " EmitVertex();\n" " }\n" @@ -230,7 +218,7 @@ static const char* tfx_glsl = "void gs_main()\n" "{\n" " for(int i = 0; i < gl_in.length(); i++) {\n" - " gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n" + " gl_Position = gl_in[i].gl_Position;\n" " GSout = GSin[i];\n" "#if GS_IIP == 0\n" " if (i == 0)\n" @@ -248,7 +236,7 @@ static const char* tfx_glsl = "void gs_main()\n" "{\n" " for(int i = 0; i < gl_in.length(); i++) {\n" - " gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n" + " gl_Position = gl_in[i].gl_Position;\n" " GSout = GSin[i];\n" "#if GS_IIP == 0\n" " if (i == 0 || i == 1)\n" @@ -327,23 +315,19 @@ static const char* tfx_glsl = "#ifdef FRAGMENT_SHADER\n" "#if __VERSION__ > 140 && !(defined(NO_STRUCT))\n" 
"layout(location = 0) in vertex PSin;\n" - "//#define PSin_p (PSin.p)\n" "#define PSin_t (PSin.t)\n" "#define PSin_tp (PSin.tp)\n" "#define PSin_c (PSin.c)\n" "#else\n" "#ifdef DISABLE_SSO\n" - "in vec4 SHADERp;\n" "in vec4 SHADERt;\n" "in vec4 SHADERtp;\n" "in vec4 SHADERc;\n" "#else\n" - "//layout(location = 0) in vec4 SHADERp;\n" "layout(location = 0) in vec4 SHADERt;\n" "layout(location = 1) in vec4 SHADERtp;\n" "layout(location = 2) in vec4 SHADERc;\n" "#endif\n" - "//#define PSin_p SHADERp\n" "#define PSin_t SHADERt\n" "#define PSin_tp SHADERtp\n" "#define PSin_c SHADERc\n" @@ -393,10 +377,7 @@ static const char* tfx_glsl = " uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;\n" " }\n" "\n" - " // FIXME I'm not sure it is a good solution to flip texture\n" " return texture(TextureSampler, uv);\n" - " //FIXME another way to FLIP vertically\n" - " //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );\n" "}\n" "\n" "vec4 sample_p(float u)\n" @@ -726,12 +707,8 @@ static const char* tfx_glsl = "\n" "void ps_main()\n" "{\n" - " //FIXME\n" " vec4 c = ps_color();\n" "\n" - " // FIXME: I'm not sure about the value of others field\n" - " // output.c1 = c.a * 2; // used for alpha blending\n" - "\n" " float alpha = c.a * 2;\n" "\n" " if(PS_AOUT != 0) // 16 bit output\n"