From c1d7b81a558e9eb3d3dacaedc556c5c75e9e3ec8 Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Mon, 2 Jan 2012 20:08:11 +0000 Subject: [PATCH] gsdx-ogl: * Use a geometry shader pass-through to replace previous AMD workaround * various cosmetic change git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5038 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/CMakeLists.txt | 1 + plugins/GSdx/GSDeviceOGL.cpp | 34 ++++---- plugins/GSdx/GSDeviceOGL.h | 13 +++ plugins/GSdx/GSTextureFXOGL.cpp | 7 +- plugins/GSdx/GSTextureOGL.cpp | 135 ++++++++++++++++---------------- plugins/GSdx/GSTextureOGL.h | 1 + plugins/GSdx/res/convert.glsl | 57 +++++++++++--- plugins/GSdx/res/interlace.glsl | 25 +++--- plugins/GSdx/res/merge.glsl | 13 ++- plugins/GSdx/res/tfx.glsl | 25 +++--- 10 files changed, 188 insertions(+), 123 deletions(-) diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index 98ac7ec165..b9690455c0 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -19,6 +19,7 @@ set(CommonFlags -std=c++0x -fno-strict-aliasing -DOGL_DEBUG + -DAMD_DRIVER_WORKAROUND ) set(OptimizationFlags diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index a9d794bc56..f4d2b731e7 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -54,8 +54,8 @@ //#define LOUD_DEBUGGING #define SHADER_DEBUG -//#define DUMP_START (13000) -//#define DUMP_LENGTH (200) +//#define DUMP_START (70) +//#define DUMP_LENGTH (130) //#define DUMP_ONLY_FRAME (112) #ifdef DUMP_START @@ -237,6 +237,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd) // convert // **************************************************************** CompileShaderFromSource("convert.glsl", "vs_main", GL_VERTEX_SHADER, &m_convert.vs); + CompileShaderFromSource("convert.glsl", "gs_main", GL_GEOMETRY_SHADER, &m_convert.gs); for(int i = 0; i < countof(m_convert.ps); i++) CompileShaderFromSource("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_convert.ps[i]); @@ -540,9 +541,11 @@ void GSDeviceOGL::DrawPrimitive() case GL_POINTS: topo = "point"; break; default: topo = "!!!!"; } - fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, m_state.vb_state->count, topo.c_str() ); + fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, /*m_state.vb_state->count*/ 0, topo.c_str() ); fprintf(stderr, "vs: %d ; gs: %d ; ps: %d\n", m_state.vs, m_state.gs, m_state.ps); fprintf(stderr, "Blend: %d, Depth: %d, Stencil: %d \n",m_state.bs->m_enable, m_state.dss->m_depth_enable, m_state.dss->m_stencil_enable); + m_state.bs->debug(); + m_state.dss->debug_depth(); } #endif @@ -560,19 +563,6 @@ void GSDeviceOGL::DrawPrimitive() g_draw_count++; #endif - - - // FIXME AMD driver bug workaround - // You cannot unattach shader. So destroy everythings and recreate the shader pipeline... - // Slow and painful... - glBindProgramPipeline(0); - glDeleteProgramPipelines(1, &m_pipeline); - m_state.gs = 0; - m_state.ps = 0; - m_state.vs = 0; - - glGenProgramPipelines(1, &m_pipeline); - glBindProgramPipeline(m_pipeline); } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -822,7 +812,11 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // gs // ************************************ +#ifdef AMD_DRIVER_WORKAROUND + GSSetShader(m_convert.gs); +#else GSSetShader(0); +#endif // ************************************ // ps @@ -914,7 +908,12 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver // gs +#ifdef AMD_DRIVER_WORKAROUND + GSSetShader(m_convert.gs); +#else GSSetShader(0); +#endif + // ps @@ -1044,7 +1043,6 @@ void GSDeviceOGL::PSSetSamplerState(GLuint ss0, GLuint ss1, GLuint ss2) void GSDeviceOGL::PSSetShader(GLuint ps) { - if(m_state.ps != ps) { m_state.ps = ps; @@ -1279,8 +1277,8 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), *program); fprintf(stderr, "\n%s", macro_sel.c_str()); fprintf(stderr, "%s\n", log); -#endif free(log); +#endif } void GSDeviceOGL::CheckDebugLog() diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 9100d1c5d5..dcc3de9a85 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -54,6 +54,13 @@ struct GSBlendStateOGL { , m_a_msk(GL_TRUE) {} + void debug() + { + if (!m_enable) return; + fprintf(stderr,"Blend RGB: %x src:%x dst:%x\n", m_equation_RGB, m_func_sRGB, m_func_dRGB); + fprintf(stderr,"Blend ALPHA: %x src:%x dst:%x\n", m_equation_ALPHA, m_func_sALPHA, m_func_dALPHA); + fprintf(stderr,"Mask. R:%d B:%d G:%d A:%d\n", m_r_msk, m_b_msk, m_g_msk, m_a_msk); + } }; struct GSDepthStencilOGL { @@ -81,6 +88,11 @@ struct GSDepthStencilOGL { , m_stencil_spass_dpass_op(GL_KEEP) {} + void debug_depth() + { + if (!m_depth_enable) return; + fprintf(stderr, "Depth %x, %x\n", m_depth_func, m_depth_mask); + } }; class GSUniformBufferOGL { @@ -503,6 +515,7 @@ class GSDeviceOGL : public GSDevice GLuint ps[8]; // program object GLuint ln; // sampler object GLuint pt; // sampler object + GLuint gs; GSDepthStencilOGL* dss; GSBlendStateOGL* bs; } m_convert; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index f3f37466d6..642cef238a 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -120,7 +120,12 @@ void GSDeviceOGL::SetupGS(GSSelector sel) // Static // ************************************************************* GLuint gs = 0; - if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) { +#ifdef AMD_DRIVER_WORKAROUND + if (true) +#else + if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) +#endif + { auto i = m_gs.find(sel); if(i == m_gs.end()) { diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index b4d1f38402..269da857b6 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -22,8 +22,8 @@ #pragma once #include "GSTextureOGL.h" -static uint g_state_texture_unit = 0; -static uint g_state_texture_id = 0; +static int g_state_texture_unit = -1; +static int g_state_texture_id = -1; GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format) : m_extra_buffer_id(0), @@ -317,6 +317,68 @@ struct BITMAPINFOHEADER #pragma pack(pop) #endif +void GSTextureOGL::Save(const string& fn, const void* image, uint32 pitch) +{ + // Build a BMP file + FILE* fp = fopen(fn.c_str(), "wb"); + + BITMAPINFOHEADER bih; + + memset(&bih, 0, sizeof(bih)); + + bih.biSize = sizeof(bih); + bih.biWidth = m_size.x; + bih.biHeight = m_size.y; + bih.biPlanes = 1; + bih.biBitCount = 32; + bih.biCompression = BI_RGB; + bih.biSizeImage = m_size.x * m_size.y << 2; + + BITMAPFILEHEADER bfh; + + memset(&bfh, 0, sizeof(bfh)); + + uint8* bfType = (uint8*)&bfh.bfType; + + // bfh.bfType = 'MB'; + bfType[0] = 0x42; + bfType[1] = 0x4d; + bfh.bfOffBits = sizeof(bfh) + sizeof(bih); + bfh.bfSize = bfh.bfOffBits + bih.biSizeImage; + bfh.bfReserved1 = bfh.bfReserved2 = 0; + + fwrite(&bfh, 1, sizeof(bfh), fp); + fwrite(&bih, 1, sizeof(bih), fp); + + uint8* data = (uint8*)image + (m_size.y - 1) * pitch; + + for(int h = m_size.y; h > 0; h--, data -= pitch) + { + if (IsDss()) { + // Only get the depth and convert it to an integer + uint8* better_data = data; + for (int w = m_size.x; w > 0; w--, better_data += 8) { + float* input = (float*)better_data; + // FIXME how to dump 32 bits value into 8bits component color + uint32 depth = (uint32)ldexpf(*input, 32); + uint8 small_depth = depth >> 24; + uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 }; + fwrite(&better_data, 1, 4, fp); + } + } else { + // swap red and blue + uint8* better_data = data; + for (int w = m_size.x; w > 0; w--, better_data += 4) { + uint8 red = better_data[2]; + better_data[2] = better_data[0]; + better_data[0] = red; + fwrite(better_data, 1, 4, fp); + } + } + } + + fclose(fp); +} bool GSTextureOGL::Save(const string& fn, bool dds) { @@ -325,6 +387,8 @@ bool GSTextureOGL::Save(const string& fn, bool dds) if (IsDss()) pitch *= 2; char* image = (char*)malloc(pitch * m_size.y); + // FIXME instead of swapping manually B and R maybe you can request the driver to do it + // for us if (IsBackbuffer()) { glReadBuffer(GL_BACK); glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image); @@ -336,70 +400,9 @@ bool GSTextureOGL::Save(const string& fn, bool dds) glGetTexImage(m_texture_target, 0, GL_RGBA, GL_UNSIGNED_BYTE, image); } - // Build a BMP file - if(FILE* fp = fopen(fn.c_str(), "wb")) - { - BITMAPINFOHEADER bih; + Save(fn, image, pitch); + free(image); - memset(&bih, 0, sizeof(bih)); - - bih.biSize = sizeof(bih); - bih.biWidth = m_size.x; - bih.biHeight = m_size.y; - bih.biPlanes = 1; - bih.biBitCount = 32; - bih.biCompression = BI_RGB; - bih.biSizeImage = m_size.x * m_size.y << 2; - - BITMAPFILEHEADER bfh; - - memset(&bfh, 0, sizeof(bfh)); - - uint8* bfType = (uint8*)&bfh.bfType; - - // bfh.bfType = 'MB'; - bfType[0] = 0x42; - bfType[1] = 0x4d; - bfh.bfOffBits = sizeof(bfh) + sizeof(bih); - bfh.bfSize = bfh.bfOffBits + bih.biSizeImage; - bfh.bfReserved1 = bfh.bfReserved2 = 0; - - fwrite(&bfh, 1, sizeof(bfh), fp); - fwrite(&bih, 1, sizeof(bih), fp); - - uint8* data = (uint8*)image + (m_size.y - 1) * pitch; - - for(int h = m_size.y; h > 0; h--, data -= pitch) - { - if (IsDss()) { - // Only get the depth and convert it to an integer - uint8* better_data = data; - for (int w = m_size.x; w > 0; w--, better_data += 8) { - float* input = (float*)better_data; - // FIXME how to dump 32 bits value into 8bits component color - uint32 depth = (uint32)ldexpf(*input, 32); - uint8 small_depth = depth >> 24; - uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 }; - fwrite(&better_data, 1, 4, fp); - } - } else { - // swap red and blue - uint8* better_data = data; - for (int w = m_size.x; w > 0; w--, better_data += 4) { - uint8 red = better_data[2]; - better_data[2] = better_data[0]; - better_data[0] = red; - fwrite(better_data, 1, 4, fp); - } - } - } - - fclose(fp); - - free(image); - return true; - } - - return false; + return true; } diff --git a/plugins/GSdx/GSTextureOGL.h b/plugins/GSdx/GSTextureOGL.h index 9138ff4ec4..92c9010928 100644 --- a/plugins/GSdx/GSTextureOGL.h +++ b/plugins/GSdx/GSTextureOGL.h @@ -39,6 +39,7 @@ class GSTextureOGL : public GSTexture bool Map(GSMap& m, const GSVector4i* r = NULL); void Unmap(); bool Save(const string& fn, bool dds = false); + void Save(const string& fn, const void* image, uint32 pitch); void EnableUnit(uint unit); void Attach(GLenum attachment); diff --git a/plugins/GSdx/res/convert.glsl b/plugins/GSdx/res/convert.glsl index d2c9f3fb0f..2887215d72 100644 --- a/plugins/GSdx/res/convert.glsl +++ b/plugins/GSdx/res/convert.glsl @@ -1,5 +1,12 @@ //#version 420 // Keep it for editor detection +struct vertex_basic +{ + vec4 p; + vec2 t; +}; + + #ifdef VERTEX_SHADER out gl_PerVertex { @@ -19,24 +26,54 @@ layout(location = 1) in vec2 TEXCOORD0; // smooth, the default, means to do perspective-correct interpolation. // // The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area. -// FIXME gl_Position -layout(location = 0) out vec4 POSITION_OUT; -layout(location = 1) out vec2 TEXCOORD0_OUT; +layout(location = 0) out vertex_basic VSout; void vs_main() { - POSITION_OUT = POSITION; - TEXCOORD0_OUT = TEXCOORD0; + VSout.p = POSITION; + VSout.t = TEXCOORD0; gl_Position = POSITION; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position } #endif +#ifdef GEOMETRY_SHADER +in gl_PerVertex { + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[]; +} gl_in[]; + +out gl_PerVertex { + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[]; +}; + +// FIXME +// AMD Driver bug again !!!! +//layout(location = 0) in vertex GSin[]; +in vertex_basic GSin[]; + +layout(location = 0) out vertex_basic GSout; +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +void gs_main() +{ + for(int i = 0; i < gl_in.length(); i++) { + gl_Position = gl_in[i].gl_Position; + GSout = GSin[i]; + EmitVertex(); + } + EndPrimitive(); +} +#endif + #ifdef FRAGMENT_SHADER // NOTE: pixel can be clip with "discard" -layout(location = 0) in vec4 SV_Position; -layout(location = 1) in vec2 TEXCOORD0; +layout(location = 0) in vertex_basic PSin; layout(location = 0) out vec4 SV_Target0; layout(location = 1) out uint SV_Target1; @@ -45,7 +82,7 @@ layout(binding = 0) uniform sampler2D TextureSampler; vec4 sample_c() { - return texture(TextureSampler, vec2(TEXCOORD0.x,TEXCOORD0.y) ); + return texture(TextureSampler, PSin.t ); } vec4 ps_crt(uint i) @@ -88,7 +125,7 @@ void ps_main7() void ps_main5() // triangular { - highp uvec4 p = uvec4(SV_Position); + highp uvec4 p = uvec4(PSin.p); vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u); @@ -97,7 +134,7 @@ void ps_main5() // triangular void ps_main6() // diagonal { - uvec4 p = uvec4(SV_Position); + uvec4 p = uvec4(PSin.p); vec4 c = ps_crt((p.x + (p.y % 3)) % 3); diff --git a/plugins/GSdx/res/interlace.glsl b/plugins/GSdx/res/interlace.glsl index 2792ea60a6..97e567fa1b 100644 --- a/plugins/GSdx/res/interlace.glsl +++ b/plugins/GSdx/res/interlace.glsl @@ -1,8 +1,13 @@ //#version 420 // Keep it for editor detection +struct vertex_basic +{ + vec4 p; + vec2 t; +}; + #ifdef FRAGMENT_SHADER -layout(location = 0) in vec4 SV_Position; -layout(location = 1) in vec2 TEXCOORD0; +layout(location = 0) in vertex_basic PSin; layout(location = 0) out vec4 SV_Target0; @@ -19,8 +24,8 @@ void ps_main0() { // I'm not sure it impact us but be safe to lookup texture before conditional if // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control - vec4 c = texture(TextureSampler, TEXCOORD0); - if (fract(TEXCOORD0.y * hH) - 0.5 < 0.0) + vec4 c = texture(TextureSampler, PSin.t); + if (fract(PSin.t.y * hH) - 0.5 < 0.0) discard; SV_Target0 = c; @@ -30,8 +35,8 @@ void ps_main1() { // I'm not sure it impact us but be safe to lookup texture before conditional if // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control - vec4 c = texture(TextureSampler, TEXCOORD0); - if (0.5 - fract(TEXCOORD0.y * hH) < 0.0) + vec4 c = texture(TextureSampler, PSin.t); + if (0.5 - fract(PSin.t.y * hH) < 0.0) discard; SV_Target0 = c; @@ -39,16 +44,16 @@ void ps_main1() void ps_main2() { - vec4 c0 = texture(TextureSampler, TEXCOORD0 - ZrH); - vec4 c1 = texture(TextureSampler, TEXCOORD0); - vec4 c2 = texture(TextureSampler, TEXCOORD0 + ZrH); + vec4 c0 = texture(TextureSampler, PSin.t - ZrH); + vec4 c1 = texture(TextureSampler, PSin.t); + vec4 c2 = texture(TextureSampler, PSin.t + ZrH); SV_Target0 = (c0 + c1 * 2 + c2) / 4; } void ps_main3() { - SV_Target0 = texture(TextureSampler, TEXCOORD0); + SV_Target0 = texture(TextureSampler, PSin.t); } #endif diff --git a/plugins/GSdx/res/merge.glsl b/plugins/GSdx/res/merge.glsl index 2e438d4055..e9a93bdc88 100644 --- a/plugins/GSdx/res/merge.glsl +++ b/plugins/GSdx/res/merge.glsl @@ -1,8 +1,13 @@ //#version 420 // Keep it for editor detection +struct vertex_basic +{ + vec4 p; + vec2 t; +}; + #ifdef FRAGMENT_SHADER -layout(location = 0) in vec4 SV_Position; -layout(location = 1) in vec2 TEXCOORD0; +layout(location = 0) in vertex_basic PSin; layout(location = 0) out vec4 SV_Target0; @@ -15,14 +20,14 @@ layout(binding = 0) uniform sampler2D TextureSampler; void ps_main0() { - vec4 c = texture(TextureSampler, TEXCOORD0); + vec4 c = texture(TextureSampler, PSin.t); c.a = min(c.a * 2, 1.0); SV_Target0 = c; } void ps_main1() { - vec4 c = texture(TextureSampler, TEXCOORD0); + vec4 c = texture(TextureSampler, PSin.t); c.a = BGColor.a; SV_Target0 = c; } diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index 4ac5f22848..0737c5e56e 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -55,7 +55,7 @@ layout(location = 3) in uvec2 i_p; layout(location = 4) in uint i_z; layout(location = 5) in vec4 i_f; -layout(location = 0) out vertex OUT; +layout(location = 0) out vertex VSout; out gl_PerVertex { vec4 gl_Position; @@ -91,33 +91,33 @@ void vs_main() // FLIP vertically final_p.y *= -1.0f; - OUT.p = final_p; + VSout.p = final_p; gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position #if VS_RTCOPY - OUT.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5; + VSout.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5; #endif if(VS_TME != 0) { if(VS_FST != 0) { - OUT.t.xy = i_t * TextureScale; - OUT.t.w = 1.0f; + VSout.t.xy = i_t * TextureScale; + VSout.t.w = 1.0f; } else { - OUT.t.xy = i_t; - OUT.t.w = i_q; + VSout.t.xy = i_t; + VSout.t.w = i_q; } } else { - OUT.t.xy = vec2(0.0f, 0.0f); - OUT.t.w = 1.0f; + VSout.t.xy = vec2(0.0f, 0.0f); + VSout.t.w = 1.0f; } - OUT.c = i_c; - OUT.t.z = i_f.a; + VSout.c = i_c; + VSout.t.z = i_f.a; } #endif @@ -633,8 +633,5 @@ void ps_main() } SV_Target1 = c; - - //SV_Target0 = vec4(1.0f,0.0f,0.0f, 1.0f); - //SV_Target1 = vec4(0.0f,1.0f,0.0f, 1.0f); } #endif