From ea33beb36023306c8ce71ca991e0f716151e2f8f Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Fri, 30 Dec 2011 13:55:33 +0000 Subject: [PATCH] gsdx-ogl: LINUX-ONLY * Fix the Geometry shader to output 2 triangles for quad primitive (ie 2R rendering) - There is an AMD driver bug on geometry shader input interface (well could be the spec too). Tell me if it is still working on nvidia * Add a workaround to a previous AMD bug. It is impossible to unattach a shader so destroy the full shader pipeline... * Be more strict on FBO management. Would optimize it later * use a texture instead of a render buffer for depth-stencil management. * add more dumping capabilities (in particular depth buffer) git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5033 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDeviceOGL.cpp | 70 +++++++++++++--- plugins/GSdx/GSDeviceOGL.h | 14 +++- plugins/GSdx/GSTextureOGL.cpp | 146 +++++++--------------------------- plugins/GSdx/res/tfx.glsl | 85 +++++++++++--------- 4 files changed, 147 insertions(+), 168 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 74a3ecd720..19c9d76e44 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -53,8 +53,10 @@ // glUniformBlockBinding(program, block_index, block_binding_point); //#define LOUD_DEBUGGING -//#define DUMP_START (380) -#define DUMP_LENGTH (20) +#define SHADER_DEBUG +//#define DUMP_START (500) +//#define DUMP_LENGTH (40) +//#define DUMP_ONLY_FRAME (112) #ifdef DUMP_START static uint32 g_draw_count = 0; @@ -508,16 +510,27 @@ void GSDeviceOGL::Flip() void GSDeviceOGL::DrawPrimitive() { - glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count); #ifdef DUMP_START - if (g_draw_count > DUMP_START && g_draw_count < (DUMP_START+DUMP_LENGTH)) { + bool dump_me = false; + if ( (g_draw_count > DUMP_START && g_draw_count < (DUMP_START+DUMP_LENGTH)) ) + dump_me = true; +#ifdef DUMP_ONLY_FRAME + if 
(DUMP_ONLY_FRAME != 0 && DUMP_ONLY_FRAME == g_frame_count) + dump_me = true; + else if (DUMP_ONLY_FRAME != 0) + dump_me = false; +#endif +#endif + + // DUMP INPUT +#ifdef DUMP_START + if ( dump_me ) { for (auto i = 0 ; i < 3 ; i++) { if (m_state.ps_srv[i] != NULL) { - m_state.ps_srv[i]->Save(format("/tmp/in_%d__%d.bmp", g_draw_count, i),false); + m_state.ps_srv[i]->Save(format("/tmp/in_%d__%d.bmp", g_draw_count, i)); } } - if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_%d.bmp", g_draw_count),false); - if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/out_%d_ds.bmp", g_draw_count),false); + if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_in_%d.bmp", g_draw_count)); string topo; switch (m_state.topology) { @@ -530,18 +543,41 @@ void GSDeviceOGL::DrawPrimitive() fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, m_state.vb_state->count, topo.c_str() ); fprintf(stderr, "vs: %d ; gs: %d ; ps: %d\n", m_state.vs, m_state.gs, m_state.ps); fprintf(stderr, "Blend: %d, Depth: %d, Stencil: %d \n",m_state.bs->m_enable, m_state.dss->m_depth_enable, m_state.dss->m_stencil_enable); + } +#endif + + glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count); + + // DUMP OUTPUT +#ifdef DUMP_START + if ( dump_me ) { + if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_%d.bmp", g_draw_count)); + if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count)); - //fprintf(stderr, "type: %d, format: 0x%x\n", m_state.rtv->GetType(), m_state.rtv->GetFormat()); fprintf(stderr, "\n"); } g_draw_count++; #endif + + + // FIXME AMD driver bug workaround + // You cannot unattach shader. So destroy everythings and recreate the shader pipeline... + // Slow and painful... 
+ glBindProgramPipeline(0); + glDeleteProgramPipelines(1, &m_pipeline); + m_state.gs = 0; + m_state.ps = 0; + m_state.vs = 0; + + glGenProgramPipelines(1, &m_pipeline); + glBindProgramPipeline(m_pipeline); } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { + GLuint fbo_old = m_state.fbo; if (static_cast(t)->IsBackbuffer()) { // FIXME I really not sure OMSetFBO(0); @@ -557,6 +593,7 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) static_cast(t)->Attach(GL_COLOR_ATTACHMENT0); glClearBufferfv(GL_COLOR, 0, c.v); } + OMSetFBO(fbo_old); } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) @@ -567,16 +604,19 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) void GSDeviceOGL::ClearDepth(GSTexture* t, float c) { + GLuint fbo_old = m_state.fbo; // FIXME I need to clarify this FBO attachment stuff // I would like to avoid FBO for a basic clean operation OMSetFBO(m_fbo); static_cast(t)->Attach(GL_DEPTH_STENCIL_ATTACHMENT); // FIXME can you clean depth and stencil separately glClearBufferfv(GL_DEPTH, 0, &c); + OMSetFBO(fbo_old); } void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { + GLuint fbo_old = m_state.fbo; // FIXME I need to clarify this FBO attachment stuff // I would like to avoid FBO for a basic clean operation OMSetFBO(m_fbo); @@ -584,6 +624,7 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) GLint color = c; // FIXME can you clean depth and stencil separately glClearBufferiv(GL_STENCIL, 0, &color); + OMSetFBO(fbo_old); } GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) @@ -665,6 +706,11 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) // GL_NV_copy_image seem like the good extension but not supported on AMD... // Maybe opengl 4.3 ! 
// FIXME check those function work as expected + + // FIXME FBO + GLuint fbo_old = m_state.fbo; + OMSetFBO(m_fbo); + // Set the input of glCopyTexSubImage2D static_cast(st)->Attach(GL_COLOR_ATTACHMENT1); @@ -674,6 +720,8 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) static_cast(dt)->EnableUnit(0); glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, dt->GetWidth(), dt->GetHeight()); + OMSetFBO(fbo_old); + #if 0 // FIXME attach the texture to the FBO GSTextureOGL* st_ogl = (GSTextureOGL*) st; @@ -879,6 +927,9 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver // +#ifdef DUMP_START + fprintf(stderr, "draw date!!!\n"); +#endif DrawPrimitive(); // @@ -982,7 +1033,6 @@ void GSDeviceOGL::GSSetShader(GLuint gs) if(m_state.gs != gs) { m_state.gs = gs; - // FIXME AMD driver bug !!!!!!!! glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs); } } @@ -1244,6 +1294,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st free(header_str); free(sources_array); +#ifdef SHADER_DEBUG // Print a nice debug log GLint log_length = 0; glGetProgramiv(*program, GL_INFO_LOG_LENGTH, &log_length); @@ -1254,6 +1305,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), *program); fprintf(stderr, "\n%s", macro_sel.c_str()); fprintf(stderr, "%s\n", log); +#endif free(log); } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index cc3da91b60..98ce44d7f2 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -180,10 +180,16 @@ struct GSVertexBufferState { for (int i = 0; i < layout_nbr; i++) { // Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer glEnableVertexAttribArray(layout[i].index); - if (layout[i].type == GL_UNSIGNED_INT || layout[i].type == GL_UNSIGNED_SHORT) - glVertexAttribIPointer(layout[i].index, 
layout[i].size, layout[i].type, layout[i].stride, layout[i].offset); - else - glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, layout[i].normalize, layout[i].stride, layout[i].offset); + switch (layout[i].type) { + case GL_UNSIGNED_SHORT: + case GL_UNSIGNED_INT: + // Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I) + glVertexAttribIPointer(layout[i].index, layout[i].size, layout[i].type, layout[i].stride, layout[i].offset); + break; + default: + glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, layout[i].normalize, layout[i].stride, layout[i].offset); + break; + } } } diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 2cf2510196..094f5a9021 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -95,14 +95,10 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format) // corollary we can maybe use it for multisample stuff case GSTexture::Texture: case GSTexture::RenderTarget: + case GSTexture::DepthStencil: glGenTextures(1, &m_texture_id); m_texture_target = GL_TEXTURE_2D; break; - case GSTexture::DepthStencil: - glGenRenderbuffers(1, &m_texture_id); - m_texture_target = GL_RENDERBUFFER; - break; - break; case GSTexture::Backbuffer: m_texture_target = 0; m_texture_id = 0; @@ -123,8 +119,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format) // Allocate the buffer switch (m_type) { case GSTexture::DepthStencil: - glBindRenderbuffer(m_texture_target, m_texture_id); - glRenderbufferStorageMultisample(m_texture_target, msaa_level, m_format, m_size.y, m_size.x); + EnableUnit(1); + glTexImage2D(m_texture_target, 0, m_format, w, h, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); break; case GSTexture::RenderTarget: case GSTexture::Texture: @@ -155,28 +151,12 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format) GSTextureOGL::~GSTextureOGL() { 
glDeleteBuffers(1, &m_extra_buffer_id); - switch (m_type) { - case GSTexture::Texture: - case GSTexture::RenderTarget: - glDeleteTextures(1, &m_texture_id); - break; - case GSTexture::DepthStencil: - glDeleteRenderbuffers(1, &m_texture_id); - break; - case GSTexture::Offscreen: - assert(0); - break; - default: - break; - } + glDeleteTextures(1, &m_texture_id); } void GSTextureOGL::Attach(GLenum attachment) { - if (m_type == GSTexture::DepthStencil) - glFramebufferRenderbuffer(GL_FRAMEBUFFER, attachment, m_texture_target, m_texture_id); - else - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, m_texture_target, m_texture_id, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, m_texture_target, m_texture_id, 0); } bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) @@ -245,28 +225,21 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) void GSTextureOGL::EnableUnit(uint unit) { - switch (m_type) { - case GSTexture::DepthStencil: - case GSTexture::Offscreen: - assert(0); - break; - case GSTexture::RenderTarget: - case GSTexture::Texture: - // FIXME - // Howto allocate the texture unit !!! - // In worst case the HW renderer seems to use 3 texture unit - // For the moment SW renderer only use 1 so don't bother - if (g_state_texture_unit != unit) { - g_state_texture_unit = unit; - glActiveTexture(GL_TEXTURE0 + unit); - // When you change the texture unit, texture must be rebinded - g_state_texture_id = m_texture_id; - glBindTexture(m_texture_target, m_texture_id); - } else if (g_state_texture_id != m_texture_id) { - g_state_texture_id = m_texture_id; - glBindTexture(m_texture_target, m_texture_id); - } - break; + if (!IsBackbuffer()) { + // FIXME + // Howto allocate the texture unit !!! 
+ // In worst case the HW renderer seems to use 3 texture unit + // For the moment SW renderer only use 1 so don't bother + if (g_state_texture_unit != unit) { + g_state_texture_unit = unit; + glActiveTexture(GL_TEXTURE0 + unit); + // When you change the texture unit, texture must be rebinded + g_state_texture_id = m_texture_id; + glBindTexture(m_texture_target, m_texture_id); + } else if (g_state_texture_id != m_texture_id) { + g_state_texture_id = m_texture_id; + glBindTexture(m_texture_target, m_texture_id); + } } } @@ -303,13 +276,6 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* r) return false; #if 0 - if(r != NULL) - { - // ASSERT(0); // not implemented - - return false; - } - if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING) { D3D11_MAPPED_SUBRESOURCE map; @@ -370,24 +336,20 @@ struct BITMAPINFOHEADER bool GSTextureOGL::Save(const string& fn, bool dds) { - // Code not yet working - if (IsDss()) return false; - // Collect the texture data uint32 pitch = 4 * m_size.x; if (IsDss()) pitch *= 2; char* image = (char*)malloc(pitch * m_size.y); if (IsBackbuffer()) { - // TODO backbuffer glReadBuffer(GL_BACK); glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image); } else if(IsDss()) { - Attach(GL_DEPTH_STENCIL_ATTACHMENT); - glGetTexImage(GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image); + EnableUnit(1); + glGetTexImage(m_texture_target, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image); } else { EnableUnit(0); - glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, image); + glGetTexImage(m_texture_target, 0, GL_RGBA, GL_UNSIGNED_BYTE, image); } // Build a BMP file @@ -430,8 +392,11 @@ bool GSTextureOGL::Save(const string& fn, bool dds) uint8* better_data = data; for (int w = m_size.x; w > 0; w--, better_data += 8) { float* input = (float*)better_data; + // FIXME how to dump 32 bits value into 8bits component color uint32 depth = (uint32)ldexpf(*input, 32); - fwrite(&depth, 1, 4, fp); + 
uint8 small_depth = depth >> 24; + uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 }; + fwrite(&better_data, 1, 4, fp); } } else { // swap red and blue @@ -443,7 +408,6 @@ bool GSTextureOGL::Save(const string& fn, bool dds) fwrite(better_data, 1, 4, fp); } } - // fwrite(data, 1, m_size.x << 2, fp); // TODO: swap red-blue? } fclose(fp); @@ -453,61 +417,5 @@ bool GSTextureOGL::Save(const string& fn, bool dds) } return false; -#if 0 - CComPtr res; - - if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) - { - HRESULT hr; - - D3D11_TEXTURE2D_DESC desc; - - memset(&desc, 0, sizeof(desc)); - - m_texture->GetDesc(&desc); - - desc.Usage = D3D11_USAGE_STAGING; - desc.BindFlags = 0; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - - CComPtr src, dst; - - hr = m_dev->CreateTexture2D(&desc, NULL, &src); - - m_ctx->CopyResource(src, m_texture); - - desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - - hr = m_dev->CreateTexture2D(&desc, NULL, &dst); - - D3D11_MAPPED_SUBRESOURCE sm, dm; - - hr = m_ctx->Map(src, 0, D3D11_MAP_READ, 0, &sm); - hr = m_ctx->Map(dst, 0, D3D11_MAP_WRITE, 0, &dm); - - uint8* s = (uint8*)sm.pData; - uint8* d = (uint8*)dm.pData; - - for(uint32 y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch) - { - for(uint32 x = 0; x < desc.Width; x++) - { - ((uint32*)d)[x] = (uint32)(ldexpf(((float*)s)[x*2], 32)); - } - } - - m_ctx->Unmap(src, 0); - m_ctx->Unmap(dst, 0); - - res = dst; - } - else - { - res = m_texture; - } - - return SUCCEEDED(D3DX11SaveTextureToFile(m_ctx, res, dds ? 
D3DX11_IFF_DDS : D3DX11_IFF_BMP, fn.c_str())); -#endif } diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index fbf84f659d..8e8f63304a 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -87,6 +87,9 @@ void vs_main() vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); vec4 final_p = p * VertexScale - VertexOffset; + // FIXME + // FLIP vertically + final_p.y *= -1.0f; OUT.p = final_p; gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position @@ -132,7 +135,10 @@ out gl_PerVertex { float gl_ClipDistance[]; }; -layout(location = 0) in vertex GSin[]; +// FIXME +// AMD Driver bug again !!!! +//layout(location = 0) in vertex GSin[]; +in vertex GSin[]; layout(location = 0) out vertex GSout; @@ -188,46 +194,59 @@ void gs_main() #elif GS_PRIM == 3 layout(lines) in; -layout(triangle_strip, max_vertices = 4) out; +layout(triangle_strip, max_vertices = 6) out; void gs_main() { // left top => GSin[0]; // right bottom => GSin[1]; + vertex rb = GSin[1]; + vertex lt = GSin[0]; - - // left top - GSout = GSin[0]; - - GSout.p.z = GSin[1].p.z; - GSout.t.zw = GSin[1].t.zw; - gl_Position = GSout.p; // FIXME is it useful + lt.p.z = rb.p.z; + lt.t.zw = rb.t.zw; #if GS_IIP == 0 - GSout.c = GSin[1].c; + lt.c = rb.c; #endif + + vertex lb = rb; + lb.p.x = lt.p.x; + lb.t.x = lt.t.x; + + vertex rt = rb; + rt.p.y = lt.p.y; + rt.t.y = lt.t.y; + + // Triangle 1 + gl_Position = lt.p; + GSout = lt; EmitVertex(); - // left bottom - GSout = GSin[1]; - gl_Position = gl_in[1].gl_Position; // FIXME is it useful - gl_Position.x = GSin[0].p.x; - GSout.p.x = GSin[0].p.x; - GSout.t.x = GSin[0].t.x; + gl_Position = lb.p; + GSout = lb; EmitVertex(); - // rigth top - GSout = GSin[1]; - gl_Position = gl_in[1].gl_Position; // FIXME is it useful - gl_Position.y = GSin[0].p.y; - GSout.p.y = GSin[0].p.y; - GSout.t.y = GSin[0].t.y; + gl_Position = rt.p; + GSout = rt; EmitVertex(); - // rigth bottom - GSout = GSin[1]; - gl_Position = 
GSin[1].p; // FIXME is it useful + EndPrimitive(); + + // Triangle 2 + gl_Position = lb.p; + GSout = lb; EmitVertex(); + gl_Position = rt.p; + GSout = rt; + EmitVertex(); + + gl_Position = rb.p; + GSout = rb; + EmitVertex(); + + EndPrimitive(); + } #endif @@ -238,13 +257,8 @@ void gs_main() layout(location = 0) in vertex PSin; // Same buffer but 2 colors for dual source blending -//FIXME -#if 1 - layout(location = 0, index = 0) out vec4 SV_Target0; - layout(location = 0, index = 1) out vec4 SV_Target1; -#else - layout(location = 0) out vec4 SV_Target; -#endif +layout(location = 0, index = 0) out vec4 SV_Target0; +layout(location = 0, index = 1) out vec4 SV_Target1; layout(binding = 0) uniform sampler2D TextureSampler; layout(binding = 1) uniform sampler2D PaletteSampler; @@ -264,7 +278,10 @@ layout(std140, binding = 5) uniform cb1 vec4 sample_c(vec2 uv) { + // FIXME I'm not sure it is a good solution to flip texture return texture(TextureSampler, uv); + //FIXME another way to FLIP vertically + //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) ); } vec4 sample_p(float u) @@ -598,7 +615,6 @@ vec4 ps_color() void ps_main() { //FIXME -#if 1 vec4 c = ps_color(); // FIXME: I'm not sure about the value of others field @@ -620,8 +636,5 @@ void ps_main() //SV_Target0 = vec4(1.0f,0.0f,0.0f, 1.0f); //SV_Target1 = vec4(0.0f,1.0f,0.0f, 1.0f); -#else - SV_Target = vec4(1.0f,0.0f,0.0f, 1.0f); -#endif } #endif