gsdx-ogl: LINUX-ONLY

* Fix the geometry shader to output 2 triangles for the quad primitive (i.e. 2R rendering)
- There is an AMD driver bug on the geometry shader input interface (well, it could be the spec too). Tell me if it is still working on NVIDIA.
* Add a workaround for a previous AMD bug. It is impossible to detach a shader, so destroy the full shader pipeline...
* Be stricter about FBO management. It will be optimized later.
* Use a texture instead of a render buffer for depth-stencil management.
* Add more dumping capabilities (in particular for the depth buffer).


git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5033 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2011-12-30 13:55:33 +00:00
parent 000220f93f
commit ea33beb360
4 changed files with 147 additions and 168 deletions

View File

@ -53,8 +53,10 @@
// glUniformBlockBinding(program, block_index, block_binding_point);
//#define LOUD_DEBUGGING
//#define DUMP_START (380)
#define DUMP_LENGTH (20)
#define SHADER_DEBUG
//#define DUMP_START (500)
//#define DUMP_LENGTH (40)
//#define DUMP_ONLY_FRAME (112)
#ifdef DUMP_START
static uint32 g_draw_count = 0;
@ -508,16 +510,27 @@ void GSDeviceOGL::Flip()
void GSDeviceOGL::DrawPrimitive()
{
glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count);
#ifdef DUMP_START
if (g_draw_count > DUMP_START && g_draw_count < (DUMP_START+DUMP_LENGTH)) {
bool dump_me = false;
if ( (g_draw_count > DUMP_START && g_draw_count < (DUMP_START+DUMP_LENGTH)) )
dump_me = true;
#ifdef DUMP_ONLY_FRAME
if (DUMP_ONLY_FRAME != 0 && DUMP_ONLY_FRAME == g_frame_count)
dump_me = true;
else if (DUMP_ONLY_FRAME != 0)
dump_me = false;
#endif
#endif
// DUMP INPUT
#ifdef DUMP_START
if ( dump_me ) {
for (auto i = 0 ; i < 3 ; i++) {
if (m_state.ps_srv[i] != NULL) {
m_state.ps_srv[i]->Save(format("/tmp/in_%d__%d.bmp", g_draw_count, i),false);
m_state.ps_srv[i]->Save(format("/tmp/in_%d__%d.bmp", g_draw_count, i));
}
}
if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_%d.bmp", g_draw_count),false);
if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/out_%d_ds.bmp", g_draw_count),false);
if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_in_%d.bmp", g_draw_count));
string topo;
switch (m_state.topology) {
@ -530,18 +543,41 @@ void GSDeviceOGL::DrawPrimitive()
fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, m_state.vb_state->count, topo.c_str() );
fprintf(stderr, "vs: %d ; gs: %d ; ps: %d\n", m_state.vs, m_state.gs, m_state.ps);
fprintf(stderr, "Blend: %d, Depth: %d, Stencil: %d \n",m_state.bs->m_enable, m_state.dss->m_depth_enable, m_state.dss->m_stencil_enable);
}
#endif
glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count);
// DUMP OUTPUT
#ifdef DUMP_START
if ( dump_me ) {
if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_%d.bmp", g_draw_count));
if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count));
//fprintf(stderr, "type: %d, format: 0x%x\n", m_state.rtv->GetType(), m_state.rtv->GetFormat());
fprintf(stderr, "\n");
}
g_draw_count++;
#endif
// FIXME AMD driver bug workaround
// You cannot unattach shader. So destroy everythings and recreate the shader pipeline...
// Slow and painful...
glBindProgramPipeline(0);
glDeleteProgramPipelines(1, &m_pipeline);
m_state.gs = 0;
m_state.ps = 0;
m_state.vs = 0;
glGenProgramPipelines(1, &m_pipeline);
glBindProgramPipeline(m_pipeline);
}
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
{
GLuint fbo_old = m_state.fbo;
if (static_cast<GSTextureOGL*>(t)->IsBackbuffer()) {
// FIXME I really not sure
OMSetFBO(0);
@ -557,6 +593,7 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
static_cast<GSTextureOGL*>(t)->Attach(GL_COLOR_ATTACHMENT0);
glClearBufferfv(GL_COLOR, 0, c.v);
}
OMSetFBO(fbo_old);
}
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c)
@ -567,16 +604,19 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c)
void GSDeviceOGL::ClearDepth(GSTexture* t, float c)
{
GLuint fbo_old = m_state.fbo;
// FIXME I need to clarify this FBO attachment stuff
// I would like to avoid FBO for a basic clean operation
OMSetFBO(m_fbo);
static_cast<GSTextureOGL*>(t)->Attach(GL_DEPTH_STENCIL_ATTACHMENT);
// FIXME can you clean depth and stencil separately
glClearBufferfv(GL_DEPTH, 0, &c);
OMSetFBO(fbo_old);
}
void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
{
GLuint fbo_old = m_state.fbo;
// FIXME I need to clarify this FBO attachment stuff
// I would like to avoid FBO for a basic clean operation
OMSetFBO(m_fbo);
@ -584,6 +624,7 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
GLint color = c;
// FIXME can you clean depth and stencil separately
glClearBufferiv(GL_STENCIL, 0, &color);
OMSetFBO(fbo_old);
}
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
@ -666,6 +707,11 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
// Maybe opengl 4.3 !
// FIXME check those function work as expected
// FIXME FBO
GLuint fbo_old = m_state.fbo;
OMSetFBO(m_fbo);
// Set the input of glCopyTexSubImage2D
static_cast<GSTextureOGL*>(st)->Attach(GL_COLOR_ATTACHMENT1);
glReadBuffer(GL_COLOR_ATTACHMENT1);
@ -674,6 +720,8 @@ void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
static_cast<GSTextureOGL*>(dt)->EnableUnit(0);
glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, dt->GetWidth(), dt->GetHeight());
OMSetFBO(fbo_old);
#if 0
// FIXME attach the texture to the FBO
GSTextureOGL* st_ogl = (GSTextureOGL*) st;
@ -879,6 +927,9 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
//
#ifdef DUMP_START
fprintf(stderr, "draw date!!!\n");
#endif
DrawPrimitive();
//
@ -982,7 +1033,6 @@ void GSDeviceOGL::GSSetShader(GLuint gs)
if(m_state.gs != gs)
{
m_state.gs = gs;
// FIXME AMD driver bug !!!!!!!!
glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs);
}
}
@ -1244,6 +1294,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st
free(header_str);
free(sources_array);
#ifdef SHADER_DEBUG
// Print a nice debug log
GLint log_length = 0;
glGetProgramiv(*program, GL_INFO_LOG_LENGTH, &log_length);
@ -1254,6 +1305,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), *program);
fprintf(stderr, "\n%s", macro_sel.c_str());
fprintf(stderr, "%s\n", log);
#endif
free(log);
}

View File

@ -180,10 +180,16 @@ struct GSVertexBufferState {
for (int i = 0; i < layout_nbr; i++) {
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(layout[i].index);
if (layout[i].type == GL_UNSIGNED_INT || layout[i].type == GL_UNSIGNED_SHORT)
switch (layout[i].type) {
case GL_UNSIGNED_SHORT:
case GL_UNSIGNED_INT:
// Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I)
glVertexAttribIPointer(layout[i].index, layout[i].size, layout[i].type, layout[i].stride, layout[i].offset);
else
break;
default:
glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, layout[i].normalize, layout[i].stride, layout[i].offset);
break;
}
}
}

View File

@ -95,14 +95,10 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)
// corollary we can maybe use it for multisample stuff
case GSTexture::Texture:
case GSTexture::RenderTarget:
case GSTexture::DepthStencil:
glGenTextures(1, &m_texture_id);
m_texture_target = GL_TEXTURE_2D;
break;
case GSTexture::DepthStencil:
glGenRenderbuffers(1, &m_texture_id);
m_texture_target = GL_RENDERBUFFER;
break;
break;
case GSTexture::Backbuffer:
m_texture_target = 0;
m_texture_id = 0;
@ -123,8 +119,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)
// Allocate the buffer
switch (m_type) {
case GSTexture::DepthStencil:
glBindRenderbuffer(m_texture_target, m_texture_id);
glRenderbufferStorageMultisample(m_texture_target, msaa_level, m_format, m_size.y, m_size.x);
EnableUnit(1);
glTexImage2D(m_texture_target, 0, m_format, w, h, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL);
break;
case GSTexture::RenderTarget:
case GSTexture::Texture:
@ -155,27 +151,11 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)
GSTextureOGL::~GSTextureOGL()
{
glDeleteBuffers(1, &m_extra_buffer_id);
switch (m_type) {
case GSTexture::Texture:
case GSTexture::RenderTarget:
glDeleteTextures(1, &m_texture_id);
break;
case GSTexture::DepthStencil:
glDeleteRenderbuffers(1, &m_texture_id);
break;
case GSTexture::Offscreen:
assert(0);
break;
default:
break;
}
}
void GSTextureOGL::Attach(GLenum attachment)
{
if (m_type == GSTexture::DepthStencil)
glFramebufferRenderbuffer(GL_FRAMEBUFFER, attachment, m_texture_target, m_texture_id);
else
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, m_texture_target, m_texture_id, 0);
}
@ -245,13 +225,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
void GSTextureOGL::EnableUnit(uint unit)
{
switch (m_type) {
case GSTexture::DepthStencil:
case GSTexture::Offscreen:
assert(0);
break;
case GSTexture::RenderTarget:
case GSTexture::Texture:
if (!IsBackbuffer()) {
// FIXME
// Howto allocate the texture unit !!!
// In worst case the HW renderer seems to use 3 texture unit
@ -266,7 +240,6 @@ void GSTextureOGL::EnableUnit(uint unit)
g_state_texture_id = m_texture_id;
glBindTexture(m_texture_target, m_texture_id);
}
break;
}
}
@ -303,13 +276,6 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* r)
return false;
#if 0
if(r != NULL)
{
// ASSERT(0); // not implemented
return false;
}
if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING)
{
D3D11_MAPPED_SUBRESOURCE map;
@ -370,24 +336,20 @@ struct BITMAPINFOHEADER
bool GSTextureOGL::Save(const string& fn, bool dds)
{
// Code not yet working
if (IsDss()) return false;
// Collect the texture data
uint32 pitch = 4 * m_size.x;
if (IsDss()) pitch *= 2;
char* image = (char*)malloc(pitch * m_size.y);
if (IsBackbuffer()) {
// TODO backbuffer
glReadBuffer(GL_BACK);
glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
} else if(IsDss()) {
Attach(GL_DEPTH_STENCIL_ATTACHMENT);
glGetTexImage(GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image);
EnableUnit(1);
glGetTexImage(m_texture_target, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image);
} else {
EnableUnit(0);
glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, image);
glGetTexImage(m_texture_target, 0, GL_RGBA, GL_UNSIGNED_BYTE, image);
}
// Build a BMP file
@ -430,8 +392,11 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 8) {
float* input = (float*)better_data;
// FIXME how to dump 32 bits value into 8bits component color
uint32 depth = (uint32)ldexpf(*input, 32);
fwrite(&depth, 1, 4, fp);
uint8 small_depth = depth >> 24;
uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 };
fwrite(&better_data, 1, 4, fp);
}
} else {
// swap red and blue
@ -443,7 +408,6 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
fwrite(better_data, 1, 4, fp);
}
}
// fwrite(data, 1, m_size.x << 2, fp); // TODO: swap red-blue?
}
fclose(fp);
@ -453,61 +417,5 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
}
return false;
#if 0
CComPtr<ID3D11Resource> res;
if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL)
{
HRESULT hr;
D3D11_TEXTURE2D_DESC desc;
memset(&desc, 0, sizeof(desc));
m_texture->GetDesc(&desc);
desc.Usage = D3D11_USAGE_STAGING;
desc.BindFlags = 0;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
CComPtr<ID3D11Texture2D> src, dst;
hr = m_dev->CreateTexture2D(&desc, NULL, &src);
m_ctx->CopyResource(src, m_texture);
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
hr = m_dev->CreateTexture2D(&desc, NULL, &dst);
D3D11_MAPPED_SUBRESOURCE sm, dm;
hr = m_ctx->Map(src, 0, D3D11_MAP_READ, 0, &sm);
hr = m_ctx->Map(dst, 0, D3D11_MAP_WRITE, 0, &dm);
uint8* s = (uint8*)sm.pData;
uint8* d = (uint8*)dm.pData;
for(uint32 y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch)
{
for(uint32 x = 0; x < desc.Width; x++)
{
((uint32*)d)[x] = (uint32)(ldexpf(((float*)s)[x*2], 32));
}
}
m_ctx->Unmap(src, 0);
m_ctx->Unmap(dst, 0);
res = dst;
}
else
{
res = m_texture;
}
return SUCCEEDED(D3DX11SaveTextureToFile(m_ctx, res, dds ? D3DX11_IFF_DDS : D3DX11_IFF_BMP, fn.c_str()));
#endif
}

View File

@ -87,6 +87,9 @@ void vs_main()
vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0);
vec4 final_p = p * VertexScale - VertexOffset;
// FIXME
// FLIP vertically
final_p.y *= -1.0f;
OUT.p = final_p;
gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
@ -132,7 +135,10 @@ out gl_PerVertex {
float gl_ClipDistance[];
};
layout(location = 0) in vertex GSin[];
// FIXME
// AMD Driver bug again !!!!
//layout(location = 0) in vertex GSin[];
in vertex GSin[];
layout(location = 0) out vertex GSout;
@ -188,46 +194,59 @@ void gs_main()
#elif GS_PRIM == 3
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
layout(triangle_strip, max_vertices = 6) out;
void gs_main()
{
// left top => GSin[0];
// right bottom => GSin[1];
vertex rb = GSin[1];
vertex lt = GSin[0];
// left top
GSout = GSin[0];
GSout.p.z = GSin[1].p.z;
GSout.t.zw = GSin[1].t.zw;
gl_Position = GSout.p; // FIXME is it useful
lt.p.z = rb.p.z;
lt.t.zw = rb.t.zw;
#if GS_IIP == 0
GSout.c = GSin[1].c;
lt.c = rb.c;
#endif
vertex lb = rb;
lb.p.x = lt.p.x;
lb.t.x = lt.t.x;
vertex rt = rb;
rt.p.y = lt.p.y;
rt.t.y = lt.t.y;
// Triangle 1
gl_Position = lt.p;
GSout = lt;
EmitVertex();
// left bottom
GSout = GSin[1];
gl_Position = gl_in[1].gl_Position; // FIXME is it useful
gl_Position.x = GSin[0].p.x;
GSout.p.x = GSin[0].p.x;
GSout.t.x = GSin[0].t.x;
gl_Position = lb.p;
GSout = lb;
EmitVertex();
// rigth top
GSout = GSin[1];
gl_Position = gl_in[1].gl_Position; // FIXME is it useful
gl_Position.y = GSin[0].p.y;
GSout.p.y = GSin[0].p.y;
GSout.t.y = GSin[0].t.y;
gl_Position = rt.p;
GSout = rt;
EmitVertex();
// rigth bottom
GSout = GSin[1];
gl_Position = GSin[1].p; // FIXME is it useful
EndPrimitive();
// Triangle 2
gl_Position = lb.p;
GSout = lb;
EmitVertex();
gl_Position = rt.p;
GSout = rt;
EmitVertex();
gl_Position = rb.p;
GSout = rb;
EmitVertex();
EndPrimitive();
}
#endif
@ -238,13 +257,8 @@ void gs_main()
layout(location = 0) in vertex PSin;
// Same buffer but 2 colors for dual source blending
//FIXME
#if 1
layout(location = 0, index = 0) out vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
#else
layout(location = 0) out vec4 SV_Target;
#endif
layout(binding = 0) uniform sampler2D TextureSampler;
layout(binding = 1) uniform sampler2D PaletteSampler;
@ -264,7 +278,10 @@ layout(std140, binding = 5) uniform cb1
vec4 sample_c(vec2 uv)
{
// FIXME I'm not sure it is a good solution to flip texture
return texture(TextureSampler, uv);
//FIXME another way to FLIP vertically
//return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );
}
vec4 sample_p(float u)
@ -598,7 +615,6 @@ vec4 ps_color()
void ps_main()
{
//FIXME
#if 1
vec4 c = ps_color();
// FIXME: I'm not sure about the value of others field
@ -620,8 +636,5 @@ void ps_main()
//SV_Target0 = vec4(1.0f,0.0f,0.0f, 1.0f);
//SV_Target1 = vec4(0.0f,1.0f,0.0f, 1.0f);
#else
SV_Target = vec4(1.0f,0.0f,0.0f, 1.0f);
#endif
}
#endif