gsdx-ogl:

* Use a geometry shader pass-through to replace previous AMD workaround
* Various cosmetic changes


git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5038 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-01-02 20:08:11 +00:00
parent 2f4e2d8b6b
commit c1d7b81a55
10 changed files with 188 additions and 123 deletions

View File

@ -19,6 +19,7 @@ set(CommonFlags
-std=c++0x
-fno-strict-aliasing
-DOGL_DEBUG
-DAMD_DRIVER_WORKAROUND
)
set(OptimizationFlags

View File

@ -54,8 +54,8 @@
//#define LOUD_DEBUGGING
#define SHADER_DEBUG
//#define DUMP_START (13000)
//#define DUMP_LENGTH (200)
//#define DUMP_START (70)
//#define DUMP_LENGTH (130)
//#define DUMP_ONLY_FRAME (112)
#ifdef DUMP_START
@ -237,6 +237,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
// convert
// ****************************************************************
CompileShaderFromSource("convert.glsl", "vs_main", GL_VERTEX_SHADER, &m_convert.vs);
CompileShaderFromSource("convert.glsl", "gs_main", GL_GEOMETRY_SHADER, &m_convert.gs);
for(int i = 0; i < countof(m_convert.ps); i++)
CompileShaderFromSource("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_convert.ps[i]);
@ -540,9 +541,11 @@ void GSDeviceOGL::DrawPrimitive()
case GL_POINTS: topo = "point"; break;
default: topo = "!!!!";
}
fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, m_state.vb_state->count, topo.c_str() );
fprintf(stderr, "Draw %d (Frame %d), %d elem of %s\n", g_draw_count, g_frame_count, /*m_state.vb_state->count*/ 0, topo.c_str() );
fprintf(stderr, "vs: %d ; gs: %d ; ps: %d\n", m_state.vs, m_state.gs, m_state.ps);
fprintf(stderr, "Blend: %d, Depth: %d, Stencil: %d \n",m_state.bs->m_enable, m_state.dss->m_depth_enable, m_state.dss->m_stencil_enable);
m_state.bs->debug();
m_state.dss->debug_depth();
}
#endif
@ -560,19 +563,6 @@ void GSDeviceOGL::DrawPrimitive()
g_draw_count++;
#endif
// FIXME AMD driver bug workaround
// You cannot unattach shader. So destroy everythings and recreate the shader pipeline...
// Slow and painful...
glBindProgramPipeline(0);
glDeleteProgramPipelines(1, &m_pipeline);
m_state.gs = 0;
m_state.ps = 0;
m_state.vs = 0;
glGenProgramPipelines(1, &m_pipeline);
glBindProgramPipeline(m_pipeline);
}
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -822,7 +812,11 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
// gs
// ************************************
#ifdef AMD_DRIVER_WORKAROUND
GSSetShader(m_convert.gs);
#else
GSSetShader(0);
#endif
// ************************************
// ps
@ -914,7 +908,12 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
// gs
#ifdef AMD_DRIVER_WORKAROUND
GSSetShader(m_convert.gs);
#else
GSSetShader(0);
#endif
// ps
@ -1044,7 +1043,6 @@ void GSDeviceOGL::PSSetSamplerState(GLuint ss0, GLuint ss1, GLuint ss2)
void GSDeviceOGL::PSSetShader(GLuint ps)
{
if(m_state.ps != ps)
{
m_state.ps = ps;
@ -1279,8 +1277,8 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), *program);
fprintf(stderr, "\n%s", macro_sel.c_str());
fprintf(stderr, "%s\n", log);
#endif
free(log);
#endif
}
void GSDeviceOGL::CheckDebugLog()

View File

@ -54,6 +54,13 @@ struct GSBlendStateOGL {
, m_a_msk(GL_TRUE)
{}
// Dump the blend configuration to stderr: RGB equation/factors, alpha
// equation/factors and the color write mask. Prints nothing when blending
// is disabled.
void debug()
{
	if (m_enable) {
		fprintf(stderr,"Blend RGB: %x src:%x dst:%x\n", m_equation_RGB, m_func_sRGB, m_func_dRGB);
		fprintf(stderr,"Blend ALPHA: %x src:%x dst:%x\n", m_equation_ALPHA, m_func_sALPHA, m_func_dALPHA);
		fprintf(stderr,"Mask. R:%d B:%d G:%d A:%d\n", m_r_msk, m_b_msk, m_g_msk, m_a_msk);
	}
}
};
struct GSDepthStencilOGL {
@ -81,6 +88,11 @@ struct GSDepthStencilOGL {
, m_stencil_spass_dpass_op(GL_KEEP)
{}
// Dump the depth function and depth mask to stderr. Prints nothing when the
// depth test is disabled.
void debug_depth()
{
	if (m_depth_enable) {
		fprintf(stderr, "Depth %x, %x\n", m_depth_func, m_depth_mask);
	}
}
};
class GSUniformBufferOGL {
@ -503,6 +515,7 @@ class GSDeviceOGL : public GSDevice
GLuint ps[8]; // program object
GLuint ln; // sampler object
GLuint pt; // sampler object
GLuint gs;
GSDepthStencilOGL* dss;
GSBlendStateOGL* bs;
} m_convert;

View File

@ -120,7 +120,12 @@ void GSDeviceOGL::SetupGS(GSSelector sel)
// Static
// *************************************************************
GLuint gs = 0;
if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) {
#ifdef AMD_DRIVER_WORKAROUND
if (true)
#else
if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3))
#endif
{
auto i = m_gs.find(sel);
if(i == m_gs.end()) {

View File

@ -22,8 +22,8 @@
#pragma once
#include "GSTextureOGL.h"
static uint g_state_texture_unit = 0;
static uint g_state_texture_id = 0;
static int g_state_texture_unit = -1;
static int g_state_texture_id = -1;
GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)
: m_extra_buffer_id(0),
@ -317,6 +317,68 @@ struct BITMAPINFOHEADER
#pragma pack(pop)
#endif
void GSTextureOGL::Save(const string& fn, const void* image, uint32 pitch)
{
// Build a BMP file
FILE* fp = fopen(fn.c_str(), "wb");
BITMAPINFOHEADER bih;
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = m_size.x;
bih.biHeight = m_size.y;
bih.biPlanes = 1;
bih.biBitCount = 32;
bih.biCompression = BI_RGB;
bih.biSizeImage = m_size.x * m_size.y << 2;
BITMAPFILEHEADER bfh;
memset(&bfh, 0, sizeof(bfh));
uint8* bfType = (uint8*)&bfh.bfType;
// bfh.bfType = 'MB';
bfType[0] = 0x42;
bfType[1] = 0x4d;
bfh.bfOffBits = sizeof(bfh) + sizeof(bih);
bfh.bfSize = bfh.bfOffBits + bih.biSizeImage;
bfh.bfReserved1 = bfh.bfReserved2 = 0;
fwrite(&bfh, 1, sizeof(bfh), fp);
fwrite(&bih, 1, sizeof(bih), fp);
uint8* data = (uint8*)image + (m_size.y - 1) * pitch;
for(int h = m_size.y; h > 0; h--, data -= pitch)
{
if (IsDss()) {
// Only get the depth and convert it to an integer
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 8) {
float* input = (float*)better_data;
// FIXME how to dump 32 bits value into 8bits component color
uint32 depth = (uint32)ldexpf(*input, 32);
uint8 small_depth = depth >> 24;
uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 };
fwrite(&better_data, 1, 4, fp);
}
} else {
// swap red and blue
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 4) {
uint8 red = better_data[2];
better_data[2] = better_data[0];
better_data[0] = red;
fwrite(better_data, 1, 4, fp);
}
}
}
fclose(fp);
}
bool GSTextureOGL::Save(const string& fn, bool dds)
{
@ -325,6 +387,8 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
if (IsDss()) pitch *= 2;
char* image = (char*)malloc(pitch * m_size.y);
// FIXME instead of swapping manually B and R maybe you can request the driver to do it
// for us
if (IsBackbuffer()) {
glReadBuffer(GL_BACK);
glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
@ -336,70 +400,9 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
glGetTexImage(m_texture_target, 0, GL_RGBA, GL_UNSIGNED_BYTE, image);
}
// Build a BMP file
if(FILE* fp = fopen(fn.c_str(), "wb"))
{
BITMAPINFOHEADER bih;
Save(fn, image, pitch);
free(image);
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = m_size.x;
bih.biHeight = m_size.y;
bih.biPlanes = 1;
bih.biBitCount = 32;
bih.biCompression = BI_RGB;
bih.biSizeImage = m_size.x * m_size.y << 2;
BITMAPFILEHEADER bfh;
memset(&bfh, 0, sizeof(bfh));
uint8* bfType = (uint8*)&bfh.bfType;
// bfh.bfType = 'MB';
bfType[0] = 0x42;
bfType[1] = 0x4d;
bfh.bfOffBits = sizeof(bfh) + sizeof(bih);
bfh.bfSize = bfh.bfOffBits + bih.biSizeImage;
bfh.bfReserved1 = bfh.bfReserved2 = 0;
fwrite(&bfh, 1, sizeof(bfh), fp);
fwrite(&bih, 1, sizeof(bih), fp);
uint8* data = (uint8*)image + (m_size.y - 1) * pitch;
for(int h = m_size.y; h > 0; h--, data -= pitch)
{
if (IsDss()) {
// Only get the depth and convert it to an integer
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 8) {
float* input = (float*)better_data;
// FIXME how to dump 32 bits value into 8bits component color
uint32 depth = (uint32)ldexpf(*input, 32);
uint8 small_depth = depth >> 24;
uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 };
fwrite(&better_data, 1, 4, fp);
}
} else {
// swap red and blue
uint8* better_data = data;
for (int w = m_size.x; w > 0; w--, better_data += 4) {
uint8 red = better_data[2];
better_data[2] = better_data[0];
better_data[0] = red;
fwrite(better_data, 1, 4, fp);
}
}
}
fclose(fp);
free(image);
return true;
}
return false;
return true;
}

View File

@ -39,6 +39,7 @@ class GSTextureOGL : public GSTexture
bool Map(GSMap& m, const GSVector4i* r = NULL);
void Unmap();
bool Save(const string& fn, bool dds = false);
void Save(const string& fn, const void* image, uint32 pitch);
void EnableUnit(uint unit);
void Attach(GLenum attachment);

View File

@ -1,5 +1,12 @@
//#version 420 // Keep it for editor detection
struct vertex_basic
{
vec4 p;
vec2 t;
};
#ifdef VERTEX_SHADER
out gl_PerVertex {
@ -19,24 +26,54 @@ layout(location = 1) in vec2 TEXCOORD0;
// smooth, the default, means to do perspective-correct interpolation.
//
// The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area.
// FIXME gl_Position
layout(location = 0) out vec4 POSITION_OUT;
layout(location = 1) out vec2 TEXCOORD0_OUT;
layout(location = 0) out vertex_basic VSout;
void vs_main()
{
POSITION_OUT = POSITION;
TEXCOORD0_OUT = TEXCOORD0;
VSout.p = POSITION;
VSout.t = TEXCOORD0;
gl_Position = POSITION; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
}
#endif
#ifdef GEOMETRY_SHADER
in gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[];
} gl_in[];
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
float gl_ClipDistance[];
};
// FIXME
// AMD Driver bug again !!!!
//layout(location = 0) in vertex GSin[];
in vertex_basic GSin[];
layout(location = 0) out vertex_basic GSout;
layout(triangles) in;
layout(triangle_strip, max_vertices = 3) out;
// Pass-through geometry shader entry point: every vertex of the input
// primitive is re-emitted unchanged (built-in position plus the vertex_basic
// payload), then the output primitive is closed.
void gs_main()
{
// One iteration per vertex of the input primitive.
for(int i = 0; i < gl_in.length(); i++) {
// Forward the built-in position and the user-defined interface struct as-is.
gl_Position = gl_in[i].gl_Position;
GSout = GSin[i];
EmitVertex();
}
// Terminate the emitted strip.
EndPrimitive();
}
#endif
#ifdef FRAGMENT_SHADER
// NOTE: pixel can be clip with "discard"
layout(location = 0) in vec4 SV_Position;
layout(location = 1) in vec2 TEXCOORD0;
layout(location = 0) in vertex_basic PSin;
layout(location = 0) out vec4 SV_Target0;
layout(location = 1) out uint SV_Target1;
@ -45,7 +82,7 @@ layout(binding = 0) uniform sampler2D TextureSampler;
vec4 sample_c()
{
return texture(TextureSampler, vec2(TEXCOORD0.x,TEXCOORD0.y) );
return texture(TextureSampler, PSin.t );
}
vec4 ps_crt(uint i)
@ -88,7 +125,7 @@ void ps_main7()
void ps_main5() // triangular
{
highp uvec4 p = uvec4(SV_Position);
highp uvec4 p = uvec4(PSin.p);
vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u);
@ -97,7 +134,7 @@ void ps_main5() // triangular
void ps_main6() // diagonal
{
uvec4 p = uvec4(SV_Position);
uvec4 p = uvec4(PSin.p);
vec4 c = ps_crt((p.x + (p.y % 3)) % 3);

View File

@ -1,8 +1,13 @@
//#version 420 // Keep it for editor detection
struct vertex_basic
{
vec4 p;
vec2 t;
};
#ifdef FRAGMENT_SHADER
layout(location = 0) in vec4 SV_Position;
layout(location = 1) in vec2 TEXCOORD0;
layout(location = 0) in vertex_basic PSin;
layout(location = 0) out vec4 SV_Target0;
@ -19,8 +24,8 @@ void ps_main0()
{
// I'm not sure it impact us but be safe to lookup texture before conditional if
// see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control
vec4 c = texture(TextureSampler, TEXCOORD0);
if (fract(TEXCOORD0.y * hH) - 0.5 < 0.0)
vec4 c = texture(TextureSampler, PSin.t);
if (fract(PSin.t.y * hH) - 0.5 < 0.0)
discard;
SV_Target0 = c;
@ -30,8 +35,8 @@ void ps_main1()
{
// I'm not sure it impact us but be safe to lookup texture before conditional if
// see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control
vec4 c = texture(TextureSampler, TEXCOORD0);
if (0.5 - fract(TEXCOORD0.y * hH) < 0.0)
vec4 c = texture(TextureSampler, PSin.t);
if (0.5 - fract(PSin.t.y * hH) < 0.0)
discard;
SV_Target0 = c;
@ -39,16 +44,16 @@ void ps_main1()
void ps_main2()
{
vec4 c0 = texture(TextureSampler, TEXCOORD0 - ZrH);
vec4 c1 = texture(TextureSampler, TEXCOORD0);
vec4 c2 = texture(TextureSampler, TEXCOORD0 + ZrH);
vec4 c0 = texture(TextureSampler, PSin.t - ZrH);
vec4 c1 = texture(TextureSampler, PSin.t);
vec4 c2 = texture(TextureSampler, PSin.t + ZrH);
SV_Target0 = (c0 + c1 * 2 + c2) / 4;
}
void ps_main3()
{
SV_Target0 = texture(TextureSampler, TEXCOORD0);
SV_Target0 = texture(TextureSampler, PSin.t);
}
#endif

View File

@ -1,8 +1,13 @@
//#version 420 // Keep it for editor detection
struct vertex_basic
{
vec4 p;
vec2 t;
};
#ifdef FRAGMENT_SHADER
layout(location = 0) in vec4 SV_Position;
layout(location = 1) in vec2 TEXCOORD0;
layout(location = 0) in vertex_basic PSin;
layout(location = 0) out vec4 SV_Target0;
@ -15,14 +20,14 @@ layout(binding = 0) uniform sampler2D TextureSampler;
void ps_main0()
{
vec4 c = texture(TextureSampler, TEXCOORD0);
vec4 c = texture(TextureSampler, PSin.t);
c.a = min(c.a * 2, 1.0);
SV_Target0 = c;
}
void ps_main1()
{
vec4 c = texture(TextureSampler, TEXCOORD0);
vec4 c = texture(TextureSampler, PSin.t);
c.a = BGColor.a;
SV_Target0 = c;
}

View File

@ -55,7 +55,7 @@ layout(location = 3) in uvec2 i_p;
layout(location = 4) in uint i_z;
layout(location = 5) in vec4 i_f;
layout(location = 0) out vertex OUT;
layout(location = 0) out vertex VSout;
out gl_PerVertex {
vec4 gl_Position;
@ -91,33 +91,33 @@ void vs_main()
// FLIP vertically
final_p.y *= -1.0f;
OUT.p = final_p;
VSout.p = final_p;
gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
#if VS_RTCOPY
OUT.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
VSout.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
#endif
if(VS_TME != 0)
{
if(VS_FST != 0)
{
OUT.t.xy = i_t * TextureScale;
OUT.t.w = 1.0f;
VSout.t.xy = i_t * TextureScale;
VSout.t.w = 1.0f;
}
else
{
OUT.t.xy = i_t;
OUT.t.w = i_q;
VSout.t.xy = i_t;
VSout.t.w = i_q;
}
}
else
{
OUT.t.xy = vec2(0.0f, 0.0f);
OUT.t.w = 1.0f;
VSout.t.xy = vec2(0.0f, 0.0f);
VSout.t.w = 1.0f;
}
OUT.c = i_c;
OUT.t.z = i_f.a;
VSout.c = i_c;
VSout.t.z = i_f.a;
}
#endif
@ -633,8 +633,5 @@ void ps_main()
}
SV_Target1 = c;
//SV_Target0 = vec4(1.0f,0.0f,0.0f, 1.0f);
//SV_Target1 = vec4(0.0f,1.0f,0.0f, 1.0f);
}
#endif