gsdx-ogl: LINUX-ONLY

* rewrite the vertex management around a new GSVertexBufferState object
* extend GSUniformBufferOGL with a proper object interface (bind/allocate/attach/upload)
* properly delete textures
* manage buffers with glMap* instead of glBufferSubData


git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@4990 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2011-12-15 18:27:58 +00:00
parent 4029cc5195
commit ab01926ed5
3 changed files with 176 additions and 196 deletions

View File

@ -56,10 +56,9 @@
GSDeviceOGL::GSDeviceOGL()
: m_free_window(false)
, m_window(NULL)
, m_vb(0)
, m_pipeline(0)
, m_fbo(0)
, m_sr_vb_offset(0)
, m_vb_sr(NULL)
, m_srv_changed(false)
, m_ss_changed(false)
{
@ -74,6 +73,9 @@ GSDeviceOGL::GSDeviceOGL()
GSDeviceOGL::~GSDeviceOGL()
{
// Clean vertex buffer state
delete (m_vb_sr);
// Clean m_merge
for (uint i = 0; i < 2; i++)
glDeleteProgram(m_merge.ps[i]);
@ -86,8 +88,6 @@ GSDeviceOGL::~GSDeviceOGL()
delete (m_interlace.cb);
// Clean m_convert
glDeleteVertexArrays(1, &m_convert.va);
glDeleteBuffers(1, &m_convert.vb);
glDeleteProgram(m_convert.vs);
for (uint i = 0; i < 2; i++)
glDeleteProgram(m_convert.ps[i]);
@ -101,7 +101,6 @@ GSDeviceOGL::~GSDeviceOGL()
delete m_date.bs;
// Clean various opengl allocation
glDeleteBuffers(1, &m_vb);
glDeleteProgramPipelines(1, &m_pipeline);
glDeleteFramebuffers(1, &m_fbo);
}
@ -180,31 +179,18 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
OMSetFBO(0);
// ****************************************************************
// convert
// Vertex buffer state
// ****************************************************************
glGenVertexArrays(1, &m_convert.va);
IASetVertexArrray(m_convert.va);
glGenBuffers(1, &m_convert.vb);
IASetVertexBufferBind(m_convert.vb);
glBufferData(GL_ARRAY_BUFFER, 4 * 4 * sizeof(GSVertexPT1), NULL, GL_STREAM_DRAW);
GSInputLayout il_convert[2] =
{
{0, 4, GL_FLOAT, sizeof(GSVertexPT1), (const GLvoid*)offsetof(struct GSVertexPT1, p) },
{1, 2, GL_FLOAT, sizeof(GSVertexPT1), (const GLvoid*)offsetof(struct GSVertexPT1, t) },
};
m_vb_sr = new GSVertexBufferState(sizeof(GSVertexPT1), il_convert, countof(il_convert));
for (int i = 0; i < 2; i++) {
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(il_convert[i].index);
glVertexAttribPointer(il_convert[i].index, il_convert[i].size, il_convert[i].type, GL_FALSE, il_convert[i].stride, il_convert[i].offset);
}
// Unbind to avoid issue with the setup of others parameters
IASetVertexArrray(0);
IASetVertexBufferBind(0);
// ****************************************************************
// convert
// ****************************************************************
CompileShaderFromSource("convert.glsl", "vs_main", GL_VERTEX_SHADER, &m_convert.vs);
for(int i = 0; i < countof(m_convert.ps); i++)
CompileShaderFromSource("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_convert.ps[i]);
@ -264,10 +250,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
// merge
// ****************************************************************
m_merge.cb = new GSUniformBufferOGL(1, sizeof(MergeConstantBuffer));
glGenBuffers(1, &m_merge.cb->buffer);
glBindBuffer(GL_UNIFORM_BUFFER, m_merge.cb->buffer);
glBufferData(GL_UNIFORM_BUFFER, m_merge.cb->byte_size, NULL, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_UNIFORM_BUFFER, m_merge.cb->index, m_merge.cb->buffer);
for(int i = 0; i < countof(m_merge.ps); i++)
CompileShaderFromSource("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_merge.ps[i]);
@ -285,10 +267,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
// interlace
// ****************************************************************
m_interlace.cb = new GSUniformBufferOGL(2, sizeof(InterlaceConstantBuffer));
glGenBuffers(1, &m_interlace.cb->buffer);
glBindBuffer(GL_UNIFORM_BUFFER, m_interlace.cb->buffer);
glBufferData(GL_UNIFORM_BUFFER, m_interlace.cb->byte_size, NULL, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_UNIFORM_BUFFER, m_interlace.cb->index, m_interlace.cb->buffer);
for(int i = 0; i < countof(m_interlace.ps); i++)
CompileShaderFromSource("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_interlace.ps[i]);
@ -473,27 +451,6 @@ bool GSDeviceOGL::Reset(int w, int h)
// in the backbuffer
m_backbuffer = new GSTextureOGL(0, w, h, false, 0);
#if 0
if(m_swapchain)
{
DXGI_SWAP_CHAIN_DESC scd;
memset(&scd, 0, sizeof(scd));
m_swapchain->GetDesc(&scd);
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
CComPtr<ID3D11Texture2D> backbuffer;
if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
{
return false;
}
m_backbuffer = new GSTexture11(backbuffer);
}
#endif
return true;
}
@ -506,7 +463,7 @@ void GSDeviceOGL::Flip()
void GSDeviceOGL::DrawPrimitive()
{
glDrawArrays(m_state.topology, m_vertices.start, m_vertices.count);
glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count);
}
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -716,14 +673,9 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
{GSVector4(right, top, 0.5f, 1.0f), GSVector2(flip_sr.z, flip_sr.w)},
};
IASetVertexArrray(m_convert.va);
IASetVertexBufferBind(m_convert.vb);
// FIXME it will worth some benchmark.
// What is the faster always use the same. Or pack to difference emplacement. I'm afraid
// that in all case the GPU will be stall to wait the data
// Note maybe create a new buffer can be faster.
// m_sr_vb_offset = 0;
glBufferSubData(GL_ARRAY_BUFFER, m_sr_vb_offset * 4 * sizeof(GSVertexPT1) , sizeof(GSVertexPT1) * 4, vertices);
IASetVertexState(m_vb_sr);
IASetVertexBuffer(vertices, 4);
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
// ************************************
// vs
@ -748,8 +700,7 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
// ************************************
// Draw
// ************************************
glDrawArrays(GL_TRIANGLE_STRIP, m_sr_vb_offset * 4, 4);
m_sr_vb_offset = (m_sr_vb_offset + 1) & 0x3;
DrawPrimitive();
// ************************************
// End
@ -771,11 +722,11 @@ void GSDeviceOGL::DoMerge(GSTexture* st[2], GSVector4* sr, GSTexture* dt, GSVect
if(st[0])
{
if (m_state.cb != m_merge.cb->buffer) {
m_state.cb = m_merge.cb->buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_merge.cb->buffer);
if (m_state.cb != m_merge.cb) {
m_state.cb = m_merge.cb;
m_state.cb->bind();
}
glBufferSubData(GL_UNIFORM_BUFFER, 0, m_merge.cb->byte_size, &c.v);
m_state.cb->upload(&c.v);
StretchRect(st[0], sr[0], dt, dr[0], m_merge.ps[mmod ? 1 : 0], m_merge.bs);
}
@ -793,11 +744,11 @@ void GSDeviceOGL::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool lin
cb.ZrH = GSVector2(0, 1.0f / s.y);
cb.hH = s.y / 2;
if (m_state.cb != m_interlace.cb->buffer) {
m_state.cb = m_interlace.cb->buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_interlace.cb->buffer);
if (m_state.cb != m_interlace.cb) {
m_state.cb = m_interlace.cb;
m_state.cb->bind();
}
glBufferSubData(GL_UNIFORM_BUFFER, 0, m_interlace.cb->byte_size, &cb);
m_state.cb->upload(&cb);
StretchRect(st, sr, dt, dr, m_interlace.ps[shader], linear);
}
@ -823,88 +774,52 @@ GSTexture* GSDeviceOGL::Resolve(GSTexture* t)
return NULL;
}
void GSDeviceOGL::IASetVertexArrray(GLuint va)
void GSDeviceOGL::EndScene()
{
if (m_state.va != va) {
glBindVertexArray(va);
m_state.va = va;
m_state.vb_state->start += m_state.vb_state->count;
m_state.vb_state->count = 0;
}
// Make vb_state the active vertex buffer state.
// The state is cached in m_state.vb_state, so bind() is only called when the
// requested state differs from the cached one.
void GSDeviceOGL::IASetVertexState(GSVertexBufferState* vb_state)
{
	// Already active: nothing to rebind.
	if (m_state.vb_state == vb_state)
		return;

	m_state.vb_state = vb_state;
	m_state.vb_state->bind();
}
void GSDeviceOGL::IASetVertexBufferBind(GLuint vb)
void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
{
if (m_state.vb != vb) {
glBindBuffer(GL_ARRAY_BUFFER, vb);
m_state.vb = vb;
// Note: For an explanation of the map flag
// see http://www.opengl.org/wiki/Buffer_Object_Streaming
uint32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
GSVertexBufferState* vb = m_state.vb_state;
vb->count = count;
// Current GPU buffer is really too small need to realocate a new one
if (count > vb->limit) {
vb->allocate(std::max<int>(count * 3 / 2, 60000));
} else if (count > (vb->limit - vb->start) ) {
// Not enough left free room. Just go back at the beginning
vb->start = 0;
// Tell the driver that it can orphan previous buffer and restart from a scratch buffer.
// Technically the buffer will not be accessible by the application anymore but the
// GL will effectively remove it when draws call are finised.
map_flags |= GL_MAP_INVALIDATE_BUFFER_BIT;
} else {
// Tell the driver that it doesn't need to contain any valid buffer data, and that you promise to write the entire range you map
map_flags |= GL_MAP_INVALIDATE_RANGE_BIT;
}
vb->upload(vertices, map_flags);
}
// Upload `count` vertices of `stride` bytes each into the vertex buffer m_vb,
// (re)creating the buffer when the request does not fit.
// vertices: source data, copied with glBufferSubData.
// stride:   size in bytes of one vertex.
// count:    number of vertices to upload; stored in m_vertices.count on exit.
void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
{
// m_vertices.count must have been reset to 0 before a new batch is uploaded.
ASSERT(m_vertices.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride)
{
// The current GPU buffer is too small: drop it so that a bigger one is allocated below.
if (m_vb) {
glDeleteBuffers(1, &m_vb);
m_vb = 0;
}
m_vertices.start = 0;
m_vertices.count = 0;
// Grow to 1.5x the requested count, with a floor of 11000 vertices.
m_vertices.limit = std::max<int>(count * 3 / 2, 11000);
m_vertices.stride = stride;
}
if(!m_vb)
{
glGenBuffers(1, &m_vb);
IASetVertexBufferBind(m_vb);
// Allocate the buffer storage (no initial data).
glBufferData(GL_ARRAY_BUFFER, m_vertices.limit * m_vertices.stride, NULL, GL_STREAM_DRAW);
//m_vb_changed = true;
}
// Append after the current start, or go back to the beginning when the batch
// would run past the limit or the stride differs from the stored one.
// NOTE(review): open question left by the original author: why not always go back to the beginning?
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
m_vertices.start = 0;
// Fill the buffer
// NOTE(review): m_vb is only bound above when it has just been created; presumably it is still bound otherwise — confirm.
glBufferSubData(GL_ARRAY_BUFFER, m_vertices.start * stride, count * stride, vertices);
m_vertices.count = count;
}
#if 0
// Disabled code (compiled out by the surrounding #if 0).
// Re-applies the vertex attribute layout when the layout pointer, the number of
// attributes, or the m_vb_changed flag differs from what was last applied.
// layout:     array of attribute descriptions (index, size, type, stride, offset).
// layout_nbr: number of entries in layout.
void GSDeviceOGL::IASetInputLayout(GSInputLayout* layout, int layout_nbr)
{
if(m_state.layout != layout || m_state.layout_nbr != layout_nbr || m_vb_changed)
{
// Remove old configuration.
// NOTE(review): this counts down from the previous attribute count rather than
// walking the previous layout's indices — verify the range before re-enabling this code.
for (int i = m_state.layout_nbr ; i > (m_state.layout_nbr - layout_nbr) ; i--) {
glDisableVertexAttribArray(i);
}
// Enable and describe every attribute of the new layout.
for (int i = 0; i < layout_nbr; i++) {
glEnableVertexAttribArray(layout[i].index);
glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, GL_FALSE, layout[i].stride, layout[i].offset);
}
// Remember what was applied so that an identical request is skipped next time.
m_vb_changed = false;
m_state.layout = layout;
m_state.layout_nbr = layout_nbr;
}
}
#endif
// Record the primitive topology (a GL enum such as GL_TRIANGLE_STRIP).
// The value is only cached in m_state.topology; no GL call is issued here.
void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology)
{
	// A plain store is enough: guarding it with a "has it changed?" test and
	// then storing again unconditionally was redundant.
	m_state.topology = topology;
}
void GSDeviceOGL::VSSetShader(GLuint vs)

View File

@ -74,15 +74,43 @@ struct GSDepthStencilOGL {
{}
};
struct GSUniformBufferOGL {
class GSUniformBufferOGL {
GLuint buffer; // data object
GLuint index; // GLSL slot
uint byte_size; // size of the data
uint size; // size of the data
GSUniformBufferOGL(GLuint index, uint byte_size) : buffer(0)
, index(index)
, byte_size(byte_size)
{}
public:
GSUniformBufferOGL(GLuint index, uint size) : index(index)
, size(size)
{
glGenBuffers(1, &buffer);
bind();
allocate();
attach();
}
void bind()
{
glBindBuffer(GL_UNIFORM_BUFFER, buffer);
}
void allocate()
{
glBufferData(GL_UNIFORM_BUFFER, size, NULL, GL_STREAM_DRAW);
}
void attach()
{
glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer);
}
void upload(const void* src)
{
uint32 flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT;
uint8* dst = (uint8*) glMapBufferRange(GL_UNIFORM_BUFFER, 0, size, flags);
memcpy(dst, src, size);
glUnmapBuffer(GL_UNIFORM_BUFFER);
}
~GSUniformBufferOGL() {
glDeleteBuffers(1, &buffer);
@ -97,6 +125,60 @@ struct GSInputLayout {
const GLvoid* offset;
};
struct GSVertexBufferState {
size_t stride;
size_t start;
size_t count;
size_t limit;
GLuint vb;
GLuint va;
GSVertexBufferState(size_t stride, GSInputLayout* layout, uint32 layout_nbr) : stride(stride)
, count(0)
{
glGenBuffers(1, &vb);
glGenVertexArrays(1, &va);
bind();
allocate(60000); // Opengl works best with 1-4MB buffer. 60k element seems a good value.
set_internal_format(layout, layout_nbr);
}
void allocate(size_t new_limit)
{
start = 0;
limit = new_limit;
glBufferData(GL_ARRAY_BUFFER, limit * stride, NULL, GL_STREAM_DRAW);
}
void bind()
{
glBindVertexArray(va);
glBindBuffer(GL_ARRAY_BUFFER, vb);
}
void upload(const void* src, uint32 flags)
{
uint8* dst = (uint8*) glMapBufferRange(GL_ARRAY_BUFFER, stride*start, stride*count, flags);
memcpy(dst, src, stride*count);
glUnmapBuffer(GL_ARRAY_BUFFER);
}
void set_internal_format(GSInputLayout* layout, uint32 layout_nbr)
{
for (int i = 0; i < layout_nbr; i++) {
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(layout[i].index);
glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, GL_FALSE, layout[i].stride, layout[i].offset);
}
}
~GSVertexBufferState()
{
glDeleteBuffers(1, &vb);
glDeleteVertexArrays(1, &va);
}
};
class GSDeviceOGL : public GSDevice
{
uint32 m_msaa; // Level of Msaa
@ -104,10 +186,10 @@ class GSDeviceOGL : public GSDevice
bool m_free_window;
GSWnd* m_window;
GLuint m_vb; // vertex buffer object
GLuint m_pipeline; // pipeline to attach program shader
GLuint m_fbo; // frame buffer container
uint32 m_sr_vb_offset;
GSVertexBufferState* m_vb_sr; // vb_state for StretchRect
struct {
GLuint ps[2]; // program object
@ -121,12 +203,6 @@ class GSDeviceOGL : public GSDevice
} m_interlace;
struct {
// Hum I think this one is useless. As far as I understand
// it only get the index name of GLSL-equivalent input attribut
// ??? CComPtr<ID3D11InputLayout> il;
//GSInputLayout il[2]; // description of the vertex array
GLuint va; // vertex array object
GLuint vb; // vertex buffer
GLuint vs; // program object
GLuint ps[8]; // program object
GLuint ln; // sampler object
@ -141,40 +217,12 @@ class GSDeviceOGL : public GSDevice
GSBlendStateOGL* bs;
} m_date;
// struct
// {
// ID3D11Buffer* vb;
// size_t vb_stride;
// ID3D11InputLayout* layout;
// D3D11_PRIMITIVE_TOPOLOGY topology;
// ID3D11VertexShader* vs;
// ID3D11Buffer* vs_cb;
// ID3D11GeometryShader* gs;
// ID3D11ShaderResourceView* ps_srv[3];
// ID3D11PixelShader* ps;
// ID3D11Buffer* ps_cb;
// ID3D11SamplerState* ps_ss[3];
// GSVector2i viewport;
// GSVector4i scissor;
// ID3D11DepthStencilState* dss;
// uint8 sref;
// ID3D11BlendState* bs;
// float bf;
// ID3D11RenderTargetView* rtv;
// ID3D11DepthStencilView* dsv;
// } m_state;
struct
{
GLuint vb; // vertex buffer
// Hum I think those things can be dropped on OGL. It probably need an others architecture (see glVertexAttribPointer)
// size_t vb_stride;
// ID3D11InputLayout* layout;
//GSInputLayout* layout;
//uint32 layout_nbr;
GLuint va; // vertex array
GSVertexBufferState* vb_state;
GLenum topology; // (ie GL_TRIANGLES...)
GLuint vs; // program
GLuint cb; // uniform current buffer
GSUniformBufferOGL* cb; // uniform current buffer
GLuint gs; // program
// FIXME texture binding. Maybe not equivalent for the state but the best I could find.
GSTextureOGL* ps_srv[3];
@ -197,7 +245,6 @@ class GSDeviceOGL : public GSDevice
bool m_srv_changed;
bool m_ss_changed;
//bool m_vb_changed;
#if 0
CComPtr<ID3D11Device> m_dev;
@ -271,11 +318,11 @@ class GSDeviceOGL : public GSDevice
void CompileShaderFromSource(const std::string& glsl_file, const std::string& entry, GLenum type, GLuint* program);
void EndScene();
void IASetPrimitiveTopology(GLenum topology);
//void IASetInputLayout(GSInputLayout* layout, int layout_nbr);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBufferBind(GLuint vb);
void IASetVertexArrray(GLuint va);
void IASetVertexBuffer(const void* vertices, size_t count);
void IASetVertexState(GSVertexBufferState* vb_state);
void VSSetShader(GLuint vs);
void GSSetShader(GLuint gs);

View File

@ -157,7 +157,21 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)
// Release the GL objects held by this texture.
GSTextureOGL::~GSTextureOGL()
{
	// The extra buffer is released whatever the texture type is.
	glDeleteBuffers(1, &m_extra_buffer_id);

	// m_texture_id is released with the call that matches the texture type.
	if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) {
		glDeleteTextures(1, &m_texture_id);
	} else if (m_type == GSTexture::DepthStencil) {
		glDeleteRenderbuffers(1, &m_texture_id);
	} else if (m_type == GSTexture::Offscreen) {
		// Offscreen textures trip the assert; nothing is released for them here.
		assert(0);
	}
	// Any other type: nothing more to release.
}
void GSTextureOGL::Attach(GLenum attachment)
@ -192,15 +206,19 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
// The case appears on SW mode. Src pitch is 2x dst pitch.
int rowbytes = r.width() << 2;
if (pitch != rowbytes) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_extra_buffer_id);
uint32 pbo_size = m_size.x * m_size.y * 4;
uint32 map_flags = GL_MAP_WRITE_BIT;
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_extra_buffer_id);
if (!m_extra_buffer_allocated) {
glBufferData(GL_PIXEL_UNPACK_BUFFER, m_size.x * m_size.y * 4, NULL, GL_STREAM_DRAW);
glBufferData(GL_PIXEL_UNPACK_BUFFER, pbo_size, NULL, GL_STREAM_DRAW);
m_extra_buffer_allocated = true;
} else {
GL_MAP_INVALIDATE_BUFFER_BIT;
}
uint8* src = (uint8*) data;
uint8* dst = (uint8*) glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
uint8* dst = (uint8*) glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, pbo_size, map_flags);
for(int h = r.height(); h > 0; h--, src += pitch, dst += rowbytes)
{
memcpy(dst, src, rowbytes);