From 7b889a10ccd21a2544caeb28ea8fca2afb3848b5 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 28 Jan 2016 20:01:10 +0300 Subject: [PATCH] Add vertex texture buffers for VS input Support vertex instancing in vertex shader using VertexID Relax OpenGL requirements by removing 4.5 features Use EXT version of TexBufferRange; Implement buffer copy using TexBuffer Apply travis workaround by danilaml Fix vertex upload in case of inlined array --- .travis.yml | 6 +- rpcs3/Emu/RSX/Common/BufferUtils.cpp | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 119 ++++++++++++++++++++++----- rpcs3/Emu/RSX/GL/GLGSRender.h | 7 ++ rpcs3/Emu/RSX/GL/GLProcTable.h | 4 + rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 66 ++++++++++++++- rpcs3/Emu/RSX/GL/GLVertexProgram.h | 3 + rpcs3/Emu/RSX/GL/gl_helpers.h | 33 +++++++- 8 files changed, 211 insertions(+), 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index ea5fab1fdc..5c6763d423 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,8 +33,10 @@ git: before_install: # shutdown services on Travis, which may have a memory impact - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - echo "yes" | sudo apt-add-repository 'deb http://repos.codelite.org/wx3.0/ubuntu/ precise universe'; sudo apt-get install libwxgtk3.0-dev; + sudo apt-add-repository -y ppa:libreoffice/ppa; + sudo apt-get update; + sudo apt-get install libglew-dev; fi; - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$CXX" = "g++" ]; then export CXX="g++-5" CC="gcc-5" CXXFLAGS="-Wno-format-security"; @@ -73,7 +75,7 @@ addons: - cmake - libopenal-dev - freeglut3-dev - - libglew-dev +# - libglew-dev apt version is too old - libc6-dev - llvm-3.6 - llvm-3.6-dev diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 43448826b2..eb6264e122 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -62,7 +62,7 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_ *c_dst++ = *c_src++; } if 
(vertex_array_desc.size * sizeof(u16) < element_size) - *c_dst++ = 0x3800; + *c_dst++ = 0x3c00; break; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 181b9c3877..b06dde3af6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -30,6 +30,25 @@ namespace } throw EXCEPTION("Unknow depth format"); } + + u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size) + { + /** + * The buffer texture spec only allows fetches aligned to 8, 16, 32, etc... + * This rules out most 3-component formats, except for the 32-wide RGB32F, RGB32I, RGB32UI + */ + const u32 vec1_types[] = { GL_R16, GL_R32F, GL_R16F, GL_R8, GL_R32I, GL_R16F, GL_R8 }; + const u32 vec2_types[] = { GL_RG16, GL_RG32F, GL_RG16F, GL_RG8, GL_RG32I, GL_RG16F, GL_RG8 }; + const u32 vec3_types[] = { GL_RGBA16, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGB32I, GL_RGBA16F, GL_RGBA8 }; //VEC3 COMPONENTS NOT SUPPORTED! + const u32 vec4_types[] = { GL_RGBA16, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA32I, GL_RGBA16F, GL_RGBA8 }; + + const u32* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; + + if (type > rsx::vertex_base_type::ub256) + throw EXCEPTION("OpenGL error: unknown vertex base type 0x%X.", (u32)type); + + return vec_selectors[size][(int)type]; + } } GLGSRender::GLGSRender() : GSRender(frame_type::OpenGL) @@ -397,11 +416,11 @@ void GLGSRender::end() }; u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; - m_vao.bind(); std::vector vertex_index_array; vertex_draw_count = 0; u32 min_index, max_index; + if (draw_command == rsx::draw_command::indexed) { rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); @@ -426,6 +445,7 @@ void GLGSRender::end() if (draw_command == rsx::draw_command::inlined_array) { + vertex_arrays_data.resize(inline_vertex_array.size() * sizeof(u32)); write_inline_array_to_buffer(vertex_arrays_data.data()); u32 offset = 0; 
for (int index = 0; index < rsx::limits::vertex_count; ++index) @@ -436,12 +456,25 @@ void GLGSRender::end() continue; int location; - if (!m_program->attribs.has_location(reg_table[index], &location)) + if (!m_program->uniforms.has_location(reg_table[index] + "_buffer", &location)) continue; - __glcheck m_program->attribs[location] = - (m_vao + offset) - .config(gl_types(vertex_info.type), vertex_info.size, gl_normalized(vertex_info.type)); + const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); + const u32 data_size = element_size * vertex_draw_count; + + auto &buffer = m_gl_attrib_buffers[index].buffer; + auto &texture = m_gl_attrib_buffers[index].texture; + + buffer->data(data_size, nullptr); + buffer->sub_data(0, data_size, vertex_arrays_data.data()+offset); + + //Attach buffer to texture + texture->copy_from(*buffer, gl_type); + + //Link texture to uniform + m_program->uniforms.texture(location, index +rsx::limits::vertex_count, *texture); + offset += rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); } } @@ -463,7 +496,7 @@ void GLGSRender::end() continue; int location; - if (!m_program->attribs.has_location(reg_table[index], &location)) + if (!m_program->uniforms.has_location(reg_table[index]+"_buffer", &location)) continue; if (vertex_arrays_info[index].size > 0) @@ -495,29 +528,48 @@ void GLGSRender::end() vertex_arrays_offsets[index] = gsl::narrow(position); vertex_arrays_data.resize(position + size); - memcpy(vertex_arrays_data.data() + position, vertex_array.data(), size); + const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); + const u32 data_size = element_size * vertex_draw_count; - __glcheck m_program->attribs[location] = - (m_vao + vertex_arrays_offsets[index]) - .config(gl_types(vertex_info.type), vertex_info.size, gl_normalized(vertex_info.type)); + auto &buffer = 
m_gl_attrib_buffers[index].buffer; + auto &texture = m_gl_attrib_buffers[index].texture; + + buffer->data(data_size, nullptr); + buffer->sub_data(0, data_size, vertex_array.data()); + + //Attach buffer to texture + texture->copy_from(*buffer, gl_type); + + //Link texture to uniform + m_program->uniforms.texture(location, index + rsx::limits::vertex_count, *texture); } else if (register_vertex_info[index].size > 0) { + //Untested! auto &vertex_data = register_vertex_data[index]; auto &vertex_info = register_vertex_info[index]; switch (vertex_info.type) { case rsx::vertex_base_type::f: - switch (register_vertex_info[index].size) - { - case 1: apply_attrib_array(*m_program, location, vertex_data); break; - case 2: apply_attrib_array(*m_program, location, vertex_data); break; - case 3: apply_attrib_array(*m_program, location, vertex_data); break; - case 4: apply_attrib_array(*m_program, location, vertex_data); break; - } - break; + { + const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); + const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); + const u32 data_size = vertex_data.size(); + auto &buffer = m_gl_attrib_buffers[index].buffer; + auto &texture = m_gl_attrib_buffers[index].texture; + + buffer->data(data_size, nullptr); + buffer->sub_data(0, data_size, vertex_data.data()); + + //Attach buffer to texture + texture->copy_from(*buffer, gl_type); + + //Link texture to uniform + m_program->uniforms.texture(location, index + rsx::limits::vertex_count, *texture); + break; + } default: LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size); break; @@ -525,7 +577,11 @@ void GLGSRender::end() } } } - m_vbo.data(vertex_arrays_data.size(), vertex_arrays_data.data()); + +// glDraw* will fail without at least attrib0 defined if we are on compatibility profile +// Someone should really test AMD behaviour here, Nvidia is too permissive. 
There is no buffer currently bound, but on NV it works ok + glEnableVertexAttribArray(0); + glVertexAttribPointer(0, 2, GL_FLOAT, false, 0, 0); if (draw_command == rsx::draw_command::indexed) { @@ -609,6 +665,18 @@ void GLGSRender::on_init_thread() m_vao.array_buffer = m_vbo; m_vao.element_array_buffer = m_ebo; + + for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + { + gl::texture *&tex = attrib_buffer.texture; + tex = new gl::texture(gl::texture::target::textureBuffer); + tex->create(); + tex->set_target(gl::texture::target::textureBuffer); + + gl::buffer *&buf = attrib_buffer.buffer; + buf = new gl::buffer(); + buf->create(); + } } void GLGSRender::on_exit() @@ -649,6 +717,19 @@ void GLGSRender::on_exit() if (m_fragment_constants_buffer) m_fragment_constants_buffer.remove(); + + for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + { + gl::texture *&tex = attrib_buffer.texture; + tex->remove(); + delete tex; + tex = nullptr; + + gl::buffer *&buf = attrib_buffer.buffer; + buf->remove(); + delete buf; + buf = nullptr; + } } void nv4097_clear_surface(u32 arg, GLGSRender* renderer) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ec7babfe86..bcc77b5300 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -22,6 +22,13 @@ private: rsx::surface_info m_surface; + struct texture_buffer_pair + { + gl::texture *texture; + gl::buffer *buffer; + } + m_gl_attrib_buffers[rsx::limits::vertex_count]; + public: gl::fbo draw_fbo; diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 0a4bb05c13..89f971baee 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -166,6 +166,10 @@ OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); +//Texture Buffers +OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer); +OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT); + 
//KHR_debug OPENGL_PROC(PFNGLDEBUGMESSAGECONTROLARBPROC, DebugMessageControlARB); OPENGL_PROC(PFNGLDEBUGMESSAGEINSERTARBPROC, DebugMessageInsertARB); diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 7fcb207e89..c448f48091 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -27,7 +27,7 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) { - OS << "#version 420" << std::endl << std::endl; + OS << "#version 430" << std::endl << std::endl; OS << "layout(std140, binding = 0) uniform ScaleOffsetBuffer" << std::endl; OS << "{" << std::endl; OS << " mat4 scaleOffsetMat;" << std::endl; @@ -36,10 +36,33 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector& inputs) { - for (const ParamType PT : inputs) + std::vector> input_data; + for (const ParamType &PT : inputs) { for (const ParamItem &PI : PT.items) - OS << /*"layout(location = " << PI.location << ") "*/ "in " << PT.type << " " << PI.name << ";" << std::endl; + { + input_data.push_back(std::make_tuple(PI.location, PI.name)); + } + } + + /** + * It is important that the locations are in the order that vertex attributes are expected. 
+ * If order is not adhered to, channels may be swapped leading to corruption + */ + + std::sort(input_data.begin(), input_data.end()); + + int location = 1; + for (const std::tuple item : input_data) + { + for (const ParamType &PT : inputs) + { + for (const ParamItem &PI : PT.items) + { + if (PI.name == std::get<1>(item)) + OS << "layout(location=" << location++ << ")" << " uniform samplerBuffer" << " " << PI.name << "_buffer;" << std::endl; + } + } } } @@ -101,6 +124,37 @@ void GLVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std:: } } +void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) +{ + for (const auto &real_input : inputs) + { + if (real_input.location != PI.location) + continue; + + if (!real_input.is_array) + { + OS << " vec4 " << PI.name << " = texelFetch(" << PI.name << "_buffer, 0);" << std::endl; + return; + } + + if (real_input.frequency > 1) + { + if (real_input.is_modulo) + { + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID %" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID /" << real_input.frequency << ");" << std::endl; + return; + } + + OS << " vec4 " << PI.name << "= texelFetch(" << PI.name << "_buffer, gl_VertexID);" << std::endl; + return; + } + + OS << " vec4 " << PI.name << " = vec4(0., 0., 0., 1.);" << std::endl; +} void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) { @@ -118,6 +172,12 @@ void GLVertexDecompilerThread::insertMainStart(std::stringstream & OS) OS << ";" << std::endl; } } + + for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) + { + for (const ParamItem &PI : PT.items) + add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + } } void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.h b/rpcs3/Emu/RSX/GL/GLVertexProgram.h index fc116b7470..117e040934 100644 
--- a/rpcs3/Emu/RSX/GL/GLVertexProgram.h +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.h @@ -19,10 +19,13 @@ protected: virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs) override; virtual void insertMainStart(std::stringstream &OS) override; virtual void insertMainEnd(std::stringstream &OS) override; + + const RSXVertexProgram &rsx_vertex_program; public: GLVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray& parr) : VertexProgramDecompiler(prog) , m_shader(shader) + , rsx_vertex_program(prog) { } diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 568e511ad4..7e7c121930 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -830,7 +830,8 @@ namespace gl { texture1D = GL_TEXTURE_1D, texture2D = GL_TEXTURE_2D, - texture3D = GL_TEXTURE_3D + texture3D = GL_TEXTURE_3D, + textureBuffer = GL_TEXTURE_BUFFER }; enum class channel_type @@ -863,9 +864,10 @@ namespace gl GLenum pname; switch (new_binding.get_target()) { - case target::texture1D: pname = GL_TEXTURE_1D_BINDING_EXT; break; - case target::texture2D: pname = GL_TEXTURE_2D_BINDING_EXT; break; - case target::texture3D: pname = GL_TEXTURE_3D_BINDING_EXT; break; + case target::texture1D: pname = GL_TEXTURE_BINDING_1D; break; + case target::texture2D: pname = GL_TEXTURE_BINDING_2D; break; + case target::texture3D: pname = GL_TEXTURE_BINDING_3D; break; + case target::textureBuffer: pname = GL_TEXTURE_BINDING_BUFFER; break; } glGetIntegerv(pname, &m_last_binding); @@ -1131,6 +1133,29 @@ namespace gl __glcheck glTexSubImage2D((GLenum)get_target(), level(), 0, 0, width(), height(), (GLenum)format, (GLenum)type, src); } + void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length) + { + if (get_target() != target::textureBuffer) + throw EXCEPTION("OpenGL error: texture cannot copy from buffer"); + + if (!offset) + { + copy_from(buf, gl_format_type); + return; + } + + if (glTextureBufferRangeEXT == 
nullptr) + throw EXCEPTION("OpenGL error: partial buffer access for textures is unsupported on your system"); + + __glcheck glTextureBufferRangeEXT(id(), (GLenum)target::textureBuffer, gl_format_type, buf.id(), offset, length); + } + + void copy_from(buffer &buf, u32 gl_format_type) + { + save_binding_state save(*this); + __glcheck glTexBuffer((GLenum)target::textureBuffer, gl_format_type, buf.id()); + } + void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) { buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf);