From ed946264118900755c2259fdf48f475b187e260c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 11 Jun 2016 22:51:34 +0300 Subject: [PATCH] gl: use unsynchronized buffer streaming for attribs fix linux build --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 30 +++------ rpcs3/Emu/RSX/GL/GLGSRender.h | 10 +-- rpcs3/Emu/RSX/GL/GLProcTable.h | 2 +- rpcs3/Emu/RSX/GL/gl_helpers.h | 99 +++++++++++++++++++++++++++++- rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 92 +++++++++++++-------------- 5 files changed, 157 insertions(+), 76 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3fd6cc6336..8838d23896 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -299,7 +299,8 @@ void GLGSRender::end() * samplers. So far only sampler2D has been largely used, hiding the problem. This call shall also degrade performance further * if used every draw call. Fixes shader validation issues on AMD. */ - m_program->validate(); + if (g_cfg_rsx_debug_output) + m_program->validate(); if (draw_command == rsx::draw_command::indexed) { @@ -388,18 +389,13 @@ void GLGSRender::on_init_thread() m_vao.array_buffer = m_vbo; m_vao.element_array_buffer = m_ebo; - for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + for (gl::texture &tex : m_gl_attrib_buffers) { - gl::texture *&tex = attrib_buffer.texture; - tex = new gl::texture(gl::texture::target::textureBuffer); - tex->create(); - tex->set_target(gl::texture::target::textureBuffer); - - gl::buffer *&buf = attrib_buffer.buffer; - buf = new gl::buffer(); - buf->create(); + tex.create(); + tex.set_target(gl::texture::target::textureBuffer); } + m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000)); m_gl_texture_cache.initialize_rtt_cache(); } @@ -436,18 +432,12 @@ void GLGSRender::on_exit() if (m_fragment_constants_buffer) m_fragment_constants_buffer.remove(); - for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + for (gl::texture &tex : m_gl_attrib_buffers) { - gl::texture *&tex = attrib_buffer.texture; - tex->remove(); - delete tex; - tex = nullptr; - - gl::buffer *&buf = attrib_buffer.buffer; - buf->remove(); - delete buf; - buf = nullptr; + tex.remove(); } + + m_attrib_ring_buffer->destroy(); } void nv4097_clear_surface(u32 arg, GLGSRender* renderer) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 88800dcacb..ae732c1abd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -25,15 +25,11 @@ private: rsx::surface_info m_surface; gl_render_targets m_rtts; - struct texture_buffer_pair - { - gl::texture *texture; - gl::buffer *buffer; - } - m_gl_attrib_buffers[rsx::limits::vertex_count]; - gl::gl_texture_cache m_gl_texture_cache; + gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; + std::unique_ptr m_attrib_ring_buffer; + public: gl::fbo draw_fbo; diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 086743442d..4b374615f2 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -163,7 +163,7 @@ OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v); OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); - +OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); //Texture Buffers diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 18e337b352..7fda56c72c 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -469,7 +469,7 @@ namespace gl { target target_ = current_target(); save_binding_state save(target_, *this); - glBufferData((GLenum)target_, size, data_, GL_STREAM_COPY); + glBufferData((GLenum)target_, size, data_, GL_STREAM_DRAW); m_size = size; } @@ -569,6 +569,99 @@ namespace gl } }; + class ring_buffer + { + buffer storage_buffer; + u32 m_data_loc = 0; + u32 m_size; + + u32 m_mapped_block_size = 0; + u32 m_mapped_block_offset; + u32 m_mapped_reserve_offset; + u32 m_mapped_bytes_available; + void *m_mapped_base = nullptr; + + public: + ring_buffer(u32 initial_size) + { + storage_buffer.create(); + storage_buffer.data(initial_size); + m_size = initial_size; + } + + void destroy() + { + storage_buffer.remove(); + } + + std::pair alloc_and_map(u32 size) + { + size = (size + 255) & ~255; + + //storage_buffer.bind(storage_buffer.current_target()); + glBindBuffer(GL_TEXTURE_BUFFER, storage_buffer.id()); + u32 limit = m_data_loc + size; + if (limit > m_size) + { + //Orphan this buffer and have the driver allocate a new one instead of looping back to the front. + //Hopefully, the driver will track usage here and re-use if sync is not a problem + if (size > m_size) + m_size = size; + + storage_buffer.data(m_size, nullptr); + m_data_loc = 0; + } + + void *ptr = glMapBufferRange(GL_TEXTURE_BUFFER, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); + u32 offset = m_data_loc; + m_data_loc += size; + return std::make_pair(ptr, offset); + } + + void unmap() + { + //storage_buffer.unmap(); + glUnmapBuffer(GL_TEXTURE_BUFFER); + m_mapped_block_size = 0; + m_mapped_base = 0; + } + + void reserve_and_map(u32 max_size) + { + max_size = (max_size + 4095) & ~4095; + auto mapping = alloc_and_map(max_size); + m_mapped_base = mapping.first; + m_mapped_block_offset = mapping.second; + m_mapped_reserve_offset = 0; + m_mapped_bytes_available = max_size; + } + + std::pair alloc_from_reserve(u32 size) + { + size = (size + 255) & ~255; + + if (m_mapped_bytes_available < size || !m_mapped_base) + { + if (m_mapped_base) + unmap(); + + reserve_and_map((size > 4096) ? size : 4096); + } + + void *ptr = (char*)m_mapped_base + m_mapped_reserve_offset; + u32 offset = m_mapped_reserve_offset + m_mapped_block_offset; + m_mapped_reserve_offset += size; + m_mapped_bytes_available -= size; + + return std::make_pair(ptr, offset); + } + + buffer& get_buffer() + { + return storage_buffer; + } + }; + class vao { template @@ -1140,11 +1233,11 @@ namespace gl if (get_target() != target::textureBuffer) throw EXCEPTION("OpenGL error: texture cannot copy from buffer"); - if (!offset) +/* if (!offset) { copy_from(buf, gl_format_type); return; - } + }*/ if (glTextureBufferRangeEXT == nullptr) throw EXCEPTION("OpenGL error: partial buffer access for textures is unsupported on your system"); diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 4daf8db025..31a38fa17b 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -158,7 +158,7 @@ void GLGSRender::set_vertex_buffer() //initialize vertex attributes //merge all vertex arrays - std::vector vertex_arrays_data; + //std::vector vertex_arrays_data; const std::string reg_table[] = { @@ -176,6 +176,15 @@ void GLGSRender::set_vertex_buffer() vertex_draw_count = 0; u32 min_index, max_index; + u32 max_vertex_attrib_size = 0; + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + if (vertex_arrays_info[index].size == 0) + continue; + + max_vertex_attrib_size += (vertex_arrays_info[index].size << 2); + } + if (draw_command == rsx::draw_command::indexed) { rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); @@ -207,6 +216,7 @@ void GLGSRender::set_vertex_buffer() } vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -228,12 +238,11 @@ void GLGSRender::set_vertex_buffer() u32 data_size = element_size * vertex_draw_count; u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; + auto &texture = m_gl_attrib_buffers[index]; - vertex_arrays_data.resize(data_size); u8 *src = reinterpret_cast(inline_vertex_array.data()); - u8 *dst = vertex_arrays_data.data(); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + u8 *dst = static_cast(mapping.first); src += offsets[index]; prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); @@ -255,14 +264,10 @@ void GLGSRender::set_vertex_buffer() dst += element_size; } - buffer->data(data_size, nullptr); - buffer->sub_data(0, data_size, vertex_arrays_data.data()); - - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); if (!is_primitive_native(draw_mode)) { std::tie(vertex_draw_count, vertex_index_array) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode); @@ -280,6 +285,8 @@ void GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) { + m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); + for (int index = 0; index < rsx::limits::vertex_count; ++index) { int location; @@ -298,12 +305,16 @@ void GLGSRender::set_vertex_buffer() if (vertex_arrays_info[index].size > 0) { auto &vertex_info = vertex_arrays_info[index]; - // Active vertex array - std::vector vertex_array; // Fill vertex_array u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); - vertex_array.resize(vertex_draw_count * element_size); + //vertex_array.resize(vertex_draw_count * element_size); + + u32 data_size = vertex_draw_count * element_size; + u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); + auto &texture = m_gl_attrib_buffers[index]; + + u32 buffer_offset = 0; // Get source pointer u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; @@ -313,9 +324,13 @@ void GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array) { + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + gsl::byte *dst = static_cast(mapping.first); + buffer_offset = mapping.second; + size_t offset = 0; - gsl::span dest_span(vertex_array); - prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + gsl::span dest_span(dst, data_size); + prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); for (const auto &first_count : first_count_commands) { @@ -325,30 +340,21 @@ void GLGSRender::set_vertex_buffer() } if (draw_command == rsx::draw_command::indexed) { - vertex_array.resize((max_index + 1) * element_size); - gsl::span dest_span(vertex_array); - prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + data_size = (max_index + 1) * element_size; + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + gsl::byte *dst = static_cast(mapping.first); + buffer_offset = mapping.second; + + gsl::span dest_span(dst, data_size); + prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size)); } - size_t size = vertex_array.size(); - size_t position = vertex_arrays_data.size(); - vertex_arrays_data.resize(position + size); - - u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); - - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; - - buffer->data(static_cast(size), nullptr); - buffer->sub_data(0, static_cast(size), vertex_array.data()); - - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, buffer_offset, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); } else if (register_vertex_info[index].size > 0) { @@ -364,17 +370,16 @@ void GLGSRender::set_vertex_buffer() const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); const size_t data_size = vertex_data.size(); - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; + auto &texture = m_gl_attrib_buffers[index]; - buffer->data(data_size, nullptr); - buffer->sub_data(0, data_size, vertex_data.data()); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + u8 *dst = static_cast(mapping.first); - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + memcpy(dst, vertex_data.data(), data_size); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); break; } default: @@ -396,10 +401,7 @@ void GLGSRender::set_vertex_buffer() } } - // glDraw* will fail without at least attrib0 defined if we are on compatibility profile - // Someone should really test AMD behaviour here, Nvidia is too permissive. There is no buffer currently bound, but on NV it works ok - glEnableVertexAttribArray(0); - glVertexAttribPointer(0, 2, GL_FLOAT, false, 0, 0); + m_attrib_ring_buffer->unmap(); if (draw_command == rsx::draw_command::indexed) {