From 40a598b0ab6f7389470ce7ddd92f2e733cc37dce Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 9 Jun 2016 15:57:05 +0300 Subject: [PATCH 1/5] gl/vk: Quickly ignore access violations outside cache range --- rpcs3/Emu/RSX/GL/gl_texture_cache.h | 12 ++++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 12 +++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/GL/gl_texture_cache.h b/rpcs3/Emu/RSX/GL/gl_texture_cache.h index d87a07e304..ceedf7f19b 100644 --- a/rpcs3/Emu/RSX/GL/gl_texture_cache.h +++ b/rpcs3/Emu/RSX/GL/gl_texture_cache.h @@ -62,6 +62,8 @@ namespace gl std::vector texture_cache; std::vector rtt_cache; u32 frame_ctr; + std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); + u32 max_tex_address = 0; bool lock_memory_region(u32 start, u32 size) { @@ -69,6 +71,12 @@ namespace gl start = start & ~(memory_page_size - 1); size = (u32)align(size, memory_page_size); + if (start < texture_cache_range.first) + texture_cache_range = std::make_pair(start, texture_cache_range.second); + + if ((start+size) > texture_cache_range.second) + texture_cache_range = std::make_pair(texture_cache_range.first, (start+size)); + return vm::page_protect(start, size, 0, 0, vm::page_writable); } @@ -500,6 +508,10 @@ namespace gl bool mark_as_dirty(u32 address) { + if (address < texture_cache_range.first || + address > texture_cache_range.second) + return false; + bool response = false; for (gl_cached_texture &tex: texture_cache) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index e9f16b0fc4..29d447b172 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -30,7 +30,7 @@ namespace vk { private: std::vector m_cache; - + std::pair texture_cache_range = std::make_pair(0xFFFFFFFF, 0); std::vector > m_temporary_image_view; bool lock_memory_region(u32 start, u32 size) @@ -120,6 +120,12 @@ namespace vk obj.protected_rgn_end += obj.protected_rgn_start; lock_memory_region(static_cast(obj.protected_rgn_start), static_cast(obj.native_rsx_size)); + + if (obj.protected_rgn_start < texture_cache_range.first) + texture_cache_range = std::make_pair(obj.protected_rgn_start, texture_cache_range.second); + + if (obj.protected_rgn_end > texture_cache_range.second) + texture_cache_range = std::make_pair(texture_cache_range.first, obj.protected_rgn_end); } void unlock_object(cached_texture_object &obj) @@ -254,6 +260,10 @@ namespace vk bool invalidate_address(u32 rsx_address) { + if (rsx_address < texture_cache_range.first || + rsx_address > texture_cache_range.second) + return false; + for (cached_texture_object &tex : m_cache) { if (tex.dirty) continue; From ed946264118900755c2259fdf48f475b187e260c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 11 Jun 2016 22:51:34 +0300 Subject: [PATCH 2/5] gl: use unsynchronized buffer streaming for attribs fix linux build --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 30 +++------ rpcs3/Emu/RSX/GL/GLGSRender.h | 10 +-- rpcs3/Emu/RSX/GL/GLProcTable.h | 2 +- rpcs3/Emu/RSX/GL/gl_helpers.h | 99 +++++++++++++++++++++++++++++- rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 92 +++++++++++++-------------- 5 files changed, 157 insertions(+), 76 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 3fd6cc6336..8838d23896 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -299,7 +299,8 @@ void GLGSRender::end() * samplers. So far only sampler2D has been largely used, hiding the problem. This call shall also degrade performance further * if used every draw call. Fixes shader validation issues on AMD. */ - m_program->validate(); + if (g_cfg_rsx_debug_output) + m_program->validate(); if (draw_command == rsx::draw_command::indexed) { @@ -388,18 +389,13 @@ void GLGSRender::on_init_thread() m_vao.array_buffer = m_vbo; m_vao.element_array_buffer = m_ebo; - for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + for (gl::texture &tex : m_gl_attrib_buffers) { - gl::texture *&tex = attrib_buffer.texture; - tex = new gl::texture(gl::texture::target::textureBuffer); - tex->create(); - tex->set_target(gl::texture::target::textureBuffer); - - gl::buffer *&buf = attrib_buffer.buffer; - buf = new gl::buffer(); - buf->create(); + tex.create(); + tex.set_target(gl::texture::target::textureBuffer); } + m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000)); m_gl_texture_cache.initialize_rtt_cache(); } @@ -436,18 +432,12 @@ void GLGSRender::on_exit() if (m_fragment_constants_buffer) m_fragment_constants_buffer.remove(); - for (texture_buffer_pair &attrib_buffer : m_gl_attrib_buffers) + for (gl::texture &tex : m_gl_attrib_buffers) { - gl::texture *&tex = attrib_buffer.texture; - tex->remove(); - delete tex; - tex = nullptr; - - gl::buffer *&buf = attrib_buffer.buffer; - buf->remove(); - delete buf; - buf = nullptr; + tex.remove(); } + + m_attrib_ring_buffer->destroy(); } void nv4097_clear_surface(u32 arg, GLGSRender* renderer) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 88800dcacb..ae732c1abd 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -25,15 +25,11 @@ private: rsx::surface_info m_surface; gl_render_targets m_rtts; - struct texture_buffer_pair - { - gl::texture *texture; - gl::buffer *buffer; - } - m_gl_attrib_buffers[rsx::limits::vertex_count]; - gl::gl_texture_cache m_gl_texture_cache; + gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; + std::unique_ptr m_attrib_ring_buffer; + public: gl::fbo draw_fbo; diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 086743442d..4b374615f2 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -163,7 +163,7 @@ OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v); OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); - +OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); //Texture Buffers diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 18e337b352..7fda56c72c 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -469,7 +469,7 @@ namespace gl { target target_ = current_target(); save_binding_state save(target_, *this); - glBufferData((GLenum)target_, size, data_, GL_STREAM_COPY); + glBufferData((GLenum)target_, size, data_, GL_STREAM_DRAW); m_size = size; } @@ -569,6 +569,99 @@ namespace gl } }; + class ring_buffer + { + buffer storage_buffer; + u32 m_data_loc = 0; + u32 m_size; + + u32 m_mapped_block_size = 0; + u32 m_mapped_block_offset; + u32 m_mapped_reserve_offset; + u32 m_mapped_bytes_available; + void *m_mapped_base = nullptr; + + public: + ring_buffer(u32 initial_size) + { + storage_buffer.create(); + storage_buffer.data(initial_size); + m_size = initial_size; + } + + void destroy() + { + storage_buffer.remove(); + } + + std::pair alloc_and_map(u32 size) + { + size = (size + 255) & ~255; + + //storage_buffer.bind(storage_buffer.current_target()); + glBindBuffer(GL_TEXTURE_BUFFER, storage_buffer.id()); + u32 limit = m_data_loc + size; + if (limit > m_size) + { + //Orphan this buffer and have the driver allocate a new one instead of looping back to the front. + //Hopefully, the driver will track usage here and re-use if sync is not a problem + if (size > m_size) + m_size = size; + + storage_buffer.data(m_size, nullptr); + m_data_loc = 0; + } + + void *ptr = glMapBufferRange(GL_TEXTURE_BUFFER, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); + u32 offset = m_data_loc; + m_data_loc += size; + return std::make_pair(ptr, offset); + } + + void unmap() + { + //storage_buffer.unmap(); + glUnmapBuffer(GL_TEXTURE_BUFFER); + m_mapped_block_size = 0; + m_mapped_base = 0; + } + + void reserve_and_map(u32 max_size) + { + max_size = (max_size + 4095) & ~4095; + auto mapping = alloc_and_map(max_size); + m_mapped_base = mapping.first; + m_mapped_block_offset = mapping.second; + m_mapped_reserve_offset = 0; + m_mapped_bytes_available = max_size; + } + + std::pair alloc_from_reserve(u32 size) + { + size = (size + 255) & ~255; + + if (m_mapped_bytes_available < size || !m_mapped_base) + { + if (m_mapped_base) + unmap(); + + reserve_and_map((size > 4096) ? size : 4096); + } + + void *ptr = (char*)m_mapped_base + m_mapped_reserve_offset; + u32 offset = m_mapped_reserve_offset + m_mapped_block_offset; + m_mapped_reserve_offset += size; + m_mapped_bytes_available -= size; + + return std::make_pair(ptr, offset); + } + + buffer& get_buffer() + { + return storage_buffer; + } + }; + class vao { template @@ -1140,11 +1233,11 @@ namespace gl if (get_target() != target::textureBuffer) throw EXCEPTION("OpenGL error: texture cannot copy from buffer"); - if (!offset) +/* if (!offset) { copy_from(buf, gl_format_type); return; - } + }*/ if (glTextureBufferRangeEXT == nullptr) throw EXCEPTION("OpenGL error: partial buffer access for textures is unsupported on your system"); diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 4daf8db025..31a38fa17b 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -158,7 +158,7 @@ void GLGSRender::set_vertex_buffer() //initialize vertex attributes //merge all vertex arrays - std::vector vertex_arrays_data; + //std::vector vertex_arrays_data; const std::string reg_table[] = { @@ -176,6 +176,15 @@ void GLGSRender::set_vertex_buffer() vertex_draw_count = 0; u32 min_index, max_index; + u32 max_vertex_attrib_size = 0; + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + if (vertex_arrays_info[index].size == 0) + continue; + + max_vertex_attrib_size += (vertex_arrays_info[index].size << 2); + } + if (draw_command == rsx::draw_command::indexed) { rsx::index_array_type type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); @@ -207,6 +216,7 @@ void GLGSRender::set_vertex_buffer() } vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -228,12 +238,11 @@ void GLGSRender::set_vertex_buffer() u32 data_size = element_size * vertex_draw_count; u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; + auto &texture = m_gl_attrib_buffers[index]; - vertex_arrays_data.resize(data_size); u8 *src = reinterpret_cast(inline_vertex_array.data()); - u8 *dst = vertex_arrays_data.data(); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + u8 *dst = static_cast(mapping.first); src += offsets[index]; prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); @@ -255,14 +264,10 @@ void GLGSRender::set_vertex_buffer() dst += element_size; } - buffer->data(data_size, nullptr); - buffer->sub_data(0, data_size, vertex_arrays_data.data()); - - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); if (!is_primitive_native(draw_mode)) { std::tie(vertex_draw_count, vertex_index_array) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode); @@ -280,6 +285,8 @@ void GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) { + m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); + for (int index = 0; index < rsx::limits::vertex_count; ++index) { int location; @@ -298,12 +305,16 @@ void GLGSRender::set_vertex_buffer() if (vertex_arrays_info[index].size > 0) { auto &vertex_info = vertex_arrays_info[index]; - // Active vertex array - std::vector vertex_array; // Fill vertex_array u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size); - vertex_array.resize(vertex_draw_count * element_size); + //vertex_array.resize(vertex_draw_count * element_size); + + u32 data_size = vertex_draw_count * element_size; + u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); + auto &texture = m_gl_attrib_buffers[index]; + + u32 buffer_offset = 0; // Get source pointer u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; @@ -313,9 +324,13 @@ void GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array) { + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + gsl::byte *dst = static_cast(mapping.first); + buffer_offset = mapping.second; + size_t offset = 0; - gsl::span dest_span(vertex_array); - prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + gsl::span dest_span(dst, data_size); + prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); for (const auto &first_count : first_count_commands) { @@ -325,30 +340,21 @@ void GLGSRender::set_vertex_buffer() } if (draw_command == rsx::draw_command::indexed) { - vertex_array.resize((max_index + 1) * element_size); - gsl::span dest_span(vertex_array); - prepare_buffer_for_writing(vertex_array.data(), vertex_info.type, vertex_info.size, vertex_draw_count); + data_size = (max_index + 1) * element_size; + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + gsl::byte *dst = static_cast(mapping.first); + buffer_offset = mapping.second; + + gsl::span dest_span(dst, data_size); + prepare_buffer_for_writing(dst, vertex_info.type, vertex_info.size, vertex_draw_count); write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size)); } - size_t size = vertex_array.size(); - size_t position = vertex_arrays_data.size(); - vertex_arrays_data.resize(position + size); - - u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); - - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; - - buffer->data(static_cast(size), nullptr); - buffer->sub_data(0, static_cast(size), vertex_array.data()); - - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, buffer_offset, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); } else if (register_vertex_info[index].size > 0) { @@ -364,17 +370,16 @@ void GLGSRender::set_vertex_buffer() const u32 gl_type = to_gl_internal_type(vertex_info.type, vertex_info.size); const size_t data_size = vertex_data.size(); - auto &buffer = m_gl_attrib_buffers[index].buffer; - auto &texture = m_gl_attrib_buffers[index].texture; + auto &texture = m_gl_attrib_buffers[index]; - buffer->data(data_size, nullptr); - buffer->sub_data(0, data_size, vertex_data.data()); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + u8 *dst = static_cast(mapping.first); - //Attach buffer to texture - texture->copy_from(*buffer, gl_type); + memcpy(dst, vertex_data.data(), data_size); + texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); //Link texture to uniform - m_program->uniforms.texture(location, index + rsx::limits::textures_count, *texture); + m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); break; } default: @@ -396,10 +401,7 @@ void GLGSRender::set_vertex_buffer() } } - // glDraw* will fail without at least attrib0 defined if we are on compatibility profile - // Someone should really test AMD behaviour here, Nvidia is too permissive. There is no buffer currently bound, but on NV it works ok - glEnableVertexAttribArray(0); - glVertexAttribPointer(0, 2, GL_FLOAT, false, 0, 0); + m_attrib_ring_buffer->unmap(); if (draw_command == rsx::draw_command::indexed) { From 6761d4a30cb19a8466c4ff1f43d5e52ad64686ad Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 12 Jun 2016 12:05:22 +0300 Subject: [PATCH 3/5] gl: use streaming buffers for uniform & elem buffer as well gl: stream uniform data using stream buffer gl: vertex streaming improvements and bugfixes gl: add basic timing info check for profiling gl: ebo streaming fixes and enhancements --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 134 +++++++++++++++++------------ rpcs3/Emu/RSX/GL/GLGSRender.h | 16 ++-- rpcs3/Emu/RSX/GL/GLProcTable.h | 1 + rpcs3/Emu/RSX/GL/gl_helpers.h | 37 +++++--- rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 63 ++++++++------ 5 files changed, 150 insertions(+), 101 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 8838d23896..778267c3f3 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -7,6 +7,7 @@ #include "../Common/BufferUtils.h" extern cfg::bool_entry g_cfg_rsx_debug_output; +extern cfg::bool_entry g_cfg_rsx_overlay; #define DUMP_VERTEX_DATA 0 @@ -70,6 +71,8 @@ void GLGSRender::begin() init_buffers(); + std::chrono::time_point then = std::chrono::system_clock::now(); + u32 color_mask = rsx::method_registers[NV4097_SET_COLOR_MASK]; bool color_mask_b = !!(color_mask & 0xff); bool color_mask_g = !!((color_mask >> 8) & 0xff); @@ -241,6 +244,10 @@ void GLGSRender::begin() { __glcheck glPrimitiveRestartIndex(rsx::method_registers[NV4097_SET_RESTART_INDEX]); } + + std::chrono::time_point now = std::chrono::system_clock::now(); + m_begin_time += std::chrono::duration_cast(now - then).count(); + m_draw_calls++; } namespace @@ -266,8 +273,6 @@ void GLGSRender::end() return; } - //LOG_NOTICE(Log::RSX, "draw()"); - draw_fbo.bind(); m_program->use(); @@ -292,13 +297,11 @@ void GLGSRender::end() } } - set_vertex_buffer(); + u32 offset_in_index_buffer = set_vertex_buffer(); + m_vao.bind(); + + std::chrono::time_point then = std::chrono::system_clock::now(); - /** - * Validate fails if called right after linking a program because the VS and FS both use textures bound using different - * samplers. So far only sampler2D has been largely used, hiding the problem. This call shall also degrade performance further - * if used every draw call. Fixes shader validation issues on AMD. - */ if (g_cfg_rsx_debug_output) m_program->validate(); @@ -307,19 +310,22 @@ void GLGSRender::end() rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); if (indexed_type == rsx::index_array_type::u32) - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, nullptr); + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(offset_in_index_buffer)); if (indexed_type == rsx::index_array_type::u16) - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, nullptr); + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer)); } else if (!is_primitive_native(draw_mode)) { - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, nullptr); + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer)); } else { draw_fbo.draw_arrays(draw_mode, vertex_draw_count); } + std::chrono::time_point now = std::chrono::system_clock::now(); + m_draw_time += std::chrono::duration_cast(now - then).count(); + write_buffers(); rsx::thread::end(); @@ -376,18 +382,6 @@ void GLGSRender::on_init_thread() glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); m_vao.create(); - m_vbo.create(); - m_ebo.create(); - m_scale_offset_buffer.create(32 * sizeof(float)); - m_vertex_constants_buffer.create(512 * 4 * sizeof(float)); - m_fragment_constants_buffer.create(); - - glBindBufferBase(GL_UNIFORM_BUFFER, 0, m_scale_offset_buffer.id()); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, m_vertex_constants_buffer.id()); - glBindBufferBase(GL_UNIFORM_BUFFER, 2, m_fragment_constants_buffer.id()); - - m_vao.array_buffer = m_vbo; - m_vao.element_array_buffer = m_ebo; for (gl::texture &tex : m_gl_attrib_buffers) { @@ -395,7 +389,11 @@ void GLGSRender::on_init_thread() tex.set_target(gl::texture::target::textureBuffer); } - m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000)); + m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::texture)); + m_uniform_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::uniform)); + m_index_ring_buffer.reset(new gl::ring_buffer(0x100000, gl::buffer::target::element_array)); + + m_vao.element_array_buffer = m_index_ring_buffer->get_buffer(); m_gl_texture_cache.initialize_rtt_cache(); } @@ -414,30 +412,17 @@ void GLGSRender::on_exit() if (m_flip_tex_color) m_flip_tex_color.remove(); - if (m_vbo) - m_vbo.remove(); - - if (m_ebo) - m_ebo.remove(); - if (m_vao) m_vao.remove(); - if (m_scale_offset_buffer) - m_scale_offset_buffer.remove(); - - if (m_vertex_constants_buffer) - m_vertex_constants_buffer.remove(); - - if (m_fragment_constants_buffer) - m_fragment_constants_buffer.remove(); - for (gl::texture &tex : m_gl_attrib_buffers) { tex.remove(); } m_attrib_ring_buffer->destroy(); + m_uniform_ring_buffer->destroy(); + m_index_ring_buffer->destroy(); } void nv4097_clear_surface(u32 arg, GLGSRender* renderer) @@ -570,32 +555,47 @@ bool GLGSRender::load_program() (m_program.recreate() += { fp.compile(), vp.compile() }).make(); #endif - size_t max_buffer_sz =(size_t) m_vertex_constants_buffer.size(); - size_t fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - if (fragment_constants_sz > max_buffer_sz) - max_buffer_sz = fragment_constants_sz; + u32 fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); + fragment_constants_sz = std::max(32U, fragment_constants_sz); + u32 max_buffer_sz = 8192 + 512 + fragment_constants_sz; u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF); float alpha_ref = alpha_ref_raw / 255.f; - std::vector client_side_buf(max_buffer_sz); + u8 *buf; + u32 scale_offset_offset; + u32 vertex_constants_offset; + u32 fragment_constants_offset; - fill_scale_offset_data(client_side_buf.data(), false); - memcpy(client_side_buf.data() + 16 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); - memcpy(client_side_buf.data() + 17 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); - memcpy(client_side_buf.data() + 18 * sizeof(float), &is_alpha_tested, sizeof(u32)); - memcpy(client_side_buf.data() + 19 * sizeof(float), &alpha_ref, sizeof(float)); - m_scale_offset_buffer.data(m_scale_offset_buffer.size(), nullptr); - m_scale_offset_buffer.sub_data(0, m_scale_offset_buffer.size(), client_side_buf.data()); + m_uniform_ring_buffer->reserve_and_map(max_buffer_sz); + auto mapping = m_uniform_ring_buffer->alloc_from_reserve(512); + buf = static_cast(mapping.first); + scale_offset_offset = mapping.second; - fill_vertex_program_constants_data(client_side_buf.data()); - m_vertex_constants_buffer.data(m_vertex_constants_buffer.size(), nullptr); - m_vertex_constants_buffer.sub_data(0, m_vertex_constants_buffer.size(), client_side_buf.data()); + fill_scale_offset_data(buf, false); + memcpy(buf + 16 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); + memcpy(buf + 17 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); + memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32)); + memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float)); - m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(client_side_buf.data()), gsl::narrow(fragment_constants_sz) }, fragment_program); - m_fragment_constants_buffer.data(fragment_constants_sz, nullptr); - m_fragment_constants_buffer.sub_data(0, fragment_constants_sz, client_side_buf.data()); + mapping = m_uniform_ring_buffer->alloc_from_reserve(512 * 16); + buf = static_cast(mapping.first); + vertex_constants_offset = mapping.second; + + fill_vertex_program_constants_data(buf); + + mapping = m_uniform_ring_buffer->alloc_from_reserve(fragment_constants_sz); + buf = static_cast(mapping.first); + fragment_constants_offset = mapping.second; + + m_prog_buffer.fill_fragment_constans_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_sz) }, fragment_program); + + m_uniform_ring_buffer->unmap(); + + glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_ring_buffer->get_buffer().id(), scale_offset_offset, 512); + glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_ring_buffer->get_buffer().id(), vertex_constants_offset, 512 * 16); + glBindBufferRange(GL_UNIFORM_BUFFER, 2, m_uniform_ring_buffer->get_buffer().id(), fragment_constants_offset, fragment_constants_sz); return true; } @@ -714,6 +714,26 @@ void GLGSRender::flip(int buffer) } m_frame->flip(m_context); + + if (g_cfg_rsx_overlay) + { + //TODO: Display overlay in a cross-platform manner + //Core context throws wgl font functions out of the window as they use display lists + //Only show debug info if the user really requests it + + if (g_cfg_rsx_debug_output) + { + std::string message = + "draw_calls: " + std::to_string(m_draw_calls) + ", " + "draw_call_setup: " + std::to_string(m_begin_time) + "us, " + "vertex_upload_time: " + std::to_string(m_vertex_upload_time) + "us, " + "draw_call_execution: " + std::to_string(m_draw_time) + "us"; + + LOG_ERROR(RSX, message.c_str()); + } + } + + m_draw_calls = 0; + m_begin_time = 0; + m_draw_time = 0; + m_vertex_upload_time = 0; for (auto &tex : m_rtts.invalidated_resources) { diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ae732c1abd..ce22766993 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -29,6 +29,13 @@ private: gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; std::unique_ptr m_attrib_ring_buffer; + std::unique_ptr m_uniform_ring_buffer; + std::unique_ptr m_index_ring_buffer; + + u32 m_draw_calls = 0; + u32 m_begin_time = 0; + u32 m_draw_time = 0; + u32 m_vertex_upload_time = 0; public: gl::fbo draw_fbo; @@ -40,12 +47,7 @@ private: gl::fbo m_flip_fbo; gl::texture m_flip_tex_color; - gl::buffer m_scale_offset_buffer; - gl::buffer m_vertex_constants_buffer; - gl::buffer m_fragment_constants_buffer; - - gl::buffer m_vbo; - gl::buffer m_ebo; + //vaos are mandatory for core profile gl::vao m_vao; public: @@ -54,7 +56,7 @@ public: private: static u32 enable(u32 enable, u32 cap); static u32 enable(u32 enable, u32 cap, u32 index); - void set_vertex_buffer(); + u32 set_vertex_buffer(); public: bool load_program(); diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 4b374615f2..04d99aee12 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -164,6 +164,7 @@ OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v); OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange); +OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); //Texture Buffers diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 7fda56c72c..c24e9487ac 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -372,7 +372,9 @@ namespace gl pixel_pack = GL_PIXEL_PACK_BUFFER, pixel_unpack = GL_PIXEL_UNPACK_BUFFER, array = GL_ARRAY_BUFFER, - element_array = GL_ELEMENT_ARRAY_BUFFER + element_array = GL_ELEMENT_ARRAY_BUFFER, + uniform = GL_UNIFORM_BUFFER, + texture = GL_TEXTURE_BUFFER }; enum class access { @@ -421,6 +423,8 @@ namespace gl case target::pixel_unpack: pname = GL_PIXEL_UNPACK_BUFFER_BINDING; break; case target::array: pname = GL_ARRAY_BUFFER_BINDING; break; case target::element_array: pname = GL_ELEMENT_ARRAY_BUFFER_BINDING; break; + case target::uniform: pname = GL_UNIFORM_BUFFER_BINDING; break; + case target::texture: pname = GL_TEXTURE_BUFFER_BINDING; break; } glGetIntegerv(pname, &m_last_binding); @@ -465,6 +469,13 @@ namespace gl data(size, data_); } + void create(target target_, GLsizeiptr size, const void* data_ = nullptr) + { + create(); + m_target = target_; + data(size, data_); + } + void data(GLsizeiptr size, const void* data_ = nullptr) { target target_ = current_target(); @@ -572,6 +583,7 @@ namespace gl class ring_buffer { buffer storage_buffer; + buffer::target m_target; u32 m_data_loc = 0; u32 m_size; @@ -582,11 +594,12 @@ namespace gl void *m_mapped_base = nullptr; public: - ring_buffer(u32 initial_size) + ring_buffer(u32 initial_size, buffer::target target) { storage_buffer.create(); storage_buffer.data(initial_size); m_size = initial_size; + m_target = target; } void destroy() @@ -598,13 +611,10 @@ namespace gl { size = (size + 255) & ~255; - //storage_buffer.bind(storage_buffer.current_target()); - glBindBuffer(GL_TEXTURE_BUFFER, storage_buffer.id()); + glBindBuffer((GLenum)m_target, storage_buffer.id()); u32 limit = m_data_loc + size; if (limit > m_size) { - //Orphan this buffer and have the driver allocate a new one instead of looping back to the front. - //Hopefully, the driver will track usage here and re-use if sync is not a problem if (size > m_size) m_size = size; @@ -612,7 +622,7 @@ namespace gl m_data_loc = 0; } - void *ptr = glMapBufferRange(GL_TEXTURE_BUFFER, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); + void *ptr = glMapBufferRange((GLenum)m_target, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); u32 offset = m_data_loc; m_data_loc += size; return std::make_pair(ptr, offset); @@ -620,8 +630,7 @@ namespace gl void unmap() { - //storage_buffer.unmap(); - glUnmapBuffer(GL_TEXTURE_BUFFER); + glUnmapBuffer((GLenum)m_target); m_mapped_block_size = 0; m_mapped_base = 0; } @@ -638,21 +647,29 @@ namespace gl std::pair alloc_from_reserve(u32 size) { - size = (size + 255) & ~255; + size = (size + 15) & ~15; if (m_mapped_bytes_available < size || !m_mapped_base) { if (m_mapped_base) + { + //This doesn't really work for some reason, probably since the caller should bind the target + //before making this call as the block may be reallocated + LOG_ERROR(RSX, "reserved allocation exceeded. check for corruption!"); unmap(); + } reserve_and_map((size > 4096) ? size : 4096); } + EXPECTS(m_mapped_bytes_available >= size); + void *ptr = (char*)m_mapped_base + m_mapped_reserve_offset; u32 offset = m_mapped_reserve_offset + m_mapped_block_offset; m_mapped_reserve_offset += size; m_mapped_bytes_available -= size; + EXPECTS((offset & 15) == 0); return std::make_pair(ptr, offset); } diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 31a38fa17b..903745553d 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -127,8 +127,8 @@ namespace throw EXCEPTION("unknow vertex type"); } - // return vertex count and filled index array if primitive type is not native (empty array otherwise) - std::tuple> get_index_array_for_emulated_non_indexed_draw(const std::vector> &first_count_commands, rsx::primitive_type primitive_mode) + // return vertex count if primitive type is not native (empty array otherwise) + std::tuple get_index_array_for_emulated_non_indexed_draw(const std::vector> &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst) { u32 vertex_draw_count = 0; assert(!is_primitive_native(primitive_mode)); @@ -138,9 +138,10 @@ namespace vertex_draw_count += (u32)get_index_count(primitive_mode, pair.second); } - std::vector vertex_index_array(vertex_draw_count * sizeof(u16)); u32 first = 0; - char* mapped_buffer = (char*)vertex_index_array.data(); + auto mapping = dst.alloc_and_map(vertex_draw_count * sizeof(u16)); + char *mapped_buffer = (char *)mapping.first; + for (const auto &pair : first_count_commands) { size_t element_count = get_index_count(primitive_mode, pair.second); @@ -149,16 +150,17 @@ namespace first += pair.second; } - return std::make_tuple(vertex_draw_count, vertex_index_array); + dst.unmap(); + return std::make_tuple(vertex_draw_count, mapping.second); } } -void GLGSRender::set_vertex_buffer() +u32 GLGSRender::set_vertex_buffer() { //initialize vertex attributes - //merge all vertex arrays - //std::vector vertex_arrays_data; + + std::chrono::time_point then = std::chrono::system_clock::now(); const std::string reg_table[] = { @@ -171,18 +173,21 @@ void GLGSRender::set_vertex_buffer() }; u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; - - std::vector vertex_index_array; - vertex_draw_count = 0; - u32 min_index, max_index; - + u32 min_index = 0, max_index = 0; u32 max_vertex_attrib_size = 0; + u32 offset_in_index_buffer = 0; + + vertex_draw_count = 0; + + //place holder; replace with actual index buffer + gsl::span index_array; + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { if (vertex_arrays_info[index].size == 0) continue; - max_vertex_attrib_size += (vertex_arrays_info[index].size << 2); + max_vertex_attrib_size += 16; } if (draw_command == rsx::draw_command::indexed) @@ -193,12 +198,19 @@ void GLGSRender::set_vertex_buffer() { vertex_draw_count += first_count.second; } + // Index count vertex_draw_count = (u32)get_index_count(draw_mode, gsl::narrow(vertex_draw_count)); - vertex_index_array.resize(vertex_draw_count * type_size); + u32 block_sz = vertex_draw_count * type_size; + + auto mapping = m_index_ring_buffer->alloc_and_map(block_sz); + void *ptr = mapping.first; + offset_in_index_buffer = mapping.second; - gsl::span dst{ reinterpret_cast(vertex_index_array.data()), gsl::narrow(vertex_index_array.size()) }; + gsl::span dst{ reinterpret_cast(ptr), gsl::narrow(block_sz) }; std::tie(min_index, max_index) = write_index_array_data_to_buffer(dst, type, draw_mode, first_count_commands); + + m_index_ring_buffer->unmap(); } if (draw_command == rsx::draw_command::inlined_array) @@ -270,7 +282,7 @@ void GLGSRender::set_vertex_buffer() m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); if (!is_primitive_native(draw_mode)) { - std::tie(vertex_draw_count, vertex_index_array) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode); + std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, *m_index_ring_buffer); } } } @@ -285,7 +297,8 @@ void GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) { - m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); + u32 verts_allocated = std::max(vertex_draw_count, max_index + 1); + m_attrib_ring_buffer->reserve_and_map(verts_allocated * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -395,20 +408,16 @@ void GLGSRender::set_vertex_buffer() continue; } } + if (draw_command == rsx::draw_command::array && !is_primitive_native(draw_mode)) { - std::tie(vertex_draw_count, vertex_index_array) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode); + std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, *m_index_ring_buffer); } } m_attrib_ring_buffer->unmap(); + std::chrono::time_point now = std::chrono::system_clock::now(); + m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); - if (draw_command == rsx::draw_command::indexed) - { - m_ebo.data(vertex_index_array.size(), vertex_index_array.data()); - } - else if (!is_primitive_native(draw_mode)) - { - m_ebo.data(vertex_index_array.size(), vertex_index_array.data()); - } + return offset_in_index_buffer; } \ No newline at end of file From 28a5d4d4f063d6c9f8e6f0e72db6a3072660b43b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 12 Jun 2016 16:37:30 +0300 Subject: [PATCH 4/5] Add a workaround for a khronos spec bug --- rpcs3/Emu/RSX/GL/OpenGL.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpcs3/Emu/RSX/GL/OpenGL.h b/rpcs3/Emu/RSX/GL/OpenGL.h index 3c51a2792a..04fe45a84e 100644 --- a/rpcs3/Emu/RSX/GL/OpenGL.h +++ b/rpcs3/Emu/RSX/GL/OpenGL.h @@ -25,6 +25,12 @@ typedef BOOL (WINAPI* PFNWGLSWAPINTERVALEXTPROC) (int interval); #include #endif +#ifndef GL_TEXTURE_BUFFER_BINDING +//During spec release, this enum was removed during upgrade from ARB equivalent +//See https://www.khronos.org/bugzilla/show_bug.cgi?id=844 +#define GL_TEXTURE_BUFFER_BINDING 0x8C2A +#endif + namespace gl { void init(); From 3a63b624868de0e4791ef128ab50a53ed866628b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 12 Jun 2016 18:54:15 +0300 Subject: [PATCH 5/5] gl: dynamically determine texture buffer offset alignment fix alignment issues for gpus where align < 16 is ok --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 1 + rpcs3/Emu/RSX/GL/GLGSRender.h | 2 ++ rpcs3/Emu/RSX/GL/gl_helpers.h | 7 ++++--- rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 8 ++++---- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 778267c3f3..69a8ca10f6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -381,6 +381,7 @@ void GLGSRender::on_init_thread() LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VENDOR)); glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); + glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment); m_vao.create(); for (gl::texture &tex : m_gl_attrib_buffers) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ce22766993..1c64d0b3c6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -36,6 +36,8 @@ private: u32 m_begin_time = 0; u32 m_draw_time = 0; u32 m_vertex_upload_time = 0; + + GLint m_min_texbuffer_alignment = 256; public: gl::fbo draw_fbo; diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index c24e9487ac..0f8ada9369 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -645,9 +645,10 @@ namespace gl m_mapped_bytes_available = max_size; } - std::pair alloc_from_reserve(u32 size) + std::pair alloc_from_reserve(u32 size, u32 alignment = 16) { - size = (size + 15) & ~15; + alignment -= 1; + size = (size + alignment) & ~alignment; if (m_mapped_bytes_available < size || !m_mapped_base) { @@ -669,7 +670,7 @@ namespace gl m_mapped_reserve_offset += size; m_mapped_bytes_available -= size; - EXPECTS((offset & 15) == 0); + EXPECTS((offset & alignment) == 0); return std::make_pair(ptr, offset); } diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 903745553d..41e6f83b44 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -253,7 +253,7 @@ u32 GLGSRender::set_vertex_buffer() auto &texture = m_gl_attrib_buffers[index]; u8 *src = reinterpret_cast(inline_vertex_array.data()); - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); src += offsets[index]; @@ -337,7 +337,7 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array) { - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); gsl::byte *dst = static_cast(mapping.first); buffer_offset = mapping.second; @@ -354,7 +354,7 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::indexed) { data_size = (max_index + 1) * element_size; - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); gsl::byte *dst = static_cast(mapping.first); buffer_offset = mapping.second; @@ -385,7 +385,7 @@ u32 GLGSRender::set_vertex_buffer() auto &texture = m_gl_attrib_buffers[index]; - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size); + auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); memcpy(dst, vertex_data.data(), data_size);