From 9aa632bcc1b80aa6d478e9eb73b369253ccbe649 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 10 Jun 2017 23:32:17 +0300 Subject: [PATCH] rsx/vk: Fixes for ring buffer allocation and image clipping (#2850) --- rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 28 ++++++++++- rpcs3/Emu/RSX/GL/GLTextureCache.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 61 +++++++++-------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 6 +++ rpcs3/Emu/RSX/rsx_methods.cpp | 27 +++++++--- rpcs3/Emu/RSX/rsx_utils.h | 55 ++++++++++++++++++++ 6 files changed, 133 insertions(+), 46 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index f5d0bc9314..544bfed522 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -43,6 +43,9 @@ struct data_heap size_t m_size; size_t m_put_pos; // Start of free space + size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget + size_t m_current_allocated_size; + size_t m_largest_allocated_pool; public: data_heap() = default; ~data_heap() = default; @@ -51,11 +54,16 @@ public: size_t m_get_pos; // End of free space - void init(size_t heap_size) + void init(size_t heap_size, size_t min_guard_size=0x10000) { m_size = heap_size; m_put_pos = 0; m_get_pos = heap_size - 1; + + //allocation stats + m_min_guard_size = min_guard_size; + m_current_allocated_size = 0; + m_largest_allocated_pool = 0; } template @@ -64,6 +72,11 @@ public: if (!can_alloc(size)) fmt::throw_exception("Working buffer not big enough" HERE); size_t alloc_size = align(size, Alignement); size_t aligned_put_pos = align(m_put_pos, Alignement); + + const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size; + m_current_allocated_size += block_length; + m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length); + if (aligned_put_pos + alloc_size < m_size) { m_put_pos = aligned_put_pos + alloc_size; @@ -83,4 +96,17 @@ public: { return (m_put_pos - 1 > 0) ? m_put_pos - 1 : m_size - 1; } + + bool is_critical() + { + const size_t guard_length = std::max(m_min_guard_size, m_largest_allocated_pool); + return (m_current_allocated_size + guard_length) > m_size; + } + + void reset_allocation_stats() + { + m_current_allocated_size = 0; + m_largest_allocated_pool = 0; + m_get_pos = get_current_put_pos_minus_one(); + } }; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index c1014d354c..e93de9030e 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -1187,7 +1187,7 @@ namespace gl //NOTE: It is possible that the check is simpler (if (clip_x >= clip_width)) //Needs verification if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; - if ((dst.offset_y + dst.clip_y + dst.clip_width) > max_dst_height) dst.clip_y = 0; + if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0; if (dst.clip_x || dst.clip_y) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 5a89b9a5c3..7c04eaabca 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -583,15 +583,14 @@ VKGSRender::VKGSRender() : GSRender() } - -#define RING_BUFFER_SIZE 16 * 1024 * DESCRIPTOR_MAX_DRAW_CALLS - - m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); - m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); - m_index_buffer_ring_info.init(RING_BUFFER_SIZE); - m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); - m_texture_upload_buffer_ring_info.init(8 * RING_BUFFER_SIZE); - m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); + m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000); + m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); + m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000); + m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000); + m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); + m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000); + m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0)); m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); @@ -777,8 +776,12 @@ void VKGSRender::begin() { rsx::thread::begin(); - //Ease resource pressure if the number of draw calls becomes too high - if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS) + //Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources + if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS || + m_attrib_ring_info.is_critical() || + m_texture_upload_buffer_ring_info.is_critical() || + m_uniform_buffer_ring_info.is_critical() || + m_index_buffer_ring_info.is_critical()) { std::chrono::time_point submit_start = steady_clock::now(); @@ -787,10 +790,10 @@ void VKGSRender::begin() CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0)); m_used_descriptors = 0; - m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); - m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); - m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); - m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); + m_uniform_buffer_ring_info.reset_allocation_stats(); + m_index_buffer_ring_info.reset_allocation_stats(); + m_attrib_ring_info.reset_allocation_stats(); + m_texture_upload_buffer_ring_info.reset_allocation_stats(); std::chrono::time_point submit_end = steady_clock::now(); m_flip_time += std::chrono::duration_cast(submit_end - submit_start).count(); @@ -1010,9 +1013,6 @@ void VKGSRender::on_init_thread() } GSRender::on_init_thread(); - m_attrib_ring_info.init(8 * RING_BUFFER_SIZE); - m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); - rsx_thread = std::this_thread::get_id(); } @@ -1052,22 +1052,7 @@ void VKGSRender::clear_surface(u32 mask) const u32 fb_height = m_framebuffer_to_clean.back()->height(); //clip region - //TODO: Move clipping logic to shared code. Its used in other places as well - if (scissor_x >= fb_width) - scissor_x = 0; - - if (scissor_y >= fb_height) - scissor_y = 0; - - const u32 scissor_limit_x = scissor_x + scissor_w; - const u32 scissor_limit_y = scissor_y + scissor_h; - - if (scissor_limit_x > fb_width) - scissor_w = fb_width - scissor_x; - - if (scissor_limit_y > fb_height) - scissor_h = fb_height - scissor_y; - + std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true); VkClearRect region = { { { scissor_x, scissor_y },{ scissor_w, scissor_h } }, 0, 1 }; auto targets = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); @@ -1920,10 +1905,10 @@ void VKGSRender::flip(int buffer) std::chrono::time_point flip_end = steady_clock::now(); m_flip_time = std::chrono::duration_cast(flip_end - flip_start).count(); - m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); - m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); - m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); - m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); + m_uniform_buffer_ring_info.reset_allocation_stats(); + m_index_buffer_ring_info.reset_allocation_stats(); + m_attrib_ring_info.reset_allocation_stats(); + m_texture_upload_buffer_ring_info.reset_allocation_stats(); //Resource destruction is handled within the real swap handler diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index ad52a2bffe..d7575abca5 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -16,6 +16,12 @@ #pragma comment(lib, "VKstatic.1.lib") +//Heap allocation sizes in MB +#define VK_ATTRIB_RING_BUFFER_SIZE_M 256 +#define VK_UBO_RING_BUFFER_SIZE_M 32 +#define VK_INDEX_RING_BUFFER_SIZE_M 64 +#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128 + #define VK_MAX_ASYNC_CB_COUNT 64 struct command_buffer_chunk: public vk::command_buffer diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index a439e98ef6..daf420c5e6 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -429,14 +429,29 @@ namespace rsx const f32 in_x = method_registers.blit_engine_in_x(); const f32 in_y = method_registers.blit_engine_in_y(); - const u16 clip_w = std::min(method_registers.blit_engine_clip_width(), out_w); - const u16 clip_h = std::min(method_registers.blit_engine_clip_height(), out_h); + //Clipping + //Validate that clipping rect will fit onto both src and dst regions + u16 clip_w = std::min(method_registers.blit_engine_clip_width(), out_w); + u16 clip_h = std::min(method_registers.blit_engine_clip_height(), out_h); - // if the clip'd region will end up outside of the source area, we ignore the given clip x/y and just use 0 - // see: Spyro - BLES00382 intro, psgl sdk samples - const u16 clip_x = method_registers.blit_engine_clip_x() > (in_x + in_w - clip_w) ? 0 : method_registers.blit_engine_clip_x(); - const u16 clip_y = method_registers.blit_engine_clip_y() > (in_y + in_h - clip_h) ? 0 : method_registers.blit_engine_clip_y(); + u16 clip_x = method_registers.blit_engine_clip_x(); + u16 clip_y = method_registers.blit_engine_clip_y(); + if (clip_w == 0) + { + clip_x = 0; + clip_w = out_w; + } + + if (clip_h == 0) + { + clip_y = 0; + clip_h = out_h; + } + + //Fit onto dst + if (clip_x && (out_x + clip_x + clip_w) > out_w) clip_x = 0; + if (clip_y && (out_y + clip_y + clip_h) > out_h) clip_y = 0; u16 in_pitch = method_registers.blit_engine_input_pitch(); diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index ac0276cbe3..7d05c776df 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -151,4 +151,59 @@ namespace rsx void fill_viewport_matrix(void *buffer, bool transpose); std::array get_constant_blend_colors(); + + /** + * Clips a rect so that it never falls outside the parent region + * attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0) + */ + template + std::tuple clip_region(T parent_width, T parent_height, T clip_x, T clip_y, T clip_width, T clip_height, bool attempt_fit) + { + T x = clip_x; + T y = clip_y; + T width = clip_width; + T height = clip_height; + + if ((clip_x + clip_width) > parent_width) + { + if (clip_x >= parent_width) + { + if (clip_width < parent_width) + width = clip_width; + else + width = parent_width; + + x = (T)0; + } + else + { + if (attempt_fit) + width = parent_width - clip_x; + else + width = std::min(clip_width, parent_width); + } + } + + if ((clip_y + clip_height) > parent_height) + { + if (clip_y >= parent_height) + { + if (clip_height < parent_height) + height = clip_height; + else + height = parent_height; + + y = (T)0; + } + else + { + if (attempt_fit) + height = parent_height - clip_y; + else + height = std::min(clip_height, parent_height); + } + } + + return std::make_tuple(x, y, width, height); + } }