rsx/vk: Fixes for ring buffer allocation and image clipping (#2850)

This commit is contained in:
kd-11 2017-06-10 23:32:17 +03:00 committed by GitHub
parent 0dd10d1457
commit 9aa632bcc1
6 changed files with 133 additions and 46 deletions

View File

@ -43,6 +43,9 @@ struct data_heap
size_t m_size; size_t m_size;
size_t m_put_pos; // Start of free space size_t m_put_pos; // Start of free space
size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
size_t m_current_allocated_size;
size_t m_largest_allocated_pool;
public: public:
data_heap() = default; data_heap() = default;
~data_heap() = default; ~data_heap() = default;
@ -51,11 +54,16 @@ public:
size_t m_get_pos; // End of free space size_t m_get_pos; // End of free space
void init(size_t heap_size) void init(size_t heap_size, size_t min_guard_size=0x10000)
{ {
m_size = heap_size; m_size = heap_size;
m_put_pos = 0; m_put_pos = 0;
m_get_pos = heap_size - 1; m_get_pos = heap_size - 1;
//allocation stats
m_min_guard_size = min_guard_size;
m_current_allocated_size = 0;
m_largest_allocated_pool = 0;
} }
template<int Alignement> template<int Alignement>
@ -64,6 +72,11 @@ public:
if (!can_alloc<Alignement>(size)) fmt::throw_exception("Working buffer not big enough" HERE); if (!can_alloc<Alignement>(size)) fmt::throw_exception("Working buffer not big enough" HERE);
size_t alloc_size = align(size, Alignement); size_t alloc_size = align(size, Alignement);
size_t aligned_put_pos = align(m_put_pos, Alignement); size_t aligned_put_pos = align(m_put_pos, Alignement);
const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size;
m_current_allocated_size += block_length;
m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length);
if (aligned_put_pos + alloc_size < m_size) if (aligned_put_pos + alloc_size < m_size)
{ {
m_put_pos = aligned_put_pos + alloc_size; m_put_pos = aligned_put_pos + alloc_size;
@ -83,4 +96,17 @@ public:
{ {
return (m_put_pos - 1 > 0) ? m_put_pos - 1 : m_size - 1; return (m_put_pos - 1 > 0) ? m_put_pos - 1 : m_size - 1;
} }
//Reports whether the heap is running out of room.
//Returns true when the bytes handed out since the stats were last zeroed, plus a guard
//region, no longer fit inside the heap (m_size).
bool is_critical()
{
	//The guard is the larger of the configured minimum and the biggest single block recorded so far
	size_t guard_length = m_min_guard_size;
	if (guard_length < m_largest_allocated_pool)
	{
		guard_length = m_largest_allocated_pool;
	}

	const size_t required_size = m_current_allocated_size + guard_length;
	return m_size < required_size;
}
//Zeroes the allocation counters consulted by is_critical() and moves the end of
//free space (m_get_pos) to the position reported by get_current_put_pos_minus_one().
void reset_allocation_stats()
{
	//End of free space now trails the current write position
	m_get_pos = get_current_put_pos_minus_one();

	//Start counting allocations from scratch
	m_largest_allocated_pool = 0;
	m_current_allocated_size = 0;
}
}; };

View File

@ -1187,7 +1187,7 @@ namespace gl
//NOTE: It is possible that the check is simpler (if (clip_x >= clip_width)) //NOTE: It is possible that the check is simpler (if (clip_x >= clip_width))
//Needs verification //Needs verification
if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0;
if ((dst.offset_y + dst.clip_y + dst.clip_width) > max_dst_height) dst.clip_y = 0; if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0;
if (dst.clip_x || dst.clip_y) if (dst.clip_x || dst.clip_y)
{ {

View File

@ -583,15 +583,14 @@ VKGSRender::VKGSRender() : GSRender()
} }
m_attrib_ring_info.init(VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000);
#define RING_BUFFER_SIZE 16 * 1024 * DESCRIPTOR_MAX_DRAW_CALLS m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000);
m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000);
m_index_buffer_ring_info.init(RING_BUFFER_SIZE); m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000);
m_texture_upload_buffer_ring_info.init(8 * RING_BUFFER_SIZE); m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
m_texture_upload_buffer_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 0));
m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats); m_render_passes = get_precomputed_render_passes(*m_device, m_optimal_tiling_supported_formats);
@ -777,8 +776,12 @@ void VKGSRender::begin()
{ {
rsx::thread::begin(); rsx::thread::begin();
//Ease resource pressure if the number of draw calls becomes too high //Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS) if (m_used_descriptors >= DESCRIPTOR_MAX_DRAW_CALLS ||
m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical())
{ {
std::chrono::time_point<steady_clock> submit_start = steady_clock::now(); std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
@ -787,10 +790,10 @@ void VKGSRender::begin()
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0)); CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_used_descriptors = 0; m_used_descriptors = 0;
m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); m_texture_upload_buffer_ring_info.reset_allocation_stats();
std::chrono::time_point<steady_clock> submit_end = steady_clock::now(); std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count(); m_flip_time += std::chrono::duration_cast<std::chrono::microseconds>(submit_end - submit_start).count();
@ -1010,9 +1013,6 @@ void VKGSRender::on_init_thread()
} }
GSRender::on_init_thread(); GSRender::on_init_thread();
m_attrib_ring_info.init(8 * RING_BUFFER_SIZE);
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, 8 * RING_BUFFER_SIZE, m_memory_type_mapping.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
rsx_thread = std::this_thread::get_id(); rsx_thread = std::this_thread::get_id();
} }
@ -1052,22 +1052,7 @@ void VKGSRender::clear_surface(u32 mask)
const u32 fb_height = m_framebuffer_to_clean.back()->height(); const u32 fb_height = m_framebuffer_to_clean.back()->height();
//clip region //clip region
//TODO: Move clipping logic to shared code. Its used in other places as well std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true);
if (scissor_x >= fb_width)
scissor_x = 0;
if (scissor_y >= fb_height)
scissor_y = 0;
const u32 scissor_limit_x = scissor_x + scissor_w;
const u32 scissor_limit_y = scissor_y + scissor_h;
if (scissor_limit_x > fb_width)
scissor_w = fb_width - scissor_x;
if (scissor_limit_y > fb_height)
scissor_h = fb_height - scissor_y;
VkClearRect region = { { { scissor_x, scissor_y },{ scissor_w, scissor_h } }, 0, 1 }; VkClearRect region = { { { scissor_x, scissor_y },{ scissor_w, scissor_h } }, 0, 1 };
auto targets = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); auto targets = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
@ -1920,10 +1905,10 @@ void VKGSRender::flip(int buffer)
std::chrono::time_point<steady_clock> flip_end = steady_clock::now(); std::chrono::time_point<steady_clock> flip_end = steady_clock::now();
m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count(); m_flip_time = std::chrono::duration_cast<std::chrono::microseconds>(flip_end - flip_start).count();
m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); m_uniform_buffer_ring_info.reset_allocation_stats();
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); m_index_buffer_ring_info.reset_allocation_stats();
m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); m_texture_upload_buffer_ring_info.reset_allocation_stats();
//Resource destruction is handled within the real swap handler //Resource destruction is handled within the real swap handler

View File

@ -16,6 +16,12 @@
#pragma comment(lib, "VKstatic.1.lib") #pragma comment(lib, "VKstatic.1.lib")
//Heap allocation sizes in MB
//Each value is multiplied by 0x100000 (bytes per MB) where the matching ring buffer heap is initialized
//Vertex attribute heap (created as a uniform texel buffer)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
//Uniform buffer heap
#define VK_UBO_RING_BUFFER_SIZE_M 32
//Index buffer heap
#define VK_INDEX_RING_BUFFER_SIZE_M 64
//Texture upload staging heap (created as a transfer source buffer)
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 128
#define VK_MAX_ASYNC_CB_COUNT 64 #define VK_MAX_ASYNC_CB_COUNT 64
struct command_buffer_chunk: public vk::command_buffer struct command_buffer_chunk: public vk::command_buffer

View File

@ -429,14 +429,29 @@ namespace rsx
const f32 in_x = method_registers.blit_engine_in_x(); const f32 in_x = method_registers.blit_engine_in_x();
const f32 in_y = method_registers.blit_engine_in_y(); const f32 in_y = method_registers.blit_engine_in_y();
const u16 clip_w = std::min(method_registers.blit_engine_clip_width(), out_w); //Clipping
const u16 clip_h = std::min(method_registers.blit_engine_clip_height(), out_h); //Validate that clipping rect will fit onto both src and dst regions
u16 clip_w = std::min(method_registers.blit_engine_clip_width(), out_w);
u16 clip_h = std::min(method_registers.blit_engine_clip_height(), out_h);
// if the clip'd region will end up outside of the source area, we ignore the given clip x/y and just use 0 u16 clip_x = method_registers.blit_engine_clip_x();
// see: Spyro - BLES00382 intro, psgl sdk samples u16 clip_y = method_registers.blit_engine_clip_y();
const u16 clip_x = method_registers.blit_engine_clip_x() > (in_x + in_w - clip_w) ? 0 : method_registers.blit_engine_clip_x();
const u16 clip_y = method_registers.blit_engine_clip_y() > (in_y + in_h - clip_h) ? 0 : method_registers.blit_engine_clip_y();
if (clip_w == 0)
{
clip_x = 0;
clip_w = out_w;
}
if (clip_h == 0)
{
clip_y = 0;
clip_h = out_h;
}
//Fit onto dst
if (clip_x && (out_x + clip_x + clip_w) > out_w) clip_x = 0;
if (clip_y && (out_y + clip_y + clip_h) > out_h) clip_y = 0;
u16 in_pitch = method_registers.blit_engine_input_pitch(); u16 in_pitch = method_registers.blit_engine_input_pitch();

View File

@ -151,4 +151,59 @@ namespace rsx
void fill_viewport_matrix(void *buffer, bool transpose); void fill_viewport_matrix(void *buffer, bool transpose);
std::array<float, 4> get_constant_blend_colors(); std::array<float, 4> get_constant_blend_colors();
/**
 * Clips a rect so that it never falls outside the parent region.
 * The parent region spans (0, 0) to (parent_width, parent_height). Each axis is handled independently:
 *  - a span that already fits is returned unchanged
 *  - a span that starts inside the parent but overruns it is shrunk to end at the parent edge when
 *    attempt_fit is true; otherwise its origin is pinned to 0 and its extent is capped to the parent extent
 *  - a span that starts at or beyond the parent edge is always pinned to 0 with its extent capped
 *
 * parent_width, parent_height: size of the enclosing region
 * clip_x, clip_y: requested origin of the child rect
 * clip_width, clip_height: requested size of the child rect
 * attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0)
 *
 * Returns (x, y, width, height) of the clipped rect.
 */
template <typename T>
std::tuple<T, T, T, T> clip_region(T parent_width, T parent_height, T clip_x, T clip_y, T clip_width, T clip_height, bool attempt_fit)
{
	//Clips a single axis and returns (origin, extent)
	const auto clip_axis = [attempt_fit](T parent_extent, T origin, T extent) -> std::tuple<T, T>
	{
		//Same test as (origin + extent) <= parent_extent, written so the sum cannot wrap around for wide unsigned types
		if (origin <= parent_extent && extent <= (parent_extent - origin))
			return std::make_tuple(origin, extent);

		//Origin is usable and resizing is allowed: keep the origin and trim the overhang
		if (attempt_fit && origin < parent_extent)
			return std::make_tuple(origin, static_cast<T>(parent_extent - origin));

		//Otherwise pin to the parent origin so that the capped span is guaranteed to lie inside the parent
		return std::make_tuple(static_cast<T>(0), std::min(extent, parent_extent));
	};

	T x, y, width, height;
	std::tie(x, width) = clip_axis(parent_width, clip_x, clip_width);
	std::tie(y, height) = clip_axis(parent_height, clip_y, clip_height);

	return std::make_tuple(x, y, width, height);
}
} }