rsx: Fix index vertex array range with modulo calculation

This commit is contained in:
Eladash 2023-08-18 20:40:45 +03:00 committed by Elad Ashkenazi
parent ee3c7f335f
commit a26b8dff18
2 changed files with 102 additions and 5 deletions

View File

@ -62,11 +62,12 @@ namespace rsx
u32 real_offset_address = 0; u32 real_offset_address = 0;
u8 memory_location = 0; u8 memory_location = 0;
u8 attribute_stride = 0; u8 attribute_stride = 0;
std::pair<u32, u32> vertex_range{};
rsx::simple_array<interleaved_attribute_t> locations; rsx::simple_array<interleaved_attribute_t> locations;
// Check if we need to upload a full unoptimized range, i.e [0-max_index] // Check if we need to upload a full unoptimized range, i.e [0-max_index]
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const; std::pair<u32, u32> calculate_required_range(u32 first, u32 count);
}; };
enum attribute_buffer_placement : u8 enum attribute_buffer_placement : u8
@ -100,6 +101,7 @@ namespace rsx
result->single_vertex = false; result->single_vertex = false;
result->locations.clear(); result->locations.clear();
result->interleaved = true; result->interleaved = true;
result->vertex_range.second = 0;
return result; return result;
} }

View File

@ -269,8 +269,14 @@ namespace rsx
} }
} }
std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count)
{ {
if (vertex_range.second)
{
// Cached result
return vertex_range;
}
if (single_vertex) if (single_vertex)
{ {
return { 0, 1 }; return { 0, 1 };
@ -280,10 +286,15 @@ namespace rsx
u32 _max_index = 0; u32 _max_index = 0;
u32 _min_index = first; u32 _min_index = first;
u32 frequencies[rsx::limits::vertex_count];
u32 freq_count = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed ? 0 : u32{umax};
u32 max_result_by_division = 0; // Guaranteed maximum
for (const auto &attrib : locations) for (const auto &attrib : locations)
{ {
if (attrib.frequency <= 1) [[likely]] if (attrib.frequency <= 1) [[likely]]
{ {
freq_count = umax;
_max_index = max_index; _max_index = max_index;
} }
else else
@ -294,12 +305,24 @@ namespace rsx
{ {
// Actually uses the modulo operator // Actually uses the modulo operator
_min_index = 0; _min_index = 0;
_max_index = attrib.frequency - 1; _max_index = std::max<u32>(_max_index, attrib.frequency - 1);
if (max_result_by_division < _max_index)
{
if (freq_count != umax)
{
if (std::find(frequencies, frequencies + freq_count, attrib.frequency) == frequencies + freq_count)
{
frequencies[freq_count++] = attrib.frequency;
}
}
}
} }
else else
{ {
// Same as having no modulo // Same as having no modulo
_max_index = max_index; _max_index = max_index;
freq_count = umax;
} }
} }
else else
@ -307,12 +330,84 @@ namespace rsx
// Division operator // Division operator
_min_index = std::min(_min_index, first / attrib.frequency); _min_index = std::min(_min_index, first / attrib.frequency);
_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency)); _max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
if (freq_count > 0 && freq_count != umax)
{
const u32 max = utils::aligned_div(max_index, attrib.frequency);
max_result_by_division = std::max<u32>(max_result_by_division, max);
// Discard lower frequencies because it has been proven that there are indices higher than them
freq_count -= frequencies + freq_count - std::remove_if(frequencies, frequencies + freq_count, [&max_result_by_division](u32 freq)
{
return freq <= max_result_by_division;
});
}
} }
} }
} }
while (freq_count > 0 && freq_count != umax)
{
const rsx::index_array_type index_type = rsx::method_registers.current_draw_clause.is_immediate_draw ?
rsx::index_array_type::u32 :
rsx::method_registers.index_type();
const u32 index_size = index_type == rsx::index_array_type::u32 ? 4 : 2;
// If we can access a bit a more memory than required - do it
// The alternative would be re-iterating again over all of them
if (get_location(real_offset_address) == CELL_GCM_LOCATION_LOCAL)
{
const auto render = rsx::get_current_renderer();
if (utils::add_saturate<u32>(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size)
{
break;
}
}
else if (real_offset_address % 0x100000 + (_max_index + 1) * attribute_stride <= 0x100000)//(vm::check_addr(real_offset_address, vm::page_readable, (_max_index + 1) * attribute_stride))
{
break;
}
_max_index = 0;
// Force aligned indices as realhw
const u32 address = (0 - index_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());
auto re_evaluate = [&](auto ptr)
{
for (u32 _index = first; _index < first + count; _index++)
{
const auto value = ptr[_index];
for (u32 freq_it = 0; freq_it < freq_count; freq_it++)
{
const auto res = value % frequencies[freq_it];
if (res > _max_index)
{
_max_index = value;
}
}
}
};
if (index_size == 4)
{
re_evaluate(vm::get_super_ptr<u32>(address));
}
else
{
re_evaluate(vm::get_super_ptr<u16>(address));
}
break;
}
ensure(_max_index >= _min_index); ensure(_max_index >= _min_index);
return { _min_index, (_max_index - _min_index) + 1 }; vertex_range = { _min_index, (_max_index - _min_index) + 1 };
return vertex_range;
} }
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size) u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
@ -2798,7 +2893,7 @@ namespace rsx
if (persistent != nullptr) if (persistent != nullptr)
{ {
for (const auto &block : layout.interleaved_blocks) for (interleaved_range_info* block : layout.interleaved_blocks)
{ {
auto range = block->calculate_required_range(first_vertex, vertex_count); auto range = block->calculate_required_range(first_vertex, vertex_count);