diff --git a/rpcs3/Emu/RSX/Core/RSXVertexTypes.h b/rpcs3/Emu/RSX/Core/RSXVertexTypes.h index e65c168a09..21f1770487 100644 --- a/rpcs3/Emu/RSX/Core/RSXVertexTypes.h +++ b/rpcs3/Emu/RSX/Core/RSXVertexTypes.h @@ -62,11 +62,12 @@ namespace rsx u32 real_offset_address = 0; u8 memory_location = 0; u8 attribute_stride = 0; + std::pair vertex_range{}; rsx::simple_array locations; // Check if we need to upload a full unoptimized range, i.e [0-max_index] - std::pair calculate_required_range(u32 first, u32 count) const; + std::pair calculate_required_range(u32 first, u32 count); }; enum attribute_buffer_placement : u8 @@ -100,6 +101,7 @@ namespace rsx result->single_vertex = false; result->locations.clear(); result->interleaved = true; + result->vertex_range.second = 0; return result; } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index d480b93387..eeef1124ac 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -269,8 +269,14 @@ namespace rsx } } - std::pair interleaved_range_info::calculate_required_range(u32 first, u32 count) const + std::pair interleaved_range_info::calculate_required_range(u32 first, u32 count) { + if (vertex_range.second) + { + // Cached result + return vertex_range; + } + if (single_vertex) { return { 0, 1 }; @@ -280,10 +286,15 @@ namespace rsx u32 _max_index = 0; u32 _min_index = first; + u32 frequencies[rsx::limits::vertex_count]; + u32 freq_count = rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed ? 0 : u32{umax}; + u32 max_result_by_division = 0; // Guaranteed maximum + for (const auto &attrib : locations) { if (attrib.frequency <= 1) [[likely]] { + freq_count = umax; _max_index = max_index; } else @@ -294,12 +305,24 @@ namespace rsx { // Actually uses the modulo operator _min_index = 0; - _max_index = attrib.frequency - 1; + _max_index = std::max(_max_index, attrib.frequency - 1); + + if (max_result_by_division < _max_index) + { + if (freq_count != umax) + { + if (std::find(frequencies, frequencies + freq_count, attrib.frequency) == frequencies + freq_count) + { + frequencies[freq_count++] = attrib.frequency; + } + } + } } else { // Same as having no modulo _max_index = max_index; + freq_count = umax; } } else @@ -307,12 +330,84 @@ namespace rsx // Division operator _min_index = std::min(_min_index, first / attrib.frequency); _max_index = std::max(_max_index, utils::aligned_div(max_index, attrib.frequency)); + + if (freq_count > 0 && freq_count != umax) + { + const u32 max = utils::aligned_div(max_index, attrib.frequency); + max_result_by_division = std::max(max_result_by_division, max); + + // Discard lower frequencies because it has been proven that there are indices higher than them + freq_count -= frequencies + freq_count - std::remove_if(frequencies, frequencies + freq_count, [&max_result_by_division](u32 freq) + { + return freq <= max_result_by_division; + }); + } } } } + while (freq_count > 0 && freq_count != umax) + { + const rsx::index_array_type index_type = rsx::method_registers.current_draw_clause.is_immediate_draw ? + rsx::index_array_type::u32 : + rsx::method_registers.index_type(); + + const u32 index_size = index_type == rsx::index_array_type::u32 ? 4 : 2; + + // If we can access a bit a more memory than required - do it + // The alternative would be re-iterating again over all of them + if (get_location(real_offset_address) == CELL_GCM_LOCATION_LOCAL) + { + const auto render = rsx::get_current_renderer(); + + if (utils::add_saturate(real_offset_address - rsx::constants::local_mem_base, (_max_index + 1) * attribute_stride) <= render->local_mem_size) + { + break; + } + } + else if (real_offset_address % 0x100000 + (_max_index + 1) * attribute_stride <= 0x100000)//(vm::check_addr(real_offset_address, vm::page_readable, (_max_index + 1) * attribute_stride)) + { + break; + } + + _max_index = 0; + + // Force aligned indices as realhw + const u32 address = (0 - index_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location()); + + auto re_evaluate = [&](auto ptr) + { + for (u32 _index = first; _index < first + count; _index++) + { + const auto value = ptr[_index]; + + for (u32 freq_it = 0; freq_it < freq_count; freq_it++) + { + const auto res = value % frequencies[freq_it]; + + if (res > _max_index) + { + _max_index = value; + } + } + } + }; + + if (index_size == 4) + { + re_evaluate(vm::get_super_ptr(address)); + } + else + { + re_evaluate(vm::get_super_ptr(address)); + } + + break; + } + ensure(_max_index >= _min_index); - return { _min_index, (_max_index - _min_index) + 1 }; + vertex_range = { _min_index, (_max_index - _min_index) + 1 }; + return vertex_range; } u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size) @@ -2798,7 +2893,7 @@ namespace rsx if (persistent != nullptr) { - for (const auto &block : layout.interleaved_blocks) + for (interleaved_range_info* block : layout.interleaved_blocks) { auto range = block->calculate_required_range(first_vertex, vertex_count);