[Base] Don't use reverse iterator for UTF-8.

[Base] Don't use reverse iterator for UTF-8. Fixes #1760.
This commit is contained in:
gibbed 2021-05-01 15:12:21 -05:00 committed by Rick Gibbed
parent b5fc1efad5
commit dbd980150b
1 changed files with 71 additions and 60 deletions

View File

@ -19,9 +19,7 @@
namespace utfcpp = utf8;
using citer = std::string_view::const_iterator;
using criter = std::string_view::const_reverse_iterator;
using utf8_citer = utfcpp::iterator<std::string_view::const_iterator>;
using utf8_criter = utfcpp::iterator<std::string_view::const_reverse_iterator>;
namespace xe::utf8 {
@ -54,25 +52,10 @@ std::pair<utf8_citer, utf8_citer> make_citer(const utf8_citer begin,
utf8_citer(end.base(), begin.base(), end.base())};
}
std::pair<utf8_criter, utf8_criter> make_criter(const std::string_view view) {
return {utf8_criter(view.crbegin(), view.crbegin(), view.crend()),
utf8_criter(view.crend(), view.crbegin(), view.crend())};
}
std::pair<utf8_criter, utf8_criter> make_criter(const utf8_criter begin,
const utf8_criter end) {
return {utf8_criter(begin.base(), begin.base(), end.base()),
utf8_criter(end.base(), begin.base(), end.base())};
}
size_t byte_length(utf8_citer begin, utf8_citer end) {
return size_t(std::distance(begin.base(), end.base()));
}
size_t byte_length(utf8_criter begin, utf8_criter end) {
return size_t(std::distance(begin.base(), end.base()));
}
size_t count(const std::string_view view) {
return size_t(utfcpp::distance(view.cbegin(), view.cend()));
}
@ -435,21 +418,23 @@ bool ends_with(const std::string_view haystack, const std::string_view needle) {
return false;
}
auto [haystack_begin, haystack_end] = make_criter(haystack);
auto [needle_begin, needle_end] = make_criter(needle);
auto [haystack_begin, haystack_end] = make_citer(haystack);
auto [needle_begin, needle_end] = make_citer(needle);
auto needle_count = count(needle);
auto it = haystack_begin;
auto it = haystack_end;
auto end = it;
for (size_t i = 0; i < needle_count; ++i) {
if (end == haystack_end) {
--it;
for (size_t i = 1; i < needle_count; ++i) {
if (it == haystack_begin) {
// not enough room in target for search
return false;
}
++end;
--it;
}
auto [sub_start, sub_end] = make_criter(it, end);
auto [sub_start, sub_end] = make_citer(it, end);
return std::equal(needle_begin, needle_end, sub_start, sub_end);
}
@ -461,21 +446,23 @@ bool ends_with_case(const std::string_view haystack,
return false;
}
auto [haystack_begin, haystack_end] = make_criter(haystack);
auto [needle_begin, needle_end] = make_criter(needle);
auto [haystack_begin, haystack_end] = make_citer(haystack);
auto [needle_begin, needle_end] = make_citer(needle);
auto needle_count = count(needle);
auto it = haystack_begin;
auto it = haystack_end;
auto end = it;
--it;
for (size_t i = 0; i < needle_count; ++i) {
if (end == haystack_end) {
if (it == haystack_begin) {
// not enough room in target for search
return false;
}
++end;
--it;
}
auto [sub_start, sub_end] = make_criter(it, end);
auto [sub_start, sub_end] = make_citer(it, end);
return std::equal(needle_begin, needle_end, sub_start, sub_end,
equal_ascii_case);
}
@ -492,7 +479,9 @@ std::string join_paths(const std::string_view left_path,
return std::string(left_path);
}
auto [it, end] = make_criter(left_path);
utf8_citer it;
std::tie(std::ignore, it) = make_citer(left_path);
--it;
std::string result = std::string(left_path);
if (*it != static_cast<uint32_t>(separator)) {
@ -563,25 +552,38 @@ std::string find_name_from_path(const std::string_view path,
return std::string();
}
auto [begin, end] = make_criter(path);
auto [begin, end] = make_citer(path);
auto it = begin;
auto it = end;
--it;
// path is padded with separator
size_t padding = 0;
if (*it == uint32_t(separator)) {
++it;
--it;
--end;
padding = 1;
}
if (it == end) {
// path is just separator
if (it == begin) {
return std::string();
}
it = std::find(it, end, uint32_t(separator));
if (it == end) {
// search for separator
while (it != begin) {
if (*it == uint32_t(separator)) {
break;
}
--it;
}
// no separator -- copy entire string (except trailing separator)
if (it == begin) {
return std::string(path.substr(0, path.size() - padding));
}
auto length = byte_length(begin, it);
auto length = byte_length(std::next(it), end);
auto offset = path.length() - length;
return std::string(path.substr(offset, length - padding));
}
@ -593,20 +595,25 @@ std::string find_base_name_from_path(const std::string_view path,
return std::string();
}
auto [begin, end] = make_criter(name);
auto [begin, end] = make_citer(name);
auto it = std::find(begin, end, uint32_t('.'));
if (it == end) {
auto it = end;
--it;
while (it != begin) {
if (*it == uint32_t('.')) {
break;
}
--it;
}
if (it == begin) {
return name;
}
it++;
if (it == end) {
return std::string();
}
auto length = name.length() - byte_length(begin, it);
return std::string(name.substr(0, length));
auto length = byte_length(it, end);
auto offset = name.length() - length;
return std::string(name.substr(0, offset));
}
std::string find_base_path(const std::string_view path, char32_t separator) {
@ -614,25 +621,29 @@ std::string find_base_path(const std::string_view path, char32_t separator) {
return std::string();
}
auto [begin, end] = make_criter(path);
auto [begin, end] = make_citer(path);
auto it = end;
--it;
auto it = begin;
if (*it == uint32_t(separator)) {
++it;
--it;
}
it = std::find(it, end, uint32_t(separator));
if (it == end) {
while (it != begin) {
if (*it == uint32_t(separator)) {
break;
}
--it;
}
if (it == begin) {
return std::string();
}
++it;
if (it == end) {
return std::string();
}
auto length = path.length() - byte_length(begin, it);
return std::string(path.substr(0, length));
auto length = byte_length(it, end);
auto offset = path.length() - length;
return std::string(path.substr(0, offset));
}
std::string canonicalize_path(const std::string_view path, char32_t separator) {