Add and improve UTF8 tests.

This commit is contained in:
gibbed 2021-05-01 15:14:30 -05:00 committed by Rick Gibbed
parent dbd980150b
commit 653931ec8f
1 changed file with 308 additions and 32 deletions

View File

@ -16,16 +16,220 @@
namespace xe::base::test {
// TODO(gibbed): bit messy?
// TODO(gibbed): predicate variant?
// Asserts that applying func to a single input yields the expected value.
#define TEST_EXAMPLE(func, left, right) REQUIRE(func(left) == right)
// TEST_EXAMPLES_<N> checks the first N samples of a language: sample i is read
// from examples::k<language>Values[i] and compared against
// results.<language>[i]. Each variant extends the previous one by one sample.
#define TEST_EXAMPLES_1(func, language, results) \
  TEST_EXAMPLE(func, examples::k##language##Values[0], results.language[0])
#define TEST_EXAMPLES_2(func, language, results) \
  TEST_EXAMPLES_1(func, language, results);      \
  TEST_EXAMPLE(func, examples::k##language##Values[1], results.language[1])
#define TEST_EXAMPLES_3(func, language, results) \
  TEST_EXAMPLES_2(func, language, results);      \
  TEST_EXAMPLE(func, examples::k##language##Values[2], results.language[2])
namespace examples {

// Sample sentences per language, from:
// https://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt
//
// Every language defines k<Language>Count, k<Language>Values and a
// TEST_LANGUAGE_EXAMPLES_<Language> macro. The TEST_EXAMPLES_<N> variant used
// by that macro has to match k<Language>Count; a smaller N silently leaves the
// trailing samples untested.

const size_t kDanishCount = 1;
const char* kDanishValues[kDanishCount] = {
    u8"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther "
    u8"spillede på xylofon.",
};
#define TEST_LANGUAGE_EXAMPLES_Danish(func, results) \
  TEST_EXAMPLES_1(func, Danish, results)

const size_t kGermanCount = 3;
const char* kGermanValues[kGermanCount] = {
    u8"Falsches Üben von Xylophonmusik quält jeden größeren Zwerg",
    u8"Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich",
    u8"Heizölrückstoßabdämpfung",
};
// Three samples, so the three-sample variant is required here.
#define TEST_LANGUAGE_EXAMPLES_German(func, results) \
  TEST_EXAMPLES_3(func, German, results)

const size_t kGreekCount = 2;
const char* kGreekValues[kGreekCount] = {
    u8"Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο",
    u8"Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία",
};
#define TEST_LANGUAGE_EXAMPLES_Greek(func, results) \
  TEST_EXAMPLES_2(func, Greek, results)

const size_t kEnglishCount = 1;
const char* kEnglishValues[kEnglishCount] = {
    u8"The quick brown fox jumps over the lazy dog",
};
#define TEST_LANGUAGE_EXAMPLES_English(func, results) \
  TEST_EXAMPLES_1(func, English, results)

const size_t kSpanishCount = 1;
const char* kSpanishValues[kSpanishCount] = {
    u8"El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, "
    u8"añoraba a su querido cachorro.",
};
#define TEST_LANGUAGE_EXAMPLES_Spanish(func, results) \
  TEST_EXAMPLES_1(func, Spanish, results)

const size_t kFrenchCount = 3;
const char* kFrenchValues[kFrenchCount] = {
    u8"Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à "
    u8"côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui "
    u8"lui permet de penser à la cænogenèse de l'être dont il est question "
    u8"dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, "
    u8"pense-t-il, diminue çà et là la qualité de son œuvre.",
    u8"l'île exiguë\n"
    u8"Où l'obèse jury mûr\n"
    u8"Fête l'haï volapük,\n"
    u8"Âne ex aéquo au whist,\n"
    u8"Ôtez ce vœu déçu.",
    u8"Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en canoë "
    u8"au delà des îles, près du mälström où brûlent les novæ.",
};
#define TEST_LANGUAGE_EXAMPLES_French(func, results) \
  TEST_EXAMPLES_3(func, French, results)

const size_t kIrishGaelicCount = 1;
const char* kIrishGaelicValues[kIrishGaelicCount] = {
    u8"D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh",
};
#define TEST_LANGUAGE_EXAMPLES_IrishGaelic(func, results) \
  TEST_EXAMPLES_1(func, IrishGaelic, results)

const size_t kHungarianCount = 1;
const char* kHungarianValues[kHungarianCount] = {
    u8"Árvíztűrő tükörfúrógép",
};
#define TEST_LANGUAGE_EXAMPLES_Hungarian(func, results) \
  TEST_EXAMPLES_1(func, Hungarian, results)

const size_t kIcelandicCount = 2;
const char* kIcelandicValues[kIcelandicCount] = {
    u8"Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa",
    u8"Sævör grét áðan því úlpan var ónýt",
};
#define TEST_LANGUAGE_EXAMPLES_Icelandic(func, results) \
  TEST_EXAMPLES_2(func, Icelandic, results)

const size_t kJapaneseCount = 2;
const char* kJapaneseValues[kJapaneseCount] = {
    u8"いろはにほへとちりぬるを\n"
    u8"わかよたれそつねならむ\n"
    u8"うゐのおくやまけふこえて\n"
    u8"あさきゆめみしゑひもせす\n",
    u8"イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム\n"
    u8"ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン",
};
#define TEST_LANGUAGE_EXAMPLES_Japanese(func, results) \
  TEST_EXAMPLES_2(func, Japanese, results)

const size_t kHebrewCount = 1;
const char* kHebrewValues[kHebrewCount] = {
    u8"? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה",
};
#define TEST_LANGUAGE_EXAMPLES_Hebrew(func, results) \
  TEST_EXAMPLES_1(func, Hebrew, results)

const size_t kPolishCount = 1;
const char* kPolishValues[kPolishCount] = {
    u8"Pchnąć w tę łódź jeża lub ośm skrzyń fig",
};
#define TEST_LANGUAGE_EXAMPLES_Polish(func, results) \
  TEST_EXAMPLES_1(func, Polish, results)

const size_t kRussianCount = 2;
const char* kRussianValues[kRussianCount] = {
    u8"В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!",
    u8"Съешь же ещё этих мягких французских булок да выпей чаю",
};
#define TEST_LANGUAGE_EXAMPLES_Russian(func, results) \
  TEST_EXAMPLES_2(func, Russian, results)

const size_t kTurkishCount = 1;
const char* kTurkishValues[kTurkishCount] = {
    u8"Pijamalı hasta, yağız şoföre çabucak güvendi.",
};
#define TEST_LANGUAGE_EXAMPLES_Turkish(func, results) \
  TEST_EXAMPLES_1(func, Turkish, results)

// Runs func over every sample of every language and compares each return
// value against the matching slot of results.
#define TEST_LANGUAGE_EXAMPLES(func, results)        \
  TEST_LANGUAGE_EXAMPLES_Danish(func, results);      \
  TEST_LANGUAGE_EXAMPLES_German(func, results);      \
  TEST_LANGUAGE_EXAMPLES_Greek(func, results);       \
  TEST_LANGUAGE_EXAMPLES_English(func, results);     \
  TEST_LANGUAGE_EXAMPLES_Spanish(func, results);     \
  TEST_LANGUAGE_EXAMPLES_French(func, results);      \
  TEST_LANGUAGE_EXAMPLES_IrishGaelic(func, results); \
  TEST_LANGUAGE_EXAMPLES_Hungarian(func, results);   \
  TEST_LANGUAGE_EXAMPLES_Icelandic(func, results);   \
  TEST_LANGUAGE_EXAMPLES_Japanese(func, results);    \
  TEST_LANGUAGE_EXAMPLES_Hebrew(func, results);      \
  TEST_LANGUAGE_EXAMPLES_Polish(func, results);      \
  TEST_LANGUAGE_EXAMPLES_Russian(func, results);     \
  TEST_LANGUAGE_EXAMPLES_Turkish(func, results)

}  // namespace examples

// Declares one expected-value array for a language, with one slot per sample.
#define TEST_EXAMPLE_RESULT(language) T language[examples::k##language##Count]

// Expected results of type T for every sample of every language; member names
// match the language names used by the TEST_LANGUAGE_EXAMPLES_* macros.
template <typename T>
struct example_results {
  TEST_EXAMPLE_RESULT(Danish);
  TEST_EXAMPLE_RESULT(German);
  TEST_EXAMPLE_RESULT(Greek);
  TEST_EXAMPLE_RESULT(English);
  TEST_EXAMPLE_RESULT(Spanish);
  TEST_EXAMPLE_RESULT(French);
  TEST_EXAMPLE_RESULT(IrishGaelic);
  TEST_EXAMPLE_RESULT(Hungarian);
  TEST_EXAMPLE_RESULT(Icelandic);
  TEST_EXAMPLE_RESULT(Japanese);
  TEST_EXAMPLE_RESULT(Hebrew);
  TEST_EXAMPLE_RESULT(Polish);
  TEST_EXAMPLE_RESULT(Russian);
  TEST_EXAMPLE_RESULT(Turkish);
};
#undef TEST_EXAMPLE_RESULT

TEST_CASE("utf8::count", "UTF-8 Count") {
  // Expected utf8::count result for each sample. The values are smaller than
  // the byte lengths of the non-ASCII samples, i.e. multi-byte sequences are
  // counted once.
  example_results<size_t> results = {};
  results.Danish[0] = 88;
  results.German[0] = 58;
  results.German[1] = 54;
  // Third German sample; previously neither set here nor checked.
  results.German[2] = 24;
  results.Greek[0] = 52;
  results.Greek[1] = 33;
  results.English[0] = 43;
  results.Spanish[0] = 99;
  results.French[0] = 327;
  results.French[1] = 93;
  results.French[2] = 126;
  results.IrishGaelic[0] = 68;
  results.Hungarian[0] = 22;
  results.Icelandic[0] = 50;
  results.Icelandic[1] = 34;
  results.Japanese[0] = 51;
  results.Japanese[1] = 55;
  results.Hebrew[0] = 52;
  results.Polish[0] = 40;
  results.Russian[0] = 54;
  results.Russian[1] = 55;
  results.Turkish[0] = 45;
  TEST_LANGUAGE_EXAMPLES(utf8::count, results);
}
// TODO(gibbed): lower_ascii
// TODO(gibbed): upper_ascii
// TODO(gibbed): hash_fnv1a
// TODO(gibbed): hash_fnv1a_case
TEST_CASE("utf8::split", "UTF-8 Split") {
std::vector<std::string_view> parts;
// Danish
parts = utf8::split(
u8"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther "
u8"spillede på xylofon.",
u8"æcå");
parts = utf8::split(examples::kDanishValues[0], u8"æcå");
REQUIRE(parts.size() == 4);
REQUIRE(parts[0] == u8"Quizdeltagerne spiste jordb");
REQUIRE(parts[1] == u8"r med fløde, mens ");
@ -33,43 +237,41 @@ TEST_CASE("utf8::split", "UTF-8 Split") {
REQUIRE(parts[3] == u8" xylofon.");
// German
parts = utf8::split(
u8"Falsches Üben von Xylophonmusik quält jeden größeren Zwerg\n"
u8"Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich\n"
u8"Heizölrückstoßabdämpfung",
u8"ßS");
REQUIRE(parts.size() == 4);
parts = utf8::split(examples::kGermanValues[0], u8"ßS");
REQUIRE(parts.size() == 2);
REQUIRE(parts[0] == u8"Falsches Üben von Xylophonmusik quält jeden grö");
REQUIRE(parts[1] ==
u8"eren Zwerg\nZwölf Boxkämpfer jagten Eva quer über den ");
REQUIRE(parts[2] == u8"ylter Deich\nHeizölrücksto");
REQUIRE(parts[3] == u8"abdämpfung");
REQUIRE(parts[1] == u8"eren Zwerg");
parts = utf8::split(examples::kGermanValues[1], u8"ßS");
REQUIRE(parts.size() == 2);
REQUIRE(parts[0] == u8"Zwölf Boxkämpfer jagten Eva quer über den ");
REQUIRE(parts[1] == u8"ylter Deich");
parts = utf8::split(examples::kGermanValues[2], u8"ßS");
REQUIRE(parts.size() == 2);
REQUIRE(parts[0] == u8"Heizölrücksto");
REQUIRE(parts[1] == u8"abdämpfung");
// Greek
parts = utf8::split(
u8"Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο\n"
u8"Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία",
u8"πφ");
REQUIRE(parts.size() == 6);
parts = utf8::split(examples::kGreekValues[0], u8"πφ");
REQUIRE(parts.size() == 4);
REQUIRE(parts[0] == u8"Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ ");
REQUIRE(parts[1] == u8"ιὰ στὸ χρυσα");
REQUIRE(parts[2] == u8"ὶ ξέ");
REQUIRE(parts[3] == u8"ωτο\nΞεσκε");
REQUIRE(parts[4] == u8"άζω τὴν ψυχο");
REQUIRE(parts[5] == u8"θόρα βδελυγμία");
REQUIRE(parts[3] == u8"ωτο");
parts = utf8::split(examples::kGreekValues[1], u8"πφ");
REQUIRE(parts.size() == 3);
REQUIRE(parts[0] == u8"Ξεσκε");
REQUIRE(parts[1] == u8"άζω τὴν ψυχο");
REQUIRE(parts[2] == u8"θόρα βδελυγμία");
// English
parts = utf8::split("The quick brown fox jumps over the lazy dog", "xy");
parts = utf8::split(examples::kEnglishValues[0], "xy");
REQUIRE(parts.size() == 3);
REQUIRE(parts[0] == u8"The quick brown fo");
REQUIRE(parts[1] == u8" jumps over the laz");
REQUIRE(parts[2] == u8" dog");
// Spanish
parts = utf8::split(
u8"El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y "
u8"frío, añoraba a su querido cachorro.",
u8"ójd");
parts = utf8::split(examples::kSpanishValues[0], u8"ójd");
REQUIRE(parts.size() == 4);
REQUIRE(parts[0] == u8"El pingüino Wenceslao hizo kil");
REQUIRE(parts[1] == u8"metros ba");
@ -93,11 +295,26 @@ TEST_CASE("utf8::equal_z", "UTF-8 Equal Z") {
REQUIRE_FALSE(utf8::equal_z(u8"bar", u8"baz\0"));
}
TEST_CASE("utf8::equal_case_z", "UTF-8 Equal Case Z") {
REQUIRE(utf8::equal_z(u8"foo", u8"foo\0"));
REQUIRE_FALSE(utf8::equal_z(u8"bar", u8"baz\0"));
TEST_CASE("utf8::equal_case", "UTF-8 Equal Case") {
  // Identical and clearly different inputs.
  // NOTE(review): the trailing "\0" in these literals is presumably dropped
  // when the const char* is converted to a string view - confirm against the
  // utf8::equal_case signature.
  REQUIRE(utf8::equal_case(u8"foo", u8"foo\0"));
  REQUIRE_FALSE(utf8::equal_case(u8"bar", u8"baz\0"));
  // Inputs that differ only by letter case; without these the test would
  // still pass if the comparison were case-sensitive.
  REQUIRE(utf8::equal_case(u8"foo", u8"FOO"));
  REQUIRE(utf8::equal_case(u8"FoO", u8"fOo"));
  // Case folding must not hide a real mismatch.
  REQUIRE_FALSE(utf8::equal_case(u8"BAR", u8"baz"));
}
TEST_CASE("utf8::equal_case_z", "UTF-8 Equal Case Z") {
  // Identical and clearly different inputs, with an explicit terminator
  // written into the right-hand literal.
  REQUIRE(utf8::equal_case_z(u8"foo", u8"foo\0"));
  REQUIRE_FALSE(utf8::equal_case_z(u8"bar", u8"baz\0"));
  // Inputs that differ only by letter case; without these the test would
  // still pass if the comparison were case-sensitive.
  REQUIRE(utf8::equal_case_z(u8"foo", u8"FOO\0"));
  REQUIRE(utf8::equal_case_z(u8"FoO", u8"fOo\0"));
  // Case folding must not hide a real mismatch.
  REQUIRE_FALSE(utf8::equal_case_z(u8"BAR", u8"baz\0"));
}
// TODO(gibbed): find_any_of
// TODO(gibbed): find_any_of_case
// TODO(gibbed): find_first_of
// TODO(gibbed): find_first_of_case
// TODO(gibbed): starts_with
// TODO(gibbed): starts_with_case
// TODO(gibbed): ends_with
// TODO(gibbed): ends_with_case
// TODO(gibbed): split_path
TEST_CASE("utf8::join_paths", "UTF-8 Join Paths") {
REQUIRE(utf8::join_paths({u8"X:", u8"foo", u8"bar", u8"baz", u8"qux"},
'\\') == "X:\\foo\\bar\\baz\\qux");
@ -105,6 +322,8 @@ TEST_CASE("utf8::join_paths", "UTF-8 Join Paths") {
"X:/foo/bar/baz/qux");
}
// TODO(gibbed): join_guest_paths
TEST_CASE("utf8::fix_path_separators", "UTF-8 Fix Path Separators") {
REQUIRE(utf8::fix_path_separators("X:\\foo/bar\\baz/qux", '\\') ==
"X:\\foo\\bar\\baz\\qux");
@ -112,17 +331,54 @@ TEST_CASE("utf8::fix_path_separators", "UTF-8 Fix Path Separators") {
"X:/foo/bar/baz/qux");
}
// TODO(gibbed): fix_guest_path_separators
TEST_CASE("utf8::find_name_from_path", "UTF-8 Find Name From Path") {
  // One row per check: input path, separator, expected final path component.
  struct NameCase {
    const char* path;
    char separator;
    const char* expected;
  };
  const NameCase cases[] = {
      {"X:\\foo\\bar\\baz\\qux", '\\', "qux"},
      {"X:/foo/bar/baz/qux", '/', "qux"},
      {"X:\\foo\\bar\\baz\\qux.txt", '\\', "qux.txt"},
      {"X:\\ほげ\\ぴよ\\ふが\\ほげら\\ほげほげ.txt", '\\', "ほげほげ.txt"},
      {"X:\\ほげ\\ぴよ\\ふが\\ほげら.ほげほげ", '\\', "ほげら.ほげほげ"},
      {"X:/foo/bar/baz/qux.txt", '/', "qux.txt"},
      {"X:/ほげ/ぴよ/ふが/ほげら/ほげほげ.txt", '/', "ほげほげ.txt"},
      {"X:/ほげ/ぴよ/ふが/ほげら.ほげほげ", '/', "ほげら.ほげほげ"},
  };
  for (const auto& entry : cases) {
    REQUIRE(utf8::find_name_from_path(entry.path, entry.separator) ==
            entry.expected);
  }
}
// TODO(gibbed): find_name_from_guest_path
TEST_CASE("utf8::find_base_name_from_path", "UTF-8 Find Base Name From Path") {
  // One row per check: input path, separator, expected name with the part
  // after the last '.' removed.
  struct BaseNameCase {
    const char* path;
    char separator;
    const char* expected;
  };
  const BaseNameCase cases[] = {
      {"X:\\foo\\bar\\baz\\qux.txt", '\\', "qux"},
      {"X:\\ほげ\\ぴよ\\ふが\\ほげら\\ほげほげ.txt", '\\', "ほげほげ"},
      {"X:\\ほげ\\ぴよ\\ふが\\ほげら.ほげほげ", '\\', "ほげら"},
      {"X:/foo/bar/baz/qux.txt", '/', "qux"},
      {"X:/ほげ/ぴよ/ふが/ほげら/ほげほげ.txt", '/', "ほげほげ"},
      {"X:/ほげ/ぴよ/ふが/ほげら.ほげほげ", '/', "ほげら"},
  };
  for (const auto& entry : cases) {
    REQUIRE(utf8::find_base_name_from_path(entry.path, entry.separator) ==
            entry.expected);
  }
}
// TODO(gibbed): find_base_name_from_guest_path
TEST_CASE("utf8::find_base_path", "UTF-8 Find Base Path") {
  // One row per check: input path, separator, expected path with the final
  // component removed.
  struct BasePathCase {
    const char* path;
    char separator;
    const char* expected;
  };
  const BasePathCase cases[] = {
      {"X:\\foo\\bar\\baz\\qux", '\\', "X:\\foo\\bar\\baz"},
      {"X:/foo/bar/baz/qux", '/', "X:/foo/bar/baz"},
      {"X:/ほげ/ぴよ/ふが/ほげら/ほげほげ", '/', "X:/ほげ/ぴよ/ふが/ほげら"},
  };
  for (const auto& entry : cases) {
    REQUIRE(utf8::find_base_path(entry.path, entry.separator) ==
            entry.expected);
  }
}
// TODO(gibbed): find_base_guest_path
TEST_CASE("utf8::canonicalize_path", "UTF-8 Canonicalize Path") {
REQUIRE(utf8::canonicalize_path("X:\\foo\\bar\\baz\\qux", '\\') ==
"X:\\foo\\bar\\baz\\qux");
@ -130,10 +386,30 @@ TEST_CASE("utf8::canonicalize_path", "UTF-8 Canonicalize Path") {
"X:\\foo\\baz\\qux");
REQUIRE(utf8::canonicalize_path("X:\\foo\\..\\baz\\qux", '\\') ==
"X:\\baz\\qux");
REQUIRE(utf8::canonicalize_path("X:\\foo\\.\\baz\\..\\qux", '\\') ==
"X:\\foo\\qux");
REQUIRE(utf8::canonicalize_path("X:\\foo\\.\\..\\baz\\qux", '\\') ==
"X:\\baz\\qux");
REQUIRE(utf8::canonicalize_path("X:\\.\\bar\\baz\\qux", '\\') ==
"X:\\bar\\baz\\qux");
REQUIRE(utf8::canonicalize_path("X:\\..\\bar\\baz\\qux", '\\') ==
"X:\\bar\\baz\\qux");
REQUIRE(utf8::canonicalize_path("X:/foo/bar/baz/qux", '/') ==
"X:/foo/bar/baz/qux");
REQUIRE(utf8::canonicalize_path(
"X:\\ほげ\\ぴよ\\.\\ふが\\..\\ほげら\\ほげほげ", '\\') ==
"X:\\ほげ\\ぴよ\\ほげら\\ほげほげ");
REQUIRE(utf8::canonicalize_path("X:/foo/./baz/qux", '/') == "X:/foo/baz/qux");
REQUIRE(utf8::canonicalize_path("X:/foo/../baz/qux", '/') == "X:/baz/qux");
REQUIRE(utf8::canonicalize_path("X:/foo/./baz/../qux", '/') == "X:/foo/qux");
REQUIRE(utf8::canonicalize_path("X:/foo/./../baz/qux", '/') == "X:/baz/qux");
REQUIRE(utf8::canonicalize_path("X:/./bar/baz/qux", '/') == "X:/bar/baz/qux");
REQUIRE(utf8::canonicalize_path("X:/../bar/baz/qux", '/') ==
"X:/bar/baz/qux");
REQUIRE(utf8::canonicalize_path("X:/ほげ/ぴよ/./ふが/../ほげら/ほげほげ",
'/') == "X:/ほげ/ぴよ/ほげら/ほげほげ");
}
// TODO(gibbed): canonicalize_guest_path
} // namespace xe::base::test