Path: Fix several errors in URLEncode/Decode and add tests

This commit is contained in:
Stenzek 2025-01-20 21:44:44 +10:00
parent 78902d7632
commit 4b20fc6922
No known key found for this signature in database
2 changed files with 85 additions and 28 deletions

View File

@ -277,3 +277,65 @@ TEST(Path, CreateFileURL)
ASSERT_EQ(Path::CreateFileURL("/foo/bar"), "file:///foo/bar"); ASSERT_EQ(Path::CreateFileURL("/foo/bar"), "file:///foo/bar");
#endif #endif
} }
// Checks Path::URLEncode() against a table of {input, expected output} pairs.
// Rows are evaluated in order and the test aborts on the first mismatch.
TEST(Path, URLEncode)
{
  struct EncodeCase
  {
    const char* input;
    const char* expected;
  };

  static constexpr EncodeCase cases[] = {
    // Basic cases
    {"hello world", "hello%20world"},
    {"", ""},
    {"abcABC123", "abcABC123"},

    // Special characters
    {"!@#$%^&*()", "%21%40%23%24%25%5E%26%2A%28%29"},
    {"[]{}<>", "%5B%5D%7B%7D%3C%3E"},
    {",./?;:'\"", "%2C.%2F%3F%3B%3A%27%22"},

    // Unicode characters (expected values are the percent-encoded UTF-8 bytes)
    {"こんにちは", "%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF"},
    {"über", "%C3%BCber"},

    // Additional special characters
    {"=&?", "%3D%26%3F"},
    {"\\|`", "%5C%7C%60"},
    {"§±€", "%C2%A7%C2%B1%E2%82%AC"},
    {"%20%2F%3F", "%2520%252F%253F"}, // '%' itself must be escaped
    {"tab\tline\nreturn\r", "tab%09line%0Areturn%0D"},

    // Mixed content
    {"path/to/my file.txt", "path%2Fto%2Fmy%20file.txt"},
    {"user+name@example.com", "user%2Bname%40example.com"},
  };

  for (const EncodeCase& tc : cases)
  {
    ASSERT_EQ(Path::URLEncode(tc.input), tc.expected);
  }
}
// Checks Path::URLDecode() against a table of {input, expected output} pairs.
// Rows are evaluated in order and the test aborts on the first mismatch.
TEST(Path, URLDecode)
{
  struct DecodeCase
  {
    const char* input;
    const char* expected;
  };

  static constexpr DecodeCase cases[] = {
    // Basic cases
    {"hello%20world", "hello world"},
    {"", ""},
    {"abcABC123", "abcABC123"},

    // Special characters
    {"%21%40%23%24%25%5E%26%2A%28%29", "!@#$%^&*()"},
    {"%5B%5D%7B%7D%3C%3E", "[]{}<>"},
    {"%2C%2F%3F%3B%3A%27%22", ",/?;:'\""},

    // Additional special characters
    {"%3D%26%3F", "=&?"},
    {"%5C%7C%60", "\\|`"},
    {"%C2%A7%C2%B1%E2%82%AC", "§±€"},
    {"%2520%252F%253F", "%20%2F%3F"}, // only one level of escaping is removed
    {"tab%09line%0Areturn%0D", "tab\tline\nreturn\r"},

    // Unicode characters
    {"%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF", "こんにちは"},
    {"%C3%BCber", "über"},

    // Mixed content
    {"path%2Fto%2Fmy%20file.txt", "path/to/my file.txt"},
    {"user%2Bname%40example.com", "user+name@example.com"},

    // Invalid decode cases - decoder should stop at first error
    {"hello%2G", "hello"},    // Invalid hex char 'G'
    {"test%", "test"},        // Incomplete escape sequence
    {"path%%20name", "path"}, // Invalid % followed by valid sequence
    {"abc%2", "abc"},         // Truncated escape sequence
  };

  for (const DecodeCase& tc : cases)
  {
    ASSERT_EQ(Path::URLDecode(tc.input), tc.expected);
  }
}

View File

@ -1067,7 +1067,7 @@ std::string Path::URLEncode(std::string_view str)
{ {
const char c = str[i]; const char c = str[i];
if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '_' || if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '_' ||
c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')') c == '.' || c == '~')
{ {
ret.push_back(c); ret.push_back(c);
} }
@ -1077,8 +1077,8 @@ std::string Path::URLEncode(std::string_view str)
const unsigned char n1 = static_cast<unsigned char>(c) >> 4; const unsigned char n1 = static_cast<unsigned char>(c) >> 4;
const unsigned char n2 = static_cast<unsigned char>(c) & 0x0F; const unsigned char n2 = static_cast<unsigned char>(c) & 0x0F;
ret.push_back((n1 >= 10) ? ('a' + (n1 - 10)) : ('0' + n1)); ret.push_back((n1 >= 10) ? ('A' + (n1 - 10)) : ('0' + n1));
ret.push_back((n2 >= 10) ? ('a' + (n2 - 10)) : ('0' + n2)); ret.push_back((n2 >= 10) ? ('A' + (n2 - 10)) : ('0' + n2));
} }
} }
@ -1090,34 +1090,29 @@ std::string Path::URLDecode(std::string_view str)
std::string ret; std::string ret;
ret.reserve(str.length()); ret.reserve(str.length());
for (size_t i = 0, l = str.size(); i < l; i++) for (size_t i = 0, l = str.size(); i < l;)
{ {
const char c = str[i]; const char c = str[i++];
if (c == '+') if (c == '%')
{ {
ret.push_back(c); if ((i + 2) > str.length())
}
else if (c == '%')
{
if ((i + 2) >= str.length())
break; break;
const char clower = str[i + 1]; // return -1 which will be negative when or'ed with anything else, so it becomes invalid.
const char cupper = str[i + 2]; static constexpr auto to_nibble = [](char ch) -> int {
const unsigned char lower = return (ch >= '0' && ch <= '9') ?
(clower >= '0' && clower <= '9') ? static_cast<int>(ch - '0') :
static_cast<unsigned char>(clower - '0') : ((ch >= 'a' && ch <= 'f') ? (static_cast<int>(ch - 'a') + 0xa) :
((clower >= 'a' && clower <= 'f') ? ((ch >= 'A' && ch <= 'F') ? (static_cast<int>(ch - 'A') + 0xa) : -1));
static_cast<unsigned char>(clower - 'a') : };
((clower >= 'A' && clower <= 'F') ? static_cast<unsigned char>(clower - 'A') : 0));
const unsigned char upper = const int upper = to_nibble(str[i++]);
(cupper >= '0' && cupper <= '9') ? const int lower = to_nibble(str[i++]);
static_cast<unsigned char>(cupper - '0') : const int dch = lower | (upper << 4);
((cupper >= 'a' && cupper <= 'f') ? if (dch < 0)
static_cast<unsigned char>(cupper - 'a') : break;
((cupper >= 'A' && cupper <= 'F') ? static_cast<unsigned char>(cupper - 'A') : 0));
const char dch = static_cast<char>(lower | (upper << 4)); ret.push_back(static_cast<char>(dch));
ret.push_back(dch);
} }
else else
{ {
@ -1125,7 +1120,7 @@ std::string Path::URLDecode(std::string_view str)
} }
} }
return std::string(str); return ret;
} }
std::string Path::CreateFileURL(std::string_view path) std::string Path::CreateFileURL(std::string_view path)