Path: Fix several errors in URLEncode/Decode and add tests

This commit is contained in:
Stenzek 2025-01-20 21:44:44 +10:00
parent 78902d7632
commit 4b20fc6922
No known key found for this signature in database
2 changed files with 85 additions and 28 deletions

View File

@ -277,3 +277,65 @@ TEST(Path, CreateFileURL)
ASSERT_EQ(Path::CreateFileURL("/foo/bar"), "file:///foo/bar");
#endif
}
// Table-driven check of Path::URLEncode(): each row pairs a raw input with the
// percent-encoded text the encoder is expected to produce. Rows are evaluated
// in order and the test aborts at the first mismatch.
TEST(Path, URLEncode)
{
  struct EncodeCase
  {
    const char* input;
    const char* expected;
  };

  static constexpr EncodeCase cases[] = {
    // Basic cases
    {"hello world", "hello%20world"},
    {"", ""},
    {"abcABC123", "abcABC123"},

    // Special characters
    {"!@#$%^&*()", "%21%40%23%24%25%5E%26%2A%28%29"},
    {"[]{}<>", "%5B%5D%7B%7D%3C%3E"},
    {",./?;:'\"", "%2C.%2F%3F%3B%3A%27%22"},

    // Unicode characters (each UTF-8 byte is escaped individually)
    {"こんにちは", "%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF"},
    {"über", "%C3%BCber"},

    // Additional special characters
    {"=&?", "%3D%26%3F"},
    {"\\|`", "%5C%7C%60"},
    {"§±€", "%C2%A7%C2%B1%E2%82%AC"},
    // Already-escaped input is escaped again: the '%' itself becomes %25.
    {"%20%2F%3F", "%2520%252F%253F"},
    {"tab\tline\nreturn\r", "tab%09line%0Areturn%0D"},

    // Mixed content
    {"path/to/my file.txt", "path%2Fto%2Fmy%20file.txt"},
    {"user+name@example.com", "user%2Bname%40example.com"},
  };

  for (const EncodeCase& tc : cases)
  {
    ASSERT_EQ(Path::URLEncode(tc.input), tc.expected);
  }
}
// Table-driven check of Path::URLDecode(): each row pairs percent-encoded text
// with the string the decoder is expected to return. Rows are evaluated in
// order and the test aborts at the first mismatch.
TEST(Path, URLDecode)
{
  struct DecodeCase
  {
    const char* input;
    const char* expected;
  };

  static constexpr DecodeCase cases[] = {
    // Basic cases
    {"hello%20world", "hello world"},
    {"", ""},
    {"abcABC123", "abcABC123"},

    // Special characters
    {"%21%40%23%24%25%5E%26%2A%28%29", "!@#$%^&*()"},
    {"%5B%5D%7B%7D%3C%3E", "[]{}<>"},
    {"%2C%2F%3F%3B%3A%27%22", ",/?;:'\""},

    // Additional special characters
    {"%3D%26%3F", "=&?"},
    {"%5C%7C%60", "\\|`"},
    {"%C2%A7%C2%B1%E2%82%AC", "§±€"},
    // Only one level of escaping is removed: %25 turns back into a literal '%'.
    {"%2520%252F%253F", "%20%2F%3F"},
    {"tab%09line%0Areturn%0D", "tab\tline\nreturn\r"},

    // Unicode characters
    {"%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF", "こんにちは"},
    {"%C3%BCber", "über"},

    // Mixed content
    {"path%2Fto%2Fmy%20file.txt", "path/to/my file.txt"},
    {"user%2Bname%40example.com", "user+name@example.com"},

    // Invalid decode cases - decoder should stop at first error
    {"hello%2G", "hello"},    // Invalid hex char 'G'
    {"test%", "test"},        // Incomplete escape sequence
    {"path%%20name", "path"}, // Invalid % followed by valid sequence
    {"abc%2", "abc"},         // Truncated escape sequence
  };

  for (const DecodeCase& tc : cases)
  {
    ASSERT_EQ(Path::URLDecode(tc.input), tc.expected);
  }
}

View File

@ -1067,7 +1067,7 @@ std::string Path::URLEncode(std::string_view str)
{
const char c = str[i];
if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '_' ||
c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')')
c == '.' || c == '~')
{
ret.push_back(c);
}
@ -1077,8 +1077,8 @@ std::string Path::URLEncode(std::string_view str)
const unsigned char n1 = static_cast<unsigned char>(c) >> 4;
const unsigned char n2 = static_cast<unsigned char>(c) & 0x0F;
ret.push_back((n1 >= 10) ? ('a' + (n1 - 10)) : ('0' + n1));
ret.push_back((n2 >= 10) ? ('a' + (n2 - 10)) : ('0' + n2));
ret.push_back((n1 >= 10) ? ('A' + (n1 - 10)) : ('0' + n1));
ret.push_back((n2 >= 10) ? ('A' + (n2 - 10)) : ('0' + n2));
}
}
@ -1090,34 +1090,29 @@ std::string Path::URLDecode(std::string_view str)
std::string ret;
ret.reserve(str.length());
for (size_t i = 0, l = str.size(); i < l; i++)
for (size_t i = 0, l = str.size(); i < l;)
{
const char c = str[i];
if (c == '+')
const char c = str[i++];
if (c == '%')
{
ret.push_back(c);
}
else if (c == '%')
{
if ((i + 2) >= str.length())
if ((i + 2) > str.length())
break;
const char clower = str[i + 1];
const char cupper = str[i + 2];
const unsigned char lower =
(clower >= '0' && clower <= '9') ?
static_cast<unsigned char>(clower - '0') :
((clower >= 'a' && clower <= 'f') ?
static_cast<unsigned char>(clower - 'a') :
((clower >= 'A' && clower <= 'F') ? static_cast<unsigned char>(clower - 'A') : 0));
const unsigned char upper =
(cupper >= '0' && cupper <= '9') ?
static_cast<unsigned char>(cupper - '0') :
((cupper >= 'a' && cupper <= 'f') ?
static_cast<unsigned char>(cupper - 'a') :
((cupper >= 'A' && cupper <= 'F') ? static_cast<unsigned char>(cupper - 'A') : 0));
const char dch = static_cast<char>(lower | (upper << 4));
ret.push_back(dch);
// return -1 which will be negative when or'ed with anything else, so it becomes invalid.
static constexpr auto to_nibble = [](char ch) -> int {
return (ch >= '0' && ch <= '9') ?
static_cast<int>(ch - '0') :
((ch >= 'a' && ch <= 'f') ? (static_cast<int>(ch - 'a') + 0xa) :
((ch >= 'A' && ch <= 'F') ? (static_cast<int>(ch - 'A') + 0xa) : -1));
};
const int upper = to_nibble(str[i++]);
const int lower = to_nibble(str[i++]);
const int dch = lower | (upper << 4);
if (dch < 0)
break;
ret.push_back(static_cast<char>(dch));
}
else
{
@ -1125,7 +1120,7 @@ std::string Path::URLDecode(std::string_view str)
}
}
return std::string(str);
return ret;
}
std::string Path::CreateFileURL(std::string_view path)