/* Copyright (c) 2013-2015 Jeffrey Pfau * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "util/string.h" #include #ifndef HAVE_STRNDUP char* strndup(const char* start, size_t len) { // This is suboptimal, but anything recent should have strndup char* out = malloc((len + 1) * sizeof(char)); strncpy(out, start, len); out[len] = '\0'; return out; } #endif char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) { char* last = 0; const char* next = haystack; size_t needleLen = strlen(needle); for (; len >= needleLen; --len, ++next) { if (strncmp(needle, next, needleLen) == 0) { last = (char*) next; } } return last; } static uint32_t _utf16Char(const uint16_t** unicode, size_t* length) { if (*length < 2) { *length = 0; return 0; } uint32_t unichar = **unicode; ++*unicode; *length -= 2; if (unichar < 0xD800 || unichar >= 0xE000) { return unichar; } if (*length < 2) { *length = 0; return 0; } uint16_t highSurrogate = unichar; uint16_t lowSurrogate = **unicode; ++*unicode; *length -= 2; if (highSurrogate >= 0xDC00) { return 0; } if (lowSurrogate < 0xDC00 || lowSurrogate >= 0xE000) { return 0; } highSurrogate -= 0xD800; lowSurrogate -= 0xDC00; return (highSurrogate << 10) + lowSurrogate + 0x10000; } static uint32_t _utf8Char(const char** unicode, size_t* length) { if (*length == 0) { return 0; } char byte = **unicode; --*length; ++*unicode; if (!(byte & 0x80)) { return byte; } uint32_t unichar; static int tops[4] = { 0xC0, 0xE0, 0xF0, 0xF8 }; size_t numBytes; for (numBytes = 0; numBytes < 3; ++numBytes) { if ((byte & tops[numBytes + 1]) == tops[numBytes]) { break; } } unichar = byte & ~tops[numBytes]; if (numBytes == 3) { return 0; } ++numBytes; if (*length < numBytes) { *length = 0; return 0; } size_t i; for (i = 0; i < numBytes; ++i) { unichar <<= 6; byte = **unicode; --*length; ++*unicode; if ((byte & 0xC0) != 0x80) { return 0; } unichar |= byte & 0x3F; } return unichar; } static size_t _toUtf8(uint32_t unichar, char* buffer) { if (unichar > 0x10FFFF) { unichar = 0xFFFD; } if (unichar < 0x80) { buffer[0] = unichar; return 1; } if (unichar < 0x800) { buffer[0] = (unichar >> 6) | 0xC0; buffer[1] = (unichar & 0x3F) | 0x80; return 2; } if (unichar < 0x10000) { buffer[0] = (unichar >> 12) | 0xE0; buffer[1] = ((unichar >> 6) & 0x3F) | 0x80; buffer[2] = (unichar & 0x3F) | 0x80; return 3; } if (unichar < 0x200000) { buffer[0] = (unichar >> 18) | 0xF0; buffer[1] = ((unichar >> 12) & 0x3F) | 0x80; buffer[2] = ((unichar >> 6) & 0x3F) | 0x80; buffer[3] = (unichar & 0x3F) | 0x80; return 4; } // This shouldn't be possible return 0; } int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) { uint32_t char1 = 0, char2 = 0; while (utf16Length > 0 && utf8Length > 0) { if (char1 < char2) { return -1; } if (char1 > char2) { return 1; } char1 = _utf16Char(&utf16, &utf16Length); char2 = _utf8Char(&utf8, &utf8Length); } if (utf16Length == 0 && utf8Length > 0) { return -1; } if (utf16Length > 0 && utf8Length == 0) { return 1; } return 0; } char* utf16to8(const uint16_t* utf16, size_t length) { char* utf8 = 0; char* offset = 0; char buffer[4]; size_t utf8TotalBytes = 0; size_t utf8Length = 0; while (true) { if (length == 0) { break; } uint32_t unichar = _utf16Char(&utf16, &length); size_t bytes = _toUtf8(unichar, buffer); utf8Length += bytes; if (utf8Length < utf8TotalBytes) { memcpy(offset, buffer, bytes); offset += bytes; } else if (!utf8) { utf8 = malloc(length); if (!utf8) { return 0; } utf8TotalBytes = length; memcpy(utf8, buffer, bytes); offset = utf8 + bytes; } else if (utf8Length >= utf8TotalBytes) { char* newUTF8 = realloc(utf8, utf8TotalBytes * 2); offset = offset - utf8 + newUTF8; if (newUTF8 != utf8) { free(utf8); } if (!newUTF8) { return 0; } utf8 = newUTF8; memcpy(offset, buffer, bytes); offset += bytes; } } char* newUTF8 = realloc(utf8, utf8Length + 1); if (newUTF8 != utf8) { free(utf8); } newUTF8[utf8Length] = '\0'; return newUTF8; } int hexDigit(char digit) { switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return digit - '0'; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return digit - 'a' + 10; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': return digit - 'A' + 10; default: return -1; } } const char* hex32(const char* line, uint32_t* out) { uint32_t value = 0; int i; for (i = 0; i < 8; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; } const char* hex16(const char* line, uint16_t* out) { uint16_t value = 0; *out = 0; int i; for (i = 0; i < 4; ++i, ++line) { char digit = *line; value <<= 4; int nybble = hexDigit(digit); if (nybble < 0) { return 0; } value |= nybble; } *out = value; return line; }