3rdparty: Update simpleini to v4.22

This commit is contained in:
JordanTheToaster 2024-05-04 13:40:10 +01:00 committed by Connor McLaughlin
parent 90338ed065
commit 6630783686
4 changed files with 313 additions and 122 deletions

View File

@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2006-2013 Brodie Thiesfield
Copyright (c) 2006-2022 Brodie Thiesfield
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in

View File

@ -24,7 +24,7 @@
Conversions between UTF32, UTF-16, and UTF-8. Header file.
Several funtions are included here, forming a complete set of
Several functions are included here, forming a complete set of
conversions between the three formats. UTF-7 is not included
here, but is handled in a separate source file.
@ -102,7 +102,7 @@ typedef unsigned char Boolean; /* 0 or 1 */
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insuff. room in target for conversion */
targetExhausted, /* insufficient room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;

View File

@ -3,9 +3,9 @@
<table>
<tr><th>Library <td>SimpleIni
<tr><th>File <td>SimpleIni.h
<tr><th>Author <td>Brodie Thiesfield [code at jellycan dot com]
<tr><th>Author <td>Brodie Thiesfield
<tr><th>Source <td>https://github.com/brofield/simpleini
<tr><th>Version <td>4.17
<tr><th>Version <td>4.22
</table>
Jump to the @link CSimpleIniTempl CSimpleIni @endlink interface documentation.
@ -16,7 +16,6 @@
Windows and Linux/Unix. It is fast, simple and source code using this
component will compile unchanged on either OS.
@section features FEATURES
- MIT Licence allows free use in all software (including GPL and commercial)
@ -38,26 +37,36 @@
- support for non-standard character types or file encodings
via user-written converter classes
- support for adding/modifying values programmatically
- compiles cleanly in the following compilers:
- should compile cleanly, without warnings, usually at the strictest warning level
- it has been tested with the following compilers:
- Windows/VC6 (warning level 3)
- Windows/VC.NET 2003 (warning level 4)
- Windows/VC 2005 (warning level 4)
- Windows/VC 2019 (warning level 4)
- Linux/gcc (-Wall)
- Mac OS/c++ (-Wall)
@section usage USAGE SUMMARY
-# Decide if you will be using utf8 or MBCS files, and working with the
data in utf8, wchar_t or ICU chars.
-# If you will only be using straight utf8 files and access the data via the
char interface, then you do not need any conversion library and could define
SI_NO_CONVERSION. Note that no conversion also means no validation of the data.
If no converter is specified then the default converter is SI_CONVERT_GENERIC
on Mac/Linux and SI_CONVERT_WIN32 on Windows. If you need widechar support on
Mac/Linux then use either SI_CONVERT_GENERIC or SI_CONVERT_ICU. These are also
supported on all platforms.
-# Define the appropriate symbol for the converter you wish to use and
include the SimpleIni.h header file. If no specific converter is defined
then the default converter is used. The default conversion mode uses
SI_CONVERT_WIN32 on Windows and SI_CONVERT_GENERIC on all other
platforms. If you are using ICU then SI_CONVERT_ICU is supported on all
platforms.
-# Declare an instance the appropriate class. Note that the following
include the SimpleIni.h header file.
-# Declare an instance of the appropriate class. Note that the following
definitions are just shortcuts for commonly used types. Other types
(PRUnichar, unsigned short, unsigned char) are also possible.
<table>
<tr><th>Interface <th>Case-sensitive <th>Load UTF-8 <th>Load MBCS <th>Typedef
<tr><th>SI_NO_CONVERSION
<tr><td>char <td>No <td>Yes <td>No <td>CSimpleIniA
<tr><td>char <td>Yes <td>Yes <td>No <td>CSimpleIniCaseA
<tr><th>SI_CONVERT_GENERIC
<tr><td>char <td>No <td>Yes <td>Yes #1 <td>CSimpleIniA
<tr><td>char <td>Yes <td>Yes <td>Yes <td>CSimpleIniCaseA
@ -77,6 +86,8 @@
#1 On Windows you are better to use CSimpleIniA with SI_CONVERT_WIN32.<br>
#2 Only affects Windows. On Windows this uses MBCS functions and
so may fold case incorrectly leading to uncertain results.
-# Set all the options that you require, see all the Set*() options below.
The SetUnicode() option is very common and can be specified in the constructor.
-# Call LoadData() or LoadFile() to load and parse the INI configuration file
-# Access and modify the data of the file using the following functions
<table>
@ -88,6 +99,8 @@
<tr><td>GetValue <td>Return a value for a section & key
<tr><td>SetValue <td>Add or update a value for a section & key
<tr><td>Delete <td>Remove a section, or a key from a section
<tr><td>SectionExists <td>Does a section exist?
<tr><td>KeyExists <td>Does a key exist?
</table>
-# Call Save() or SaveFile() to save the INI configuration data
@ -165,14 +178,17 @@
@section contrib CONTRIBUTIONS
Many thanks to the following contributors:
- 2010/05/03: Tobias Gehrig: added GetDoubleValue()
- See list of many contributors in github
@section licence MIT LICENCE
The licence text below is the boilerplate "MIT Licence" used from:
http://www.opensource.org/licenses/mit-license.php
Copyright (c) 2006-2012, Brodie Thiesfield
Copyright (c) 2006-2024, Brodie Thiesfield
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -234,16 +250,16 @@
# define SI_ASSERT(x)
#endif
enum SI_Error {
SI_OK = 0, //!< No error
SI_UPDATED = 1, //!< An existing value was updated
SI_INSERTED = 2, //!< A new value was inserted
using SI_Error = int;
constexpr int SI_OK = 0; //!< No error
constexpr int SI_UPDATED = 1; //!< An existing value was updated
constexpr int SI_INSERTED = 2; //!< A new value was inserted
// note: test for any error with (retval < 0)
SI_FAIL = -1, //!< Generic failure
SI_NOMEM = -2, //!< Out of memory error
SI_FILE = -3 //!< File error (see errno for detail error)
};
constexpr int SI_FAIL = -1; //!< Generic failure
constexpr int SI_NOMEM = -2; //!< Out of memory error
constexpr int SI_FILE = -3; //!< File error (see errno for detail error)
#define SI_UTF8_SIGNATURE "\xEF\xBB\xBF"
@ -542,6 +558,35 @@ public:
/** Query whether spaces are written surrounding the equals sign on output */
bool UsingSpaces() const { return m_bSpaces; }
/** Should we recognise and parse quotes in single line values?
Quote parsing is off unless enabled here. When it is enabled, a single
line value that is wrapped in a pair of double quotes is loaded without
those quotes, and a single line value that starts or ends with whitespace
is saved wrapped in double quotes so that the whitespace is preserved.
This is simple quoting: quotes inside the value are not escaped.
\param a_bParseQuotes Parse quoted data in values?
*/
void SetQuotes(bool a_bParseQuotes = true) {
m_bParseQuotes = a_bParseQuotes;
}
/** Are quotes in single line values being recognised and parsed? */
bool UsingQuotes() const { return m_bParseQuotes; }
/** When reading/writing an ini file, do we permit keys that have no equals
sign and no value? If false (the default), then every valid key must
have an equals sign and any line without an equals sign is ignored. If
true then keys do not require an equals sign to be considered a key. Note
that this means that any non-commented line of text would become a key.
When saving with this enabled, a key whose value is empty is written
without an equals sign.
\param a_bAllowKeyOnly Permit keys without an equals sign or value.
*/
void SetAllowKeyOnly(bool a_bAllowKeyOnly = true) {
m_bAllowKeyOnly = a_bAllowKeyOnly;
}
/** Do we allow keys to exist without a value or equals sign?
See SetAllowKeyOnly(). */
bool GetAllowKeyOnly() const { return m_bAllowKeyOnly; }
/*-----------------------------------------------------------------------*/
/** @}
@{ @name Loading INI Data */
@ -843,13 +888,27 @@ public:
are in use!
@param a_pSection Name of the section to return
@return boolean Was a section matching the supplied
name found.
@return Section data
*/
const TKeyVal * GetSection(
const SI_CHAR * a_pSection
) const;
/** Test if a section exists. Convenience function */
inline bool SectionExists(
const SI_CHAR * a_pSection
) const {
return GetSection(a_pSection) != NULL;
}
/** Convenience check for the presence of a key within a section.

    @param a_pSection   Name of the section to search
    @param a_pKey       Name of the key to look for
    @return true when GetValue() returns a value for that section and key
*/
inline bool KeyExists(
    const SI_CHAR * a_pSection,
    const SI_CHAR * a_pKey
    ) const {
    if (GetValue(a_pSection, a_pKey) == NULL) {
        return false;
    }
    return true;
}
/** Retrieve the value for a specific key. If multiple keys are enabled
(see SetMultiKey) then only the first value associated with that key
will be returned, see GetAllValues for getting all values with multikey.
@ -1219,6 +1278,7 @@ private:
bool IsMultiLineTag(const SI_CHAR * a_pData) const;
bool IsMultiLineData(const SI_CHAR * a_pData) const;
bool IsSingleLineQuotedValue(const SI_CHAR* a_pData) const;
bool LoadMultiLineText(
SI_CHAR *& a_pData,
const SI_CHAR *& a_pVal,
@ -1250,6 +1310,9 @@ private:
/** File comment for this data, if one exists. */
const SI_CHAR * m_pFileComment;
/** constant empty string */
const SI_CHAR m_cEmptyString;
/** Parsed INI data. Section -> (Key -> Value). */
TSection m_data;
@ -1271,6 +1334,12 @@ private:
/** Should spaces be written out surrounding the equals sign? */
bool m_bSpaces;
/** Should quoted data in values be recognized and parsed? */
bool m_bParseQuotes;
/** Are keys permitted without an equals sign or value when reading/writing? */
bool m_bAllowKeyOnly;
/** Next order value, used to ensure sections and keys are output in the
same order that they are loaded/added.
*/
@ -1290,10 +1359,13 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::CSimpleIniTempl(
: m_pData(0)
, m_uDataLen(0)
, m_pFileComment(NULL)
, m_cEmptyString(0)
, m_bStoreIsUtf8(a_bIsUtf8)
, m_bAllowMultiKey(a_bAllowMultiKey)
, m_bAllowMultiLine(a_bAllowMultiLine)
, m_bSpaces(true)
, m_bParseQuotes(false)
, m_bAllowKeyOnly(false)
, m_nOrder(0)
{ }
@ -1392,7 +1464,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadFile(
}
// allocate and ensure NULL terminated
char * pData = new(std::nothrow) char[lSize+1];
char * pData = new(std::nothrow) char[lSize+static_cast<size_t>(1)];
if (!pData) {
return SI_NOMEM;
}
@ -1550,6 +1622,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::FindEntry(
{
a_pComment = NULL;
bool bHaveValue = false;
SI_CHAR * pTrail = NULL;
while (*a_pData) {
// skip spaces and empty lines
@ -1607,19 +1680,20 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::FindEntry(
}
// find the end of the key name (it may contain spaces)
// and convert it to lowercase as necessary
a_pKey = a_pData;
while (*a_pData && *a_pData != '=' && !IsNewLineChar(*a_pData)) {
++a_pData;
}
// *a_pData is null, equals, or newline
// if it's an invalid line, just skip it
if (*a_pData != '=') {
// if no value and we don't allow no value, then invalid
bHaveValue = (*a_pData == '=');
if (!bHaveValue && !m_bAllowKeyOnly) {
continue;
}
// empty keys are invalid
if (a_pKey == a_pData) {
if (bHaveValue && a_pKey == a_pData) {
while (*a_pData && !IsNewLineChar(*a_pData)) {
++a_pData;
}
@ -1632,6 +1706,9 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::FindEntry(
--pTrail;
}
++pTrail;
if (bHaveValue) {
// process the value
*pTrail = 0;
// skip leading whitespace on the value
@ -1664,6 +1741,23 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::FindEntry(
return LoadMultiLineText(a_pData, a_pVal, pTagName);
}
// check for quoted values, we are not supporting escapes in quoted values (yet)
if (m_bParseQuotes) {
--pTrail;
if (pTrail > a_pVal && *a_pVal == '"' && *pTrail == '"') {
++a_pVal;
*pTrail = 0;
}
}
}
else {
// no value to process, just prepare for the next
if (*a_pData) {
SkipNewLine(a_pData);
}
*pTrail = 0;
}
// return the standard entry
return true;
}
@ -1721,6 +1815,41 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsMultiLineData(
return false;
}
/** Decide whether a value must be written inside double quotes.

    A value needs quoting when it starts or ends with whitespace (which
    would otherwise be lost) AND it has no embedded newlines. A value
    containing a newline can never be written as a quoted single line, so
    it is rejected before the whitespace checks are made.

    @param a_pData  NUL-terminated value text to examine
    @return true if the value is a single line with leading or trailing
            whitespace, false otherwise (including for the empty string)
*/
template<class SI_CHAR, class SI_STRLESS, class SI_CONVERTER>
bool
CSimpleIniTempl<SI_CHAR, SI_STRLESS, SI_CONVERTER>::IsSingleLineQuotedValue(
    const SI_CHAR* a_pData
    ) const
{
    // an empty string has no whitespace to preserve
    if (!*a_pData) {
        return false;
    }

    // embedded newlines disqualify the value. This scan must come before
    // the whitespace checks, otherwise a value with leading whitespace and
    // a newline would be reported as quotable.
    const SI_CHAR* pEnd = a_pData;
    for (; *pEnd; ++pEnd) {
        if (IsNewLineChar(*pEnd)) {
            return false;
        }
    }

    // pEnd is on the terminator and the string is non-empty, so pEnd - 1
    // is the last character of the value
    return IsSpace(*a_pData) || IsSpace(*(pEnd - 1));
}
template<class SI_CHAR, class SI_STRLESS, class SI_CONVERTER>
bool
CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsNewLineChar(
@ -1754,8 +1883,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadMultiLineText(
a_pVal = a_pData;
// find the end tag. This tag must start in column 1 and be
// followed by a newline. No whitespace removal is done while
// searching for this tag.
// followed by a newline. We ignore any whitespace after the end
// tag but not whitespace before it.
SI_CHAR cEndOfLineChar = *a_pData;
for(;;) {
// if we are loading comments then we need a comment character as
@ -1811,12 +1940,20 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadMultiLineText(
// if we are looking for a tag then do the check now. This is done before
// checking for end of the data, so that if we have the tag at the end
// of the data then the tag is removed correctly.
if (a_pTagName &&
(!IsLess(pDataLine, a_pTagName) && !IsLess(a_pTagName, pDataLine)))
{
if (a_pTagName) {
// strip whitespace from the end of this tag
SI_CHAR* pc = a_pData - 1;
while (pc > pDataLine && IsSpace(*pc)) --pc;
SI_CHAR ch = *++pc;
*pc = 0;
if (!IsLess(pDataLine, a_pTagName) && !IsLess(a_pTagName, pDataLine)) {
break;
}
*pc = ch;
}
// if we are at the end of the data then we just automatically end
// this entry and return the current data.
if (!cEndOfLineChar) {
@ -1916,7 +2053,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
// only set the comment if this is a section only entry
Entry oSection(a_pSection, ++m_nOrder);
if (a_pComment && (!a_pKey || !a_pValue)) {
if (a_pComment && !a_pKey) {
oSection.pComment = a_pComment;
}
@ -1926,14 +2063,15 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
iSection = i.first;
bInserted = true;
}
if (!a_pKey || !a_pValue) {
// section only entries are specified with pItem and pVal as NULL
if (!a_pKey) {
// section only entries are specified with pItem as NULL
return bInserted ? SI_INSERTED : SI_UPDATED;
}
// check for existence of the key
TKeyVal & keyval = iSection->second;
typename TKeyVal::iterator iKey = keyval.find(a_pKey);
bInserted = iKey == keyval.end();
// remove all existing entries but save the load order and
// comment of the first entry
@ -1956,6 +2094,11 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
iKey = keyval.end();
}
// values need to be a valid string, even if they are an empty string
if (!a_pValue) {
a_pValue = &m_cEmptyString;
}
// make string copies if necessary
bool bForceCreateNewKey = m_bAllowMultiKey && !a_bForceReplace;
if (a_bCopyStrings) {
@ -1980,8 +2123,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
}
typename TKeyVal::value_type oEntry(oKey, static_cast<const SI_CHAR *>(NULL));
iKey = keyval.insert(oEntry);
bInserted = true;
}
iKey->second = a_pValue;
return bInserted ? SI_INSERTED : SI_UPDATED;
}
@ -2078,7 +2221,11 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::SetLongValue(
// convert to an ASCII string
char szInput[64];
#if __STDC_WANT_SECURE_LIB__ && !_WIN32_WCE
sprintf_s(szInput, a_bUseHex ? "0x%lx" : "%ld", a_nValue);
#else // !__STDC_WANT_SECURE_LIB__
snprintf(szInput, sizeof(szInput), a_bUseHex ? "0x%lx" : "%ld", a_nValue);
#endif // __STDC_WANT_SECURE_LIB__
// convert to output text
SI_CHAR szOutput[64];
@ -2136,7 +2283,11 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::SetDoubleValue(
// convert to an ASCII string
char szInput[64];
#if __STDC_WANT_SECURE_LIB__ && !_WIN32_WCE
sprintf_s(szInput, "%f", a_nValue);
#else // !__STDC_WANT_SECURE_LIB__
snprintf(szInput, sizeof(szInput), "%f", a_nValue);
#endif // __STDC_WANT_SECURE_LIB__
// convert to output text
SI_CHAR szOutput[64];
@ -2431,6 +2582,19 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::Save(
oSections.sort(typename Entry::LoadOrder());
#endif
// if there is an empty section name, then it must be written out first
// regardless of the load order
typename TNamesDepend::iterator is = oSections.begin();
for (; is != oSections.end(); ++is) {
if (!*is->pItem) {
// move the empty section name to the front of the section list
if (is != oSections.begin()) {
oSections.splice(oSections.begin(), oSections, is, std::next(is));
}
break;
}
}
// write the file comment if we have one
bool bNeedNewLine = false;
if (m_pFileComment) {
@ -2506,12 +2670,20 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::Save(
}
a_oOutput.Write(convert.Data());
// write the value
// write the value as long as there is one, or if key-only entries are not allowed
if (*iValue->pItem || !m_bAllowKeyOnly) {
if (!convert.ConvertToStore(iValue->pItem)) {
return SI_FAIL;
}
a_oOutput.Write(m_bSpaces ? " = " : "=");
if (m_bAllowMultiLine && IsMultiLineData(iValue->pItem)) {
if (m_bParseQuotes && IsSingleLineQuotedValue(iValue->pItem)) {
// the only way to preserve external whitespace on a value (i.e. before or after)
// is to quote it. This is simple quoting, we don't escape quotes within the data.
a_oOutput.Write("\"");
a_oOutput.Write(convert.Data());
a_oOutput.Write("\"");
}
else if (m_bAllowMultiLine && IsMultiLineData(iValue->pItem)) {
// multi-line data needs to be processed specially to ensure
// that we use the correct newline format for the current system
a_oOutput.Write("<<<END_OF_TEXT" SI_NEWLINE_A);
@ -2523,6 +2695,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::Save(
else {
a_oOutput.Write(convert.Data());
}
}
a_oOutput.Write(SI_NEWLINE_A);
}
}
@ -2675,13 +2848,15 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::DeleteString(
// SimpleIni.h, set the converter that you wish to use by defining one of the
// following symbols.
//
// SI_NO_CONVERSION Do not make the "W" wide character version of the
// library available. Only CSimpleIniA etc is defined.
// SI_CONVERT_GENERIC Use the Unicode reference conversion library in
// the accompanying files ConvertUTF.h/c
// SI_CONVERT_ICU Use the IBM ICU conversion library. Requires
// ICU headers on include path and icuuc.lib
// SI_CONVERT_WIN32 Use the Win32 API functions for conversion.
#if !defined(SI_CONVERT_GENERIC) && !defined(SI_CONVERT_WIN32) && !defined(SI_CONVERT_ICU)
#if !defined(SI_NO_CONVERSION) && !defined(SI_CONVERT_GENERIC) && !defined(SI_CONVERT_WIN32) && !defined(SI_CONVERT_ICU)
# ifdef _WIN32
# define SI_CONVERT_WIN32
# else
@ -2938,7 +3113,7 @@ public:
// This uses the Unicode reference implementation to do the
// conversion from UTF-8 to wchar_t. The required files are
// ConvertUTF.h and ConvertUTF.c which should be included in
// the distribution but are publically available from unicode.org
// the distribution but are publicly available from unicode.org
// at http://www.unicode.org/Public/PROGRAMS/CVTUTF/
ConversionResult retval;
const UTF8 * pUtf8 = (const UTF8 *) a_pInputData;
@ -3025,7 +3200,7 @@ public:
// This uses the Unicode reference implementation to do the
// conversion from wchar_t to UTF-8. The required files are
// ConvertUTF.h and ConvertUTF.c which should be included in
// the distribution but are publically available from unicode.org
// the distribution but are publicly available from unicode.org
// at http://www.unicode.org/Public/PROGRAMS/CVTUTF/
ConversionResult retval;
UTF8 * pUtf8 = (UTF8 *) a_pOutputData;
@ -3275,13 +3450,8 @@ template<class SI_CHAR>
struct SI_NoCase {
bool operator()(const SI_CHAR * pLeft, const SI_CHAR * pRight) const {
if (sizeof(SI_CHAR) == sizeof(char)) {
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
return _mbsicmp((const unsigned char *)pLeft,
(const unsigned char *)pRight) < 0;
#else
return _stricmp((const char*)pLeft,
(const char*)pRight) < 0;
#endif
}
if (sizeof(SI_CHAR) == sizeof(wchar_t)) {
return _wcsicmp((const wchar_t *)pLeft,
@ -3416,6 +3586,19 @@ public:
#endif // SI_CONVERT_WIN32
// ---------------------------------------------------------------------------
// SI_NO_CONVERSION
// ---------------------------------------------------------------------------
#ifdef SI_NO_CONVERSION
#define SI_Case SI_GenericCase
#define SI_NoCase SI_GenericNoCase
#endif // SI_NO_CONVERSION
// ---------------------------------------------------------------------------
// TYPE DEFINITIONS
// ---------------------------------------------------------------------------
@ -3425,6 +3608,13 @@ typedef CSimpleIniTempl<char,
typedef CSimpleIniTempl<char,
SI_Case<char>,SI_ConvertA<char> > CSimpleIniCaseA;
#if defined(SI_NO_CONVERSION)
// if there is no wide char conversion then we don't need to define the
// widechar "W" versions of CSimpleIni
# define CSimpleIni CSimpleIniA
# define CSimpleIniCase CSimpleIniCaseA
# define SI_NEWLINE SI_NEWLINE_A
#else
# if defined(SI_CONVERT_ICU)
typedef CSimpleIniTempl<UChar,
SI_NoCase<UChar>,SI_ConvertW<UChar> > CSimpleIniW;
@ -3446,6 +3636,7 @@ typedef CSimpleIniTempl<wchar_t,
# define CSimpleIniCase CSimpleIniCaseA
# define SI_NEWLINE SI_NEWLINE_A
# endif // _UNICODE
#endif
#ifdef _MSC_VER
# pragma warning (pop)

View File

@ -194,7 +194,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequencs
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };