mirror of https://github.com/PCSX2/pcsx2.git
262 lines
7.0 KiB
C++
262 lines
7.0 KiB
C++
/*
|
|
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
*/
|
|
|
|
|
|
//#ifdef HAVE_ICONV
|
|
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <iconv.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/*
|
|
* Convert data from one encoding to another. Return:
|
|
*
|
|
* -2 : memory allocation failed
|
|
* -1 : unknown encoding
|
|
* 0 : data was converted exactly
|
|
* 1 : data was converted inexactly
|
|
* 2 : data was invalid (but still converted)
|
|
*
|
|
* We convert in two steps, via UTF-8, as this is the only
|
|
* reliable way of distinguishing between invalid input
|
|
* and valid input which iconv refuses to transliterate.
|
|
* We convert from UTF-8 twice, because we have no way of
|
|
* knowing whether the conversion was exact if iconv returns
|
|
* E2BIG (due to a bug in the specification of iconv).
|
|
* An alternative approach is to assume that the output of
|
|
* iconv is never more than 4 times as long as the input,
|
|
* but I prefer to avoid that assumption if possible.
|
|
*/
|
|
|
|
|
|
|
|
int iconvert(const char *fromcode, const char *tocode,
|
|
const char *from, size_t fromlen,
|
|
char **to, size_t *tolen)
|
|
{
|
|
int ret = 0;
|
|
iconv_t cd1, cd2;
|
|
char *ib;
|
|
char *ob;
|
|
char *utfbuf = 0, *outbuf, *newbuf;
|
|
size_t utflen, outlen, ibl, obl, k;
|
|
char tbuf[2048];
|
|
|
|
cd1 = iconv_open("UTF-8", fromcode);
|
|
if (cd1 == (iconv_t)(-1))
|
|
return -1;
|
|
|
|
cd2 = (iconv_t)(-1);
|
|
/* Don't use strcasecmp() as it's locale-dependent. */
|
|
if (!strchr("Uu", tocode[0]) ||
|
|
!strchr("Tt", tocode[1]) ||
|
|
!strchr("Ff", tocode[2]) ||
|
|
tocode[3] != '-' ||
|
|
tocode[4] != '8' ||
|
|
tocode[5] != '\0')
|
|
{
|
|
char *tocode1;
|
|
|
|
/*
|
|
* Try using this non-standard feature of glibc and libiconv.
|
|
* This is deliberately not a config option as people often
|
|
* change their iconv library without rebuilding applications.
|
|
*/
|
|
tocode1 = (char *)malloc(strlen(tocode) + 11);
|
|
if (!tocode1)
|
|
goto fail;
|
|
|
|
strcpy(tocode1, tocode);
|
|
strcat(tocode1, "//TRANSLIT");
|
|
cd2 = iconv_open(tocode1, "UTF-8");
|
|
free(tocode1);
|
|
|
|
if (cd2 == (iconv_t)(-1))
|
|
cd2 = iconv_open(tocode, fromcode);
|
|
|
|
if (cd2 == (iconv_t)(-1))
|
|
{
|
|
iconv_close(cd1);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
utflen = 1; /*fromlen * 2 + 1; XXX */
|
|
utfbuf = (char *)malloc(utflen);
|
|
if (!utfbuf)
|
|
goto fail;
|
|
|
|
/* Convert to UTF-8 */
|
|
ib = (char *)from;
|
|
ibl = fromlen;
|
|
ob = utfbuf;
|
|
obl = utflen;
|
|
for (;;)
|
|
{
|
|
k = iconv(cd1, &ib, &ibl, &ob, &obl);
|
|
assert((!k && !ibl) ||
|
|
(k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
|
|
(k == (size_t)(-1) &&
|
|
(errno == EILSEQ || errno == EINVAL) && ibl));
|
|
if (!ibl)
|
|
break;
|
|
if (obl < 6)
|
|
{
|
|
/* Enlarge the buffer */
|
|
utflen *= 2;
|
|
newbuf = (char *)realloc(utfbuf, utflen);
|
|
if (!newbuf)
|
|
goto fail;
|
|
ob = (ob - utfbuf) + newbuf;
|
|
obl = utflen - (ob - newbuf);
|
|
utfbuf = newbuf;
|
|
}
|
|
else
|
|
{
|
|
/* Invalid input */
|
|
ib++, ibl--;
|
|
*ob++ = '#', obl--;
|
|
ret = 2;
|
|
iconv(cd1, 0, 0, 0, 0);
|
|
}
|
|
}
|
|
|
|
if (cd2 == (iconv_t)(-1))
|
|
{
|
|
/* The target encoding was UTF-8 */
|
|
if (tolen)
|
|
*tolen = ob - utfbuf;
|
|
if (!to)
|
|
{
|
|
free(utfbuf);
|
|
iconv_close(cd1);
|
|
return ret;
|
|
}
|
|
newbuf = (char *)realloc(utfbuf, (ob - utfbuf) + 1);
|
|
if (!newbuf)
|
|
goto fail;
|
|
ob = (ob - utfbuf) + newbuf;
|
|
*ob = '\0';
|
|
*to = newbuf;
|
|
iconv_close(cd1);
|
|
return ret;
|
|
}
|
|
|
|
/* Truncate the buffer to be tidy */
|
|
utflen = ob - utfbuf;
|
|
newbuf = (char *)realloc(utfbuf, utflen);
|
|
if (!newbuf)
|
|
goto fail;
|
|
utfbuf = newbuf;
|
|
|
|
/* Convert from UTF-8 to discover how long the output is */
|
|
outlen = 0;
|
|
ib = utfbuf;
|
|
ibl = utflen;
|
|
while (ibl)
|
|
{
|
|
ob = tbuf;
|
|
obl = sizeof(tbuf);
|
|
k = iconv(cd2, &ib, &ibl, &ob, &obl);
|
|
assert((k != (size_t)(-1) && !ibl) ||
|
|
(k == (size_t)(-1) && errno == E2BIG && ibl) ||
|
|
(k == (size_t)(-1) && errno == EILSEQ && ibl));
|
|
if (ibl && !(k == (size_t)(-1) && errno == E2BIG))
|
|
{
|
|
/* Replace one character */
|
|
char *tb = "?";
|
|
size_t tbl = 1;
|
|
|
|
outlen += ob - tbuf;
|
|
ob = tbuf;
|
|
obl = sizeof(tbuf);
|
|
k = iconv(cd2, &tb, &tbl, &ob, &obl);
|
|
assert((!k && !tbl) ||
|
|
(k == (size_t)(-1) && errno == EILSEQ && tbl));
|
|
for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
|
|
;
|
|
}
|
|
outlen += ob - tbuf;
|
|
}
|
|
ob = tbuf;
|
|
obl = sizeof(tbuf);
|
|
k = iconv(cd2, 0, 0, &ob, &obl);
|
|
assert(!k);
|
|
outlen += ob - tbuf;
|
|
|
|
/* Convert from UTF-8 for real */
|
|
outbuf = (char *)malloc(outlen + 1);
|
|
if (!outbuf)
|
|
goto fail;
|
|
ib = utfbuf;
|
|
ibl = utflen;
|
|
ob = outbuf;
|
|
obl = outlen;
|
|
while (ibl)
|
|
{
|
|
k = iconv(cd2, &ib, &ibl, &ob, &obl);
|
|
assert((k != (size_t)(-1) && !ibl) ||
|
|
(k == (size_t)(-1) && errno == EILSEQ && ibl));
|
|
if (k && !ret)
|
|
ret = 1;
|
|
if (ibl && !(k == (size_t)(-1) && errno == E2BIG))
|
|
{
|
|
/* Replace one character */
|
|
char *tb = "?";
|
|
size_t tbl = 1;
|
|
|
|
k = iconv(cd2, &tb, &tbl, &ob, &obl);
|
|
assert((!k && !tbl) ||
|
|
(k == (size_t)(-1) && errno == EILSEQ && tbl));
|
|
for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
|
|
;
|
|
}
|
|
}
|
|
k = iconv(cd2, 0, 0, &ob, &obl);
|
|
assert(!k);
|
|
assert(!obl);
|
|
*ob = '\0';
|
|
|
|
free(utfbuf);
|
|
iconv_close(cd1);
|
|
iconv_close(cd2);
|
|
if (tolen)
|
|
*tolen = outlen;
|
|
if (!to)
|
|
{
|
|
free(outbuf);
|
|
return ret;
|
|
}
|
|
*to = outbuf;
|
|
return ret;
|
|
|
|
fail:
|
|
free(utfbuf);
|
|
iconv_close(cd1);
|
|
if (cd2 != (iconv_t)(-1))
|
|
iconv_close(cd2);
|
|
return -2;
|
|
}
|
|
|
|
//#endif /* HAVE_ICONV */
|
|
|
|
/* arch-tag: e0ffb4f6-e337-4d5f-af90-d49e2b14041e
|
|
(do not change this comment) */
|