pcsx2/plugins/spu2-x/src/iconvert.cpp

262 lines
7.0 KiB
C++

/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
//#ifdef HAVE_ICONV
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
/*
 * Return nonzero when NAME spells "UTF-8", ignoring ASCII case.
 *
 * The characters are compared explicitly: strcasecmp() is
 * locale-dependent, and strchr() also matches the terminating NUL,
 * which would let a short name such as "" or "U" pass the early tests
 * and be read past its end.  The && chain stops at the first mismatch,
 * so no byte after the terminator is ever examined.
 */
static int iconvert_is_utf8(const char *name)
{
    return (name[0] == 'U' || name[0] == 'u') &&
           (name[1] == 'T' || name[1] == 't') &&
           (name[2] == 'F' || name[2] == 'f') &&
           name[3] == '-' &&
           name[4] == '8' &&
           name[5] == '\0';
}

/*
 * Step over exactly one UTF-8 encoded character: its first byte plus
 * any continuation bytes (10xxxxxx) that follow.  The caller must
 * guarantee that *inleft is nonzero on entry.
 */
static void iconvert_skip_utf8_char(char **in, size_t *inleft)
{
    ++*in;
    --*inleft;
    while (*inleft && ((unsigned char)**in & 0xC0) == 0x80)
    {
        ++*in;
        --*inleft;
    }
}

/*
 * Convert data from one encoding to another. Return:
 *
 * -2 : memory allocation failed
 * -1 : unknown encoding
 *  0 : data was converted exactly
 *  1 : data was converted inexactly
 *  2 : data was invalid (but still converted)
 *
 * fromcode/tocode are iconv encoding names; from/fromlen describe the
 * input bytes.  When the result is >= 0 and "to" is non-null, *to
 * receives a malloc()ed, NUL-terminated buffer which the caller must
 * free(); when "tolen" is non-null, *tolen receives the length of the
 * converted data, not counting the terminator.  Neither is written
 * when a negative value is returned.
 *
 * We convert in two steps, via UTF-8, as this is the only
 * reliable way of distinguishing between invalid input
 * and valid input which iconv refuses to transliterate.
 * We convert from UTF-8 twice, because we have no way of
 * knowing whether the conversion was exact if iconv returns
 * E2BIG (due to a bug in the specification of iconv).
 * An alternative approach is to assume that the output of
 * iconv is never more than 4 times as long as the input,
 * but I prefer to avoid that assumption if possible.
 */
int iconvert(const char *fromcode, const char *tocode,
             const char *from, size_t fromlen,
             char **to, size_t *tolen)
{
    int ret = 0;
    iconv_t cd1, cd2;
    char *ib;
    char *ob;
    char *utfbuf = 0, *outbuf, *newbuf;
    size_t utflen, outlen, ibl, obl, k;
    char tbuf[2048];
    /* iconv() wants a non-const input pointer, so keep "?" in writable storage. */
    char qmark[] = "?";

    cd1 = iconv_open("UTF-8", fromcode);
    if (cd1 == (iconv_t)(-1))
        return -1;

    cd2 = (iconv_t)(-1);

    if (!iconvert_is_utf8(tocode))
    {
        char *tocode1;

        /*
         * Try using this non-standard feature of glibc and libiconv.
         * This is deliberately not a config option as people often
         * change their iconv library without rebuilding applications.
         */
        tocode1 = (char *)malloc(strlen(tocode) + 11);
        if (!tocode1)
            goto fail;
        strcpy(tocode1, tocode);
        strcat(tocode1, "//TRANSLIT");
        cd2 = iconv_open(tocode1, "UTF-8");
        free(tocode1);

        /*
         * Fall back to a plain descriptor.  Its source must be UTF-8,
         * because cd2 is only ever fed the intermediate UTF-8 buffer.
         */
        if (cd2 == (iconv_t)(-1))
            cd2 = iconv_open(tocode, "UTF-8");

        if (cd2 == (iconv_t)(-1))
        {
            iconv_close(cd1);
            return -1;
        }
    }

    utflen = 1; /*fromlen * 2 + 1; XXX */
    utfbuf = (char *)malloc(utflen);
    if (!utfbuf)
        goto fail;

    /* Convert to UTF-8 */
    ib = (char *)from; /* iconv() does not modify the input data */
    ibl = fromlen;
    ob = utfbuf;
    obl = utflen;
    for (;;)
    {
        k = iconv(cd1, &ib, &ibl, &ob, &obl);
        assert((!k && !ibl) ||
               (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
               (k == (size_t)(-1) &&
                (errno == EILSEQ || errno == EINVAL) && ibl));
        if (!ibl)
            break;
        if (obl < 6)
        {
            /* Enlarge the buffer */
            utflen *= 2;
            newbuf = (char *)realloc(utfbuf, utflen);
            if (!newbuf)
                goto fail;
            ob = (ob - utfbuf) + newbuf;
            obl = utflen - (size_t)(ob - newbuf);
            utfbuf = newbuf;
        }
        else
        {
            /* Invalid input: drop one byte, emit '#', reset the shift state */
            ib++, ibl--;
            *ob++ = '#', obl--;
            ret = 2;
            iconv(cd1, 0, 0, 0, 0);
        }
    }

    if (cd2 == (iconv_t)(-1))
    {
        /* The target encoding was UTF-8 */
        outlen = (size_t)(ob - utfbuf);
        if (to)
        {
            newbuf = (char *)realloc(utfbuf, outlen + 1);
            if (!newbuf)
                goto fail;
            newbuf[outlen] = '\0';
            *to = newbuf;
        }
        else
        {
            free(utfbuf);
        }
        /* Outputs are only stored once nothing can fail any more. */
        if (tolen)
            *tolen = outlen;
        iconv_close(cd1);
        return ret;
    }

    /*
     * Truncate the buffer to be tidy.  Never request zero bytes:
     * realloc(p, 0) may free p and return a null pointer, which would
     * make the failure path below free the buffer a second time.
     */
    utflen = (size_t)(ob - utfbuf);
    newbuf = (char *)realloc(utfbuf, utflen ? utflen : 1);
    if (!newbuf)
        goto fail;
    utfbuf = newbuf;

    /* Convert from UTF-8 to discover how long the output is */
    outlen = 0;
    ib = utfbuf;
    ibl = utflen;
    while (ibl)
    {
        ob = tbuf;
        obl = sizeof(tbuf);
        k = iconv(cd2, &ib, &ibl, &ob, &obl);
        assert((k != (size_t)(-1) && !ibl) ||
               (k == (size_t)(-1) && errno == E2BIG && ibl) ||
               (k == (size_t)(-1) && errno == EILSEQ && ibl));
        if (ibl && !(k == (size_t)(-1) && errno == E2BIG))
        {
            /* Replace one character */
            char *tb = qmark;
            size_t tbl = 1;

            outlen += (size_t)(ob - tbuf);
            ob = tbuf;
            obl = sizeof(tbuf);
            k = iconv(cd2, &tb, &tbl, &ob, &obl);
            assert((!k && !tbl) ||
                   (k == (size_t)(-1) && errno == EILSEQ && tbl));
            iconvert_skip_utf8_char(&ib, &ibl);
        }
        outlen += (size_t)(ob - tbuf);
    }
    /* Flush any pending shift sequence; this also resets cd2 for the real pass. */
    ob = tbuf;
    obl = sizeof(tbuf);
    k = iconv(cd2, 0, 0, &ob, &obl);
    assert(!k);
    outlen += (size_t)(ob - tbuf);

    /* Convert from UTF-8 for real */
    outbuf = (char *)malloc(outlen + 1);
    if (!outbuf)
        goto fail;
    ib = utfbuf;
    ibl = utflen;
    ob = outbuf;
    obl = outlen;
    while (ibl)
    {
        k = iconv(cd2, &ib, &ibl, &ob, &obl);
        assert((k != (size_t)(-1) && !ibl) ||
               (k == (size_t)(-1) && errno == EILSEQ && ibl));
        /* A nonzero result means an irreversible conversion or an error. */
        if (k && !ret)
            ret = 1;
        if (ibl && !(k == (size_t)(-1) && errno == E2BIG))
        {
            /* Replace one character */
            char *tb = qmark;
            size_t tbl = 1;

            k = iconv(cd2, &tb, &tbl, &ob, &obl);
            assert((!k && !tbl) ||
                   (k == (size_t)(-1) && errno == EILSEQ && tbl));
            iconvert_skip_utf8_char(&ib, &ibl);
        }
    }
    k = iconv(cd2, 0, 0, &ob, &obl);
    assert(!k);
    /* The measuring pass and the real pass must agree on the length. */
    assert(!obl);
    *ob = '\0';

    free(utfbuf);
    iconv_close(cd1);
    iconv_close(cd2);
    if (tolen)
        *tolen = outlen;
    if (!to)
    {
        free(outbuf);
        return ret;
    }
    *to = outbuf;
    return ret;

fail:
    free(utfbuf);
    iconv_close(cd1);
    if (cd2 != (iconv_t)(-1))
        iconv_close(cd2);
    return -2;
}
//#endif /* HAVE_ICONV */
/* arch-tag: e0ffb4f6-e337-4d5f-af90-d49e2b14041e
(do not change this comment) */