BizHawk/waterbox/libc/internals/_PDCLIB_encoding.h

210 lines
7.0 KiB
C

/* Encoding support <_PDCLIB_encoding.h>
This file is part of the Public Domain C Library (PDCLib).
Permission is granted to use, modify, and / or redistribute at will.
*/
#ifndef __PDCLIB_ENCODING_H
#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
#include <uchar.h>
/* Converts one UTF-16 (char16_t) sequence to a single UCS-4 (char32_t) value.
 *
 * Must be called with bufsize >= 1, in != NULL, out != NULL, ps != NULL.
 *
 * out     - receives the decoded code point; written only when a character
 *           is returned
 * in      - UTF-16 code units to decode
 * bufsize - number of code units readable through in
 * ps      - conversion state; ps->_Surrogate holds the lead surrogate saved
 *           by an earlier call that returned -2, or 0 when none is pending
 *
 * Returns the number of code units consumed from in, or a negative status:
 *   1, 2 : Valid character (converted to UCS-4)
 *   -1   : Encoding error (lead surrogate not followed by a trail surrogate)
 *   -2   : Partial character (only lead surrogate in buffer); the lead
 *          surrogate is stored in ps->_Surrogate for the next call
 *
 * A trail surrogate that arrives while no lead surrogate is pending is not
 * rejected: it is copied to *out unchanged and 1 is returned.
 */
static inline int _PDCLIB_c16rtoc32(
    _PDCLIB_char32_t *_PDCLIB_restrict out,
    const _PDCLIB_char16_t *_PDCLIB_restrict in,
    _PDCLIB_size_t bufsize,
    _PDCLIB_mbstate_t *_PDCLIB_restrict ps
)
{
    if(ps->_Surrogate) {
        // We already have a lead surrogate from a previous call
        if((*in & ~0x3FF) != 0xDC00) {
            // Encoding error
            return -1;
        }

        // Decode and reset state. A surrogate pair carries a 20-bit offset
        // into the supplementary planes, so 0x10000 must be added back.
        *out = 0x10000
             + (((_PDCLIB_char32_t)(ps->_Surrogate & 0x3FF) << 10)
                | (_PDCLIB_char32_t)(*in & 0x3FF));
        ps->_Surrogate = 0;
        return 1;
    }

    if((*in & ~0x3FF) == 0xD800) {
        // Lead surrogate
        if(bufsize < 2) {
            // Buffer too small - remember the lead surrogate in the state
            ps->_Surrogate = *in;
            return -2;
        }

        // Buffer big enough to hold the trail surrogate as well
        if((in[1] & ~0x3FF) != 0xDC00) {
            // Encoding error
            return -1;
        }

        // Same 0x10000 bias as in the pending-surrogate path above
        *out = 0x10000
             + (((_PDCLIB_char32_t)(in[0] & 0x3FF) << 10)
                | (_PDCLIB_char32_t)(in[1] & 0x3FF));
        return 2;
    }

    // BMP character
    *out = *in;
    return 1;
}
/* Converts a UCS-4 (char32_t) value to UTF-16 code units.
 *
 * out     - receives the UTF-16 code unit(s)
 * in      - points to the code point to encode
 * bufsize - number of code units that may be written through out
 * ps      - conversion state; ps->_Surrogate holds a trail surrogate that an
 *           earlier call could not write, or 0 when none is pending
 *
 * Returns:
 *   0 : a trail surrogate pending in ps was written to *out and the state
 *       was cleared; *in is not examined in this case
 *   1 : one code unit was written - either a BMP character, or the lead
 *       surrogate of a pair whose trail surrogate was saved in
 *       ps->_Surrogate because bufsize < 2
 *   2 : a complete surrogate pair was written to out[0] and out[1]
 *
 * No validation of *in is performed (values above 0x10FFFF or inside the
 * surrogate range are not detected).
 */
static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
    _PDCLIB_wchar_t *_PDCLIB_restrict out,
    const _PDCLIB_char32_t *_PDCLIB_restrict in,
    _PDCLIB_size_t bufsize,
    _PDCLIB_mbstate_t *_PDCLIB_restrict ps
)
{
    if(ps->_Surrogate) {
        // Flush the trail surrogate left over from the previous call
        *out = ps->_Surrogate;
        ps->_Surrogate = 0;
        return 0;
    }

    if(*in <= 0xFFFF) {
        // BMP character
        *out = *in;
        return 1;
    }

    // Supplementary plane character. UTF-16 encodes the 20-bit offset from
    // 0x10000, not the raw code point, so remove the bias before splitting
    // it into the two 10-bit surrogate payloads.
    const _PDCLIB_char32_t offset = *in - 0x10000;
    const _PDCLIB_char32_t trail = 0xDC00 | (offset & 0x3FF);

    *out = 0xD800 | (offset >> 10);
    if(bufsize >= 2) {
        out[1] = trail;
        return 2;
    }

    // No room for the trail surrogate - keep it in the state
    ps->_Surrogate = trail;
    return 1;
}
struct _PDCLIB_charcodec_t {
/* Table of conversion callbacks describing one multibyte character codec.
*
* Contract shared by the four string conversion callbacks below
* (__mbstoc32s, __c32stombs, __mbstoc16s, __c16stombs):
*
* Reads at most *_P_insz code units from *_P_inbuf and writes the result
* into *_P_outbuf, writing at most *_P_outsz code units. Updates
* *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_insz with the resulting state
*
* If _P_outbuf is NULL, then the input must be processed but no output
* generated. _P_outsz may be processed as normal.
*
* Returns true if the conversion completed successfully (i.e. one of
* *_P_outsz or *_P_insz reached zero and no coding errors were
* encountered), else return false.
*/
/* mbsinit. Mandatory.
* NOTE(review): presumably reports whether *_P_ps is an initial conversion
* state, as the standard mbsinit does - confirm against the codecs.
*/
_PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
/* UCS-4 variants. Mandatory. */
_PDCLIB_bool (*__mbstoc32s)(
_PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
_PDCLIB_bool (*__c32stombs)(
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
/* UTF-16 variants; same as above except optional.
*
* If not provided, PDCLib will internally synthesize them on top of the
* UCS-4 variants above, albeit at a performance cost.
*/
_PDCLIB_bool (*__mbstoc16s)(
_PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
_PDCLIB_bool (*__c16stombs)(
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
);
/* NOTE(review): presumably the maximum number of bytes one multibyte
* character can occupy in this codec (cf. MB_CUR_MAX) - confirm against the
* codec definitions.
*/
size_t __mb_max;
};
/* Values taken by the _PendState member of an mbstate */
enum {
    /* No character is pending; whatever _PendChar holds is ignored */
    _PendClear = 0,

    /* _PendChar holds a character that has to be processed ahead of the
     * buffer handed to the conversion
     */
    _PendPrefix = 1
};
/* XXX Defining these here is temporary - will move to xlocale in future */
/* Conversion functions between multibyte strings and char16_t / char32_t
* that take the locale as an explicit final argument l. Only the
* declarations are visible in this header.
*
* NOTE(review): presumably these behave like the C11 <uchar.h> functions
* mbrtoc16, c16rtomb, mbrtoc32 and c32rtomb, using l instead of the current
* locale - confirm against the definitions.
*/
/* Multibyte -> char16_t; s is the input of at most n bytes, pc16 the output */
size_t mbrtoc16_l(
char16_t *_PDCLIB_restrict pc16,
const char *_PDCLIB_restrict s,
size_t n,
mbstate_t *_PDCLIB_restrict ps,
_PDCLIB_locale_t _PDCLIB_restrict l);
/* char16_t -> multibyte; c16 is the input, s the output buffer */
size_t c16rtomb_l(
char *_PDCLIB_restrict s,
char16_t c16,
mbstate_t *_PDCLIB_restrict ps,
_PDCLIB_locale_t _PDCLIB_restrict l);
/* Multibyte -> char32_t; s is the input of at most n bytes, pc32 the output */
size_t mbrtoc32_l(
char32_t *_PDCLIB_restrict pc32,
const char *_PDCLIB_restrict s,
size_t n,
mbstate_t *_PDCLIB_restrict ps,
_PDCLIB_locale_t _PDCLIB_restrict l);
/* char32_t -> multibyte; c32 is the input, s the output buffer */
size_t c32rtomb_l(
char *_PDCLIB_restrict s,
char32_t c32,
mbstate_t *_PDCLIB_restrict ps,
_PDCLIB_locale_t _PDCLIB_restrict l);
/* Identifiers for the two supported wchar_t encodings;
* _PDCLIB_WCHAR_ENCODING is compared against them below.
*/
#define _PDCLIB_WCHAR_ENCODING_UTF16 16
#define _PDCLIB_WCHAR_ENCODING_UCS4 32
/* When no encoding has been configured, fall back to 0. That value matches
* neither identifier above, so the #error branch below is taken.
*/
#if !defined(_PDCLIB_WCHAR_ENCODING)
#define _PDCLIB_WCHAR_ENCODING 0
#endif
/* Map the _PDCLIB_mbrtocwc / _PDCLIB_cwcrtomb names (and their _l forms)
* onto the char16_t or char32_t conversion functions, according to the
* selected wchar_t encoding.
*/
#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
#define _PDCLIB_mbrtocwc_l mbrtoc16_l
#define _PDCLIB_mbrtocwc mbrtoc16
#define _PDCLIB_cwcrtomb_l c16rtomb_l
#define _PDCLIB_cwcrtomb c16rtomb
#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
#define _PDCLIB_mbrtocwc_l mbrtoc32_l
#define _PDCLIB_mbrtocwc mbrtoc32
#define _PDCLIB_cwcrtomb_l c32rtomb_l
#define _PDCLIB_cwcrtomb c32rtomb
#else
/* Unset or unrecognized encoding: refuse to compile */
#error _PDCLIB_WCHAR_ENCODING not defined correctly
#error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16
#endif
#endif