/* Encoding support <_PDCLIB_encoding.h>

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/
|
|
|
|
#ifndef __PDCLIB_ENCODING_H
|
|
#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H
|
|
|
|
#include <uchar.h>
|
|
|
|
/* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL
|
|
*
|
|
* Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns
|
|
* 1, 2 : Valid character (converted to UCS-4)
|
|
* -1 : Encoding error
|
|
* -2 : Partial character (only lead surrogate in buffer)
|
|
*/
|
|
static inline int _PDCLIB_c16rtoc32(
|
|
_PDCLIB_char32_t *_PDCLIB_restrict out,
|
|
const _PDCLIB_char16_t *_PDCLIB_restrict in,
|
|
_PDCLIB_size_t bufsize,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
|
|
)
|
|
{
|
|
if(ps->_Surrogate) {
|
|
// We already have a lead surrogate
|
|
if((*in & ~0x3FF) != 0xDC00) {
|
|
// Encoding error
|
|
return -1;
|
|
} else {
|
|
// Decode and reset state
|
|
*out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);
|
|
ps->_Surrogate = 0;
|
|
return 1;
|
|
}
|
|
} if((*in & ~0x3FF) == 0xD800) {
|
|
// Lead surrogate
|
|
if(bufsize >= 2) {
|
|
// Buffer big enough
|
|
if((in[1] & ~0x3FF) != 0xDC00) {
|
|
// Encoding error
|
|
return -1;
|
|
} else {
|
|
*out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);
|
|
return 2;
|
|
}
|
|
} else {
|
|
// Buffer too small - update state
|
|
ps->_Surrogate = *in;
|
|
return -2;
|
|
}
|
|
} else {
|
|
// BMP character
|
|
*out = *in;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(
|
|
_PDCLIB_wchar_t *_PDCLIB_restrict out,
|
|
const _PDCLIB_char32_t *_PDCLIB_restrict in,
|
|
_PDCLIB_size_t bufsize,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict ps
|
|
)
|
|
{
|
|
if(ps->_Surrogate) {
|
|
*out = ps->_Surrogate;
|
|
ps->_Surrogate = 0;
|
|
return 0;
|
|
}
|
|
|
|
if(*in <= 0xFFFF) {
|
|
// BMP character
|
|
*out = *in;
|
|
return 1;
|
|
} else {
|
|
// Supplementary plane character
|
|
*out = 0xD800 | (*in >> 10);
|
|
if(bufsize >= 2) {
|
|
out[1] = 0xDC00 | (*in & 0x3FF);
|
|
return 2;
|
|
} else {
|
|
ps->_Surrogate = 0xDC00 | (*in & 0x3FF);
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
struct _PDCLIB_charcodec_t {
|
|
/* Reads at most *_P_insz code units from *_P_inbuf and writes the result
|
|
* into *_P_outbuf, writing at most *_P_outsz code units. Updates
|
|
* *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state
|
|
*
|
|
* If _P_outbuf is NULL, then the input must be processed but no output
|
|
* generated. _P_outsz may be processed as normal.
|
|
*
|
|
* Returns true if the conversion completed successfully (i.e. one of
|
|
* _P_outsize or _P_insize reached zero and no coding errors were
|
|
* encountered), else return false.
|
|
*/
|
|
|
|
/* mbsinit. Mandatory. */
|
|
_PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);
|
|
|
|
/* UCS-4 variants. Mandatory. */
|
|
|
|
_PDCLIB_bool (*__mbstoc32s)(
|
|
_PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
|
|
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
|
|
);
|
|
|
|
_PDCLIB_bool (*__c32stombs)(
|
|
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
|
|
const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
|
|
);
|
|
|
|
/* UTF-16 variants; same as above except optional.
|
|
*
|
|
* If not provided, _PDCLib will internally synthesize on top of the UCS-4
|
|
* variants above, albeit at a performance cost.
|
|
*/
|
|
|
|
_PDCLIB_bool (*__mbstoc16s)(
|
|
_PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
|
|
const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
|
|
);
|
|
|
|
_PDCLIB_bool (*__c16stombs)(
|
|
char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,
|
|
const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,
|
|
_PDCLIB_size_t *_PDCLIB_restrict _P_insz,
|
|
_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps
|
|
);
|
|
|
|
size_t __mb_max;
|
|
};
|
|
|
|
/* mbstate _PendState values */
enum {
    /* Nothing pending; _PendChar ignored */
    _PendClear = 0,

    /* Process the character stored in _PendChar before reading the buffer
     * passed for the conversion
     */
    _PendPrefix = 1,
};
|
|
|
|
/* XXX Defining these here is temporary - will move to xlocale in future */
|
|
size_t mbrtoc16_l(
|
|
char16_t *_PDCLIB_restrict pc16,
|
|
const char *_PDCLIB_restrict s,
|
|
size_t n,
|
|
mbstate_t *_PDCLIB_restrict ps,
|
|
_PDCLIB_locale_t _PDCLIB_restrict l);
|
|
|
|
size_t c16rtomb_l(
|
|
char *_PDCLIB_restrict s,
|
|
char16_t c16,
|
|
mbstate_t *_PDCLIB_restrict ps,
|
|
_PDCLIB_locale_t _PDCLIB_restrict l);
|
|
|
|
size_t mbrtoc32_l(
|
|
char32_t *_PDCLIB_restrict pc32,
|
|
const char *_PDCLIB_restrict s,
|
|
size_t n,
|
|
mbstate_t *_PDCLIB_restrict ps,
|
|
_PDCLIB_locale_t _PDCLIB_restrict l);
|
|
|
|
size_t c32rtomb_l(
|
|
char *_PDCLIB_restrict s,
|
|
char32_t c32,
|
|
mbstate_t *_PDCLIB_restrict ps,
|
|
_PDCLIB_locale_t _PDCLIB_restrict l);
|
|
|
|
/* Values _PDCLIB_WCHAR_ENCODING may be configured to */
#define _PDCLIB_WCHAR_ENCODING_UTF16 16
#define _PDCLIB_WCHAR_ENCODING_UCS4 32

/* An unconfigured build gets the value 0, which matches neither of the
 * values above and therefore ends in the #error branch below.
 */
#if !defined(_PDCLIB_WCHAR_ENCODING)
#define _PDCLIB_WCHAR_ENCODING 0
#endif

/* Map the _PDCLIB_*cwc* names onto the char16_t or the char32_t conversion
 * functions, depending on the configured encoding.
 */
#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16
#define _PDCLIB_mbrtocwc_l mbrtoc16_l
#define _PDCLIB_mbrtocwc mbrtoc16
#define _PDCLIB_cwcrtomb_l c16rtomb_l
#define _PDCLIB_cwcrtomb c16rtomb
#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4
#define _PDCLIB_mbrtocwc_l mbrtoc32_l
#define _PDCLIB_mbrtocwc mbrtoc32
#define _PDCLIB_cwcrtomb_l c32rtomb_l
#define _PDCLIB_cwcrtomb c32rtomb
#else
#error _PDCLIB_WCHAR_ENCODING not defined correctly
#error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16
#endif
|
|
|
|
#endif
|