diff --git a/Makefile.common b/Makefile.common index 38b31dcee1..1c433f5c04 100644 --- a/Makefile.common +++ b/Makefile.common @@ -1117,6 +1117,7 @@ endif OBJ += $(LIBRETRO_COMM_DIR)/formats/bmp/rbmp_encode.o \ $(LIBRETRO_COMM_DIR)/formats/json/jsonsax.o \ + $(LIBRETRO_COMM_DIR)/formats/json/jsonsax_full.o \ $(LIBRETRO_COMM_DIR)/formats/image_transfer.o ifdef HAVE_COMPRESSION diff --git a/libretro-common/formats/json/jsonsax_full.c b/libretro-common/formats/json/jsonsax_full.c new file mode 100644 index 0000000000..335523c3a9 --- /dev/null +++ b/libretro-common/formats/json/jsonsax_full.c @@ -0,0 +1,4194 @@ +/* + Copyright (c) 2012 John-Anthony Owens + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#include <stdlib.h> +#include <string.h> + +/* Ensure uint32_t type (compiler-dependent). */ +#if defined(_MSC_VER) +typedef unsigned __int32 uint32_t; +#else +#include <stdint.h> +#endif + +/* Ensure SIZE_MAX defined. 
*/ +#ifndef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif + +/* Mark APIs for export (as opposed to import) when we build this file. */ +#define JSON_BUILDING +#include <formats/jsonsax_full.h> + +/* Default allocation constants. */ +#define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */ +#define DEFAULT_SYMBOL_STACK_SIZE 32 /* MUST be a power of 2 */ + +/* Types for readability. */ +typedef unsigned char byte; +typedef uint32_t Codepoint; + +/* Especially-relevant Unicode codepoints. */ +#define U_(x) ((Codepoint)(x)) +#define NULL_CODEPOINT U_(0x0000) +#define BACKSPACE_CODEPOINT U_(0x0008) +#define TAB_CODEPOINT U_(0x0009) +#define LINE_FEED_CODEPOINT U_(0x000A) +#define FORM_FEED_CODEPOINT U_(0x000C) +#define CARRIAGE_RETURN_CODEPOINT U_(0x000D) +#define FIRST_NON_CONTROL_CODEPOINT U_(0x0020) +#define DELETE_CODEPOINT U_(0x007F) +#define FIRST_NON_ASCII_CODEPOINT U_(0x0080) +#define FIRST_2_BYTE_UTF8_CODEPOINT U_(0x0080) +#define FIRST_3_BYTE_UTF8_CODEPOINT U_(0x0800) +#define LINE_SEPARATOR_CODEPOINT U_(0x2028) +#define PARAGRAPH_SEPARATOR_CODEPOINT U_(0x2029) +#define BOM_CODEPOINT U_(0xFEFF) +#define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD) +#define FIRST_NON_BMP_CODEPOINT U_(0x10000) +#define FIRST_4_BYTE_UTF8_CODEPOINT U_(0x10000) +#define MAX_CODEPOINT U_(0x10FFFF) +#define EOF_CODEPOINT U_(0xFFFFFFFF) + +/* Bit-masking macros. */ +#define BOTTOM_3_BITS(x) ((x) & 0x7) +#define BOTTOM_4_BITS(x) ((x) & 0xF) +#define BOTTOM_5_BITS(x) ((x) & 0x1F) +#define BOTTOM_6_BITS(x) ((x) & 0x3F) + +/* Bit-flag macros. */ +#define GET_FLAGS(x, f) ((x) & (f)) +#define SET_FLAGS_ON(flagstype, x, f) do { (x) |= (flagstype)(f); } while (0) +#define SET_FLAGS_OFF(flagstype, x, f) do { (x) &= (flagstype)~(f); } while (0) +#define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0) + +/* UTF-8 byte-related macros. 
*/ +#define IS_UTF8_SINGLE_BYTE(b) (((b) & 0x80) == 0) +#define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80) +#define IS_UTF8_FIRST_BYTE_OF_2(b) (((b) & 0xE0) == 0xC0) +#define IS_UTF8_FIRST_BYTE_OF_3(b) (((b) & 0xF0) == 0xE0) +#define IS_UTF8_FIRST_BYTE_OF_4(b) (((b) & 0xF8) == 0xF0) + +/* Unicode codepoint-related macros. Noncharacters are U+FDD0 - U+FDEF plus the last two codepoints of every plane (U+nFFFE and U+nFFFF), hence the 16-bit 0xFFFE mask. */ +#define IS_NONCHARACTER(c) ((((c) & 0xFFFE) == 0xFFFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF))) +#define IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800) +#define IS_LEADING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800) +#define IS_TRAILING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00) +#define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400) +#define SURROGATES_FROM_CODEPOINT(c) ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00) +#define SHORTEST_ENCODING_SEQUENCE(enc) (1U << ((enc) >> 1)) +#define LONGEST_ENCODING_SEQUENCE 4 + +/* Internal types that alias enum types in the public API. + By using byte to represent these values internally, + we can guarantee minimal storage size and avoid compiler + warnings when using values of the type in switch statements + that don't have (or need) a default case. 
*/ +typedef byte Encoding; +typedef byte Error; +typedef byte TokenAttributes; + +/******************** Default Memory Suite ********************/ + +static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size) +{ + (void)userData; /* unused */ + return realloc(ptr, size); +} + +static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr) +{ + (void)userData; /* unused */ + free(ptr); +} + +static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler }; + +static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length) +{ + size_t newLength = length * 2; + if (newLength < length) + { + pBuffer = NULL; + } + else if (pBuffer == pDefaultBuffer) + { + pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, newLength); + if (pBuffer) + { + memcpy(pBuffer, pDefaultBuffer, length); + } + } + else + { + pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, newLength); + } + return pBuffer; +} + +/******************** Unicode Decoder ********************/ + +/* Mutually-exclusive decoder states. */ +/* The bits of DecoderState are laid out as follows: + + ---lllnn + + - = unused (3 bits) + l = expected total sequence length (3 bits) + n = number of bytes decoded so far (2 bits) + */ + +#define DECODER_RESET 0x00 +#define DECODED_1_OF_2 0x09 /* 00001001 */ +#define DECODED_1_OF_3 0x0D /* 00001101 */ +#define DECODED_2_OF_3 0x0E /* 00001110 */ +#define DECODED_1_OF_4 0x11 /* 00010001 */ +#define DECODED_2_OF_4 0x12 /* 00010010 */ +#define DECODED_3_OF_4 0x13 /* 00010011 */ +typedef byte DecoderState; + +#define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3) + +/* Decoder data. 
*/ +typedef struct tag_DecoderData +{ + DecoderState state; + uint32_t bits; +} DecoderData; +typedef DecoderData* Decoder; + +/* The bits of DecoderOutput are laid out as follows: + + ------rrlllccccccccccccccccccccc + + - = unused (6 bits) + r = result code (2 bits) + l = sequence length (3 bits) + c = codepoint (21 bits) + */ +#define SEQUENCE_PENDING 0 +#define SEQUENCE_COMPLETE 1 +#define SEQUENCE_INVALID_INCLUSIVE 2 +#define SEQUENCE_INVALID_EXCLUSIVE 3 +typedef uint32_t DecoderResultCode; + +#define DECODER_OUTPUT(r, l, c) (DecoderOutput)(((r) << 24) | ((l) << 21) | (c)) +#define DECODER_RESULT_CODE(o) (DecoderResultCode)((DecoderOutput)(o) >> 24) +#define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7) +#define DECODER_CODEPOINT(o) (Codepoint)((DecoderOutput)(o) & 0x001FFFFF) +typedef uint32_t DecoderOutput; + +/* Decoder functions. */ + +static void Decoder_Reset(Decoder decoder) +{ + decoder->state = DECODER_RESET; + decoder->bits = 0; +} + +static int Decoder_SequencePending(Decoder decoder) +{ + return decoder->state != DECODER_RESET; +} + +static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b) +{ + DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0); + switch (encoding) + { + case JSON_UTF8: + /* When the input encoding is UTF-8, the decoded codepoint's bits are + recorded in the bottom 3 bytes of bits as they are decoded. + The top byte is not used. */ + switch (decoder->state) + { + case DECODER_RESET: + if (IS_UTF8_SINGLE_BYTE(b)) + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b); + } + else if (IS_UTF8_FIRST_BYTE_OF_2(b)) + { + /* UTF-8 2-byte sequences that are overlong encodings can be + detected from just the first byte (C0 or C1). 
*/ + decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6; + if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT) + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); + } + else + { + decoder->state = DECODED_1_OF_2; + goto noreset; + } + } + else if (IS_UTF8_FIRST_BYTE_OF_3(b)) + { + decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12; + decoder->state = DECODED_1_OF_3; + goto noreset; + } + else if (IS_UTF8_FIRST_BYTE_OF_4(b)) + { + /* Some UTF-8 4-byte sequences that encode out-of-range + codepoints can be detected from the first byte (F5 - FF). */ + decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18; + if (decoder->bits > MAX_CODEPOINT) + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); + } + else + { + decoder->state = DECODED_1_OF_4; + goto noreset; + } + } + else + { + /* The byte is of the form 11111xxx or 10xxxxxx, and is not + a valid first byte for a UTF-8 sequence. */ + output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); + } + break; + + case DECODED_1_OF_2: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b)); + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); + + } + break; + + case DECODED_1_OF_3: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + /* UTF-8 3-byte sequences that are overlong encodings or encode + surrogate codepoints can be detected after 2 bytes. 
*/ + decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6; + if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) || IS_SURROGATE(decoder->bits)) + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); + } + else + { + decoder->state = DECODED_2_OF_3; + goto noreset; + } + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); + } + break; + + case DECODED_2_OF_3: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b)); + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); + } + break; + + case DECODED_1_OF_4: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + /* UTF-8 4-byte sequences that are overlong encodings or encode + out-of-range codepoints can be detected after 2 bytes. */ + decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12; + if ((decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) || (decoder->bits > MAX_CODEPOINT)) + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); + } + else + { + decoder->state = DECODED_2_OF_4; + goto noreset; + } + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); + } + break; + + case DECODED_2_OF_4: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6; + decoder->state = DECODED_3_OF_4; + goto noreset; + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); + } + break; + + case DECODED_3_OF_4: + if (IS_UTF8_CONTINUATION_BYTE(b)) + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b)); + } + else + { + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0); + } + break; + } + break; + + case JSON_UTF16LE: + /* When the input encoding is UTF-16, the decoded codepoint's bits are + recorded in the bottom 2 bytes of bits as they are decoded. 
+ If those 2 bytes form a leading surrogate, the decoder treats the + surrogate pair as a single 4-byte sequence, shifts the leading + surrogate into the high 2 bytes of bits, and decodes the + trailing surrogate's bits in the bottom 2 bytes of bits. */ + switch (decoder->state) + { + case DECODER_RESET: + decoder->bits = b; + decoder->state = DECODED_1_OF_2; + goto noreset; + + case DECODED_1_OF_2: + decoder->bits |= (uint32_t)b << 8; + if (IS_TRAILING_SURROGATE(decoder->bits)) + { + /* A trailing surrogate cannot appear on its own. */ + output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0); + } + else if (IS_LEADING_SURROGATE(decoder->bits)) + { + /* A leading surrogate implies a 4-byte surrogate pair. */ + decoder->bits <<= 16; + decoder->state = DECODED_2_OF_4; + goto noreset; + } + else + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits); + } + break; + + case DECODED_2_OF_4: + decoder->bits |= b; + decoder->state = DECODED_3_OF_4; + goto noreset; + + case DECODED_3_OF_4: + decoder->bits |= (uint32_t)b << 8; + if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF)) + { + /* A leading surrogate must be followed by a trailing one. + Treat the previous 3 bytes as an invalid 2-byte sequence + followed by the first byte of a new sequence. */ + decoder->bits &= 0xFF; + decoder->state = DECODED_1_OF_2; + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); + goto noreset; + } + else + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits)); + } + break; + } + break; + + case JSON_UTF16BE: + /* When the input encoding is UTF-16, the decoded codepoint's bits are + recorded in the bottom 2 bytes of bits as they are decoded. + If those 2 bytes form a leading surrogate, the decoder treats the + surrogate pair as a single 4-byte sequence, shifts the leading + surrogate into the high 2 bytes of bits, and decodes the + trailing surrogate's bits in the bottom 2 bytes of bits. 
*/ + switch (decoder->state) + { + case DECODER_RESET: + decoder->bits = (uint32_t)b << 8; + decoder->state = DECODED_1_OF_2; + goto noreset; + + case DECODED_1_OF_2: + decoder->bits |= b; + if (IS_TRAILING_SURROGATE(decoder->bits)) + { + /* A trailing surrogate cannot appear on its own. */ + output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0); + } + else if (IS_LEADING_SURROGATE(decoder->bits)) + { + /* A leading surrogate implies a 4-byte surrogate pair. */ + decoder->bits <<= 16; + decoder->state = DECODED_2_OF_4; + goto noreset; + } + else + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits); + } + break; + + case DECODED_2_OF_4: + decoder->bits |= (uint32_t)b << 8; + decoder->state = DECODED_3_OF_4; + goto noreset; + + case DECODED_3_OF_4: + decoder->bits |= b; + if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF)) + { + /* A leading surrogate must be followed by a trailing one. + Treat the previous 3 bytes as an invalid 2-byte sequence + followed by the first byte of a new sequence. */ + decoder->bits &= 0xFF00; + decoder->state = DECODED_1_OF_2; + output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); + goto noreset; + } + else + { + output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits)); + } + break; + } + break; + + case JSON_UTF32LE: + /* When the input encoding is UTF-32, the decoded codepoint's bits are + recorded in bits as they are decoded. */ + switch (decoder->state) + { + case DECODER_RESET: + decoder->state = DECODED_1_OF_4; + decoder->bits = (uint32_t)b; + goto noreset; + + case DECODED_1_OF_4: + decoder->state = DECODED_2_OF_4; + decoder->bits |= (uint32_t)b << 8; + goto noreset; + + case DECODED_2_OF_4: + decoder->state = DECODED_3_OF_4; + decoder->bits |= (uint32_t)b << 16; + goto noreset; + + case DECODED_3_OF_4: + decoder->bits |= (uint32_t)b << 24; + output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT)) + ? 
DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0) + : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits); + break; + } + break; + + case JSON_UTF32BE: + /* When the input encoding is UTF-32, the decoded codepoint's bits are + recorded in bits as they are decoded. */ + switch (decoder->state) + { + case DECODER_RESET: + decoder->state = DECODED_1_OF_4; + decoder->bits = (uint32_t)b << 24; + goto noreset; + + case DECODED_1_OF_4: + decoder->state = DECODED_2_OF_4; + decoder->bits |= (uint32_t)b << 16; + goto noreset; + + case DECODED_2_OF_4: + decoder->state = DECODED_3_OF_4; + decoder->bits |= (uint32_t)b << 8; + goto noreset; + + case DECODED_3_OF_4: + decoder->bits |= b; + output = (IS_SURROGATE(decoder->bits) || (decoder->bits > MAX_CODEPOINT)) + ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0) + : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits); + break; + } + break; + } + + /* Reset the decoder for the next sequence. */ + Decoder_Reset(decoder); + +noreset: + return output; +} + +/******************** Unicode Encoder ********************/ + +/* This function makes the following assumptions about its input: + + 1. The c argument is a valid codepoint (U+0000 - U+10FFFF). + 2. The encoding argument is not JSON_UnknownEncoding. + 3. The pBytes argument points to an array of at least 4 bytes. 
+*/ +static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes) +{ + size_t length = 0; + switch (encoding) + { + case JSON_UTF8: + if (c < FIRST_2_BYTE_UTF8_CODEPOINT) + { + pBytes[0] = (byte)c; + length = 1; + } + else if (c < FIRST_3_BYTE_UTF8_CODEPOINT) + { + pBytes[0] = (byte)(0xC0 | (c >> 6)); + pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c)); + length = 2; + } + else if (c < FIRST_4_BYTE_UTF8_CODEPOINT) + { + pBytes[0] = (byte)(0xE0 | (c >> 12)); + pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6)); + pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c)); + length = 3; + } + else + { + pBytes[0] = (byte)(0xF0 | (c >> 18)); + pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12)); + pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6)); + pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c)); + length = 4; + } + break; + + case JSON_UTF16LE: + if (c < FIRST_NON_BMP_CODEPOINT) + { + pBytes[0] = (byte)(c); + pBytes[1] = (byte)(c >> 8); + length = 2; + } + else + { + uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c); + + /* Leading surrogate. */ + pBytes[0] = (byte)(surrogates >> 16); + pBytes[1] = (byte)(surrogates >> 24); + + /* Trailing surrogate. */ + pBytes[2] = (byte)(surrogates); + pBytes[3] = (byte)(surrogates >> 8); + length = 4; + } + break; + + case JSON_UTF16BE: + if (c < FIRST_NON_BMP_CODEPOINT) + { + pBytes[1] = (byte)(c); + pBytes[0] = (byte)(c >> 8); + length = 2; + } + else + { + /* The codepoint requires a surrogate pair in UTF-16. */ + uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c); + + /* Leading surrogate. */ + pBytes[1] = (byte)(surrogates >> 16); + pBytes[0] = (byte)(surrogates >> 24); + + /* Trailing surrogate. 
*/ + pBytes[3] = (byte)(surrogates); + pBytes[2] = (byte)(surrogates >> 8); + length = 4; + } + break; + + case JSON_UTF32LE: + pBytes[0] = (byte)(c); + pBytes[1] = (byte)(c >> 8); + pBytes[2] = (byte)(c >> 16); + pBytes[3] = (byte)(c >> 24); + length = 4; + break; + + case JSON_UTF32BE: + pBytes[3] = (byte)(c); + pBytes[2] = (byte)(c >> 8); + pBytes[1] = (byte)(c >> 16); + pBytes[0] = (byte)(c >> 24); + length = 4; + break; + } + return length; +} + +/******************** JSON Lexer States ********************/ + +/* Mutually-exclusive lexer states. */ +#define LEXING_WHITESPACE 0 +#define LEXING_LITERAL 1 +#define LEXING_STRING 2 +#define LEXING_STRING_ESCAPE 3 +#define LEXING_STRING_HEX_ESCAPE_BYTE_1 4 +#define LEXING_STRING_HEX_ESCAPE_BYTE_2 5 +#define LEXING_STRING_HEX_ESCAPE_BYTE_3 6 +#define LEXING_STRING_HEX_ESCAPE_BYTE_4 7 +#define LEXING_STRING_HEX_ESCAPE_BYTE_5 8 +#define LEXING_STRING_HEX_ESCAPE_BYTE_6 9 +#define LEXING_STRING_HEX_ESCAPE_BYTE_7 10 +#define LEXING_STRING_HEX_ESCAPE_BYTE_8 11 +#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12 +#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U 13 +#define LEXING_NUMBER_AFTER_MINUS 14 +#define LEXING_NUMBER_AFTER_LEADING_ZERO 15 +#define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO 16 +#define LEXING_NUMBER_AFTER_X 17 +#define LEXING_NUMBER_HEX_DIGITS 18 +#define LEXING_NUMBER_DECIMAL_DIGITS 19 +#define LEXING_NUMBER_AFTER_DOT 20 +#define LEXING_NUMBER_FRACTIONAL_DIGITS 21 +#define LEXING_NUMBER_AFTER_E 22 +#define LEXING_NUMBER_AFTER_EXPONENT_SIGN 23 +#define LEXING_NUMBER_EXPONENT_DIGITS 24 +#define LEXING_COMMENT_AFTER_SLASH 25 +#define LEXING_SINGLE_LINE_COMMENT 26 +#define LEXING_MULTI_LINE_COMMENT 27 +#define LEXING_MULTI_LINE_COMMENT_AFTER_STAR 28 +#define LEXER_ERROR 255 +typedef byte LexerState; + +/******************** JSON Grammarian ********************/ + +/* The JSON grammar comprises the following productions: + + 1. VALUE => null + 2. VALUE => boolean + 3. 
VALUE => string + 4. VALUE => number + 5. VALUE => specialnumber + 6. VALUE => { MEMBERS } + 7. VALUE => [ ITEMS ] + 8. MEMBERS => MEMBER MORE_MEMBERS + 9. MEMBERS => e + 10. MEMBER => string : VALUE + 11. MORE_MEMBERS => , MEMBER MORE_MEMBERS + 12. MORE_MEMBERS => e + 13. ITEMS => ITEM MORE_ITEMS + 14. ITEMS => e + 15. ITEM => VALUE + 16. MORE_ITEMS => , ITEM MORE_ITEMS + 17. MORE_ITEMS => e + + We implement a simple LL(1) parser based on this grammar, with events + emitted when certain non-terminals are replaced. +*/ + +/* Mutually-exclusive grammar tokens and non-terminals. The values are defined + so that the bottom 4 bits of a value can be used as an index into the + grammar production rule table. */ +#define T_NONE 0x00 /* tokens are in the form 0x0X */ +#define T_NULL 0x01 +#define T_TRUE 0x02 +#define T_FALSE 0x03 +#define T_STRING 0x04 +#define T_NUMBER 0x05 +#define T_NAN 0x06 +#define T_INFINITY 0x07 +#define T_NEGATIVE_INFINITY 0x08 +#define T_LEFT_CURLY 0x09 +#define T_RIGHT_CURLY 0x0A +#define T_LEFT_SQUARE 0x0B +#define T_RIGHT_SQUARE 0x0C +#define T_COLON 0x0D +#define T_COMMA 0x0E +#define NT_VALUE 0x10 /* non-terminals are in the form 0x1X */ +#define NT_MEMBERS 0x11 +#define NT_MEMBER 0x12 +#define NT_MORE_MEMBERS 0x13 +#define NT_ITEMS 0x14 +#define NT_ITEM 0x15 +#define NT_MORE_ITEMS 0x16 +typedef byte Symbol; + +#define IS_NONTERMINAL(s) ((s) & 0x10) +#define IS_TOKEN(s) !IS_NONTERMINAL(s) + +/* Grammarian data. */ +typedef struct tag_GrammarianData +{ + Symbol* pStack; /* initially set to defaultStack */ + size_t stackSize; + size_t stackUsed; + Symbol defaultStack[DEFAULT_SYMBOL_STACK_SIZE]; +} GrammarianData; +typedef GrammarianData* Grammarian; + +/* Mutually-exclusive result codes returned by the grammarian + after processing a token. */ +#define ACCEPTED_TOKEN 0 +#define REJECTED_TOKEN 1 +#define SYMBOL_STACK_FULL 2 +typedef uint32_t GrammarianResultCode; + +/* Events emitted by the grammarian as a result of processing a + token. 
Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed + with one of the other values. */ +#define EMIT_NOTHING 0x00 +#define EMIT_NULL 0x01 +#define EMIT_BOOLEAN 0x02 +#define EMIT_STRING 0x03 +#define EMIT_NUMBER 0x04 +#define EMIT_SPECIAL_NUMBER 0x05 +#define EMIT_START_OBJECT 0x06 +#define EMIT_END_OBJECT 0x07 +#define EMIT_OBJECT_MEMBER 0x08 +#define EMIT_START_ARRAY 0x09 +#define EMIT_END_ARRAY 0x0A +#define EMIT_ARRAY_ITEM 0x10 /* may be combined with other values */ +typedef byte GrammarEvent; + +/* The bits of GrammarianOutput are laid out as follows: + + -rreeeee + + - = unused (1 bit) + r = result code (2 bits) + e = event (5 bits) + */ +#define GRAMMARIAN_OUTPUT(r, e) (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e)) +#define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5) +#define GRAMMARIAN_EVENT(o) (GrammarEvent)((GrammarianOutput)(o) & 0x1F) +typedef byte GrammarianOutput; + +/* Grammar rule used by the grammarian to process a token. */ +typedef struct tag_GrammarRule +{ + Symbol symbolToPush1; + Symbol symbolToPush2; + byte reprocess; + GrammarEvent emit; +} GrammarRule; + +/* Grammarian functions. */ + +static void Grammarian_Reset(Grammarian grammarian, int isInitialized) +{ + /* When we reset the grammarian, we keep the symbol stack that has + already been allocated, if any. If the client wants to reclaim the + memory used by that buffer, he needs to free the grammarian + and create a new one. */ + if (!isInitialized) + { + grammarian->pStack = grammarian->defaultStack; + grammarian->stackSize = sizeof(grammarian->defaultStack); + } + + /* The grammarian always starts with NT_VALUE on the symbol stack. 
*/ + grammarian->pStack[0] = NT_VALUE; + grammarian->stackUsed = 1; +} + +static void Grammarian_FreeAllocations(Grammarian grammarian, const JSON_MemorySuite* pMemorySuite) +{ + if (grammarian->pStack != grammarian->defaultStack) + { + pMemorySuite->free(pMemorySuite->userData, grammarian->pStack); + } +} + +static int Grammarian_FinishedDocument(Grammarian grammarian) +{ + return !grammarian->stackUsed; +} + +static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian, Symbol token, const JSON_MemorySuite* pMemorySuite) +{ + /* The order and number of the rows and columns in this table must + match the defined token and non-terminal symbol values. + + The row index is the incoming token's Symbol value. + + The column index is the bottom 4 bits of Symbol value of + the non-terminal at the top of the processing stack. + Since non-terminal Symbol values start at 0x10, taking + the bottom 4 bits yields a 0-based index. */ + static const byte ruleLookup[15][7] = + { + /* V MS M MM IS I MI */ + /* ---- */ { 0, 0, 0, 0, 0, 0, 0 }, + /* null */ { 1, 0, 0, 0, 13, 15, 0 }, + /* true */ { 2, 0, 0, 0, 13, 15, 0 }, + /* false */ { 2, 0, 0, 0, 13, 15, 0 }, + /* string */ { 3, 8, 10, 0, 13, 15, 0 }, + /* number */ { 4, 0, 0, 0, 13, 15, 0 }, + /* NaN */ { 5, 0, 0, 0, 13, 15, 0 }, + /* Inf */ { 5, 0, 0, 0, 13, 15, 0 }, + /* -Inf */ { 5, 0, 0, 0, 13, 15, 0 }, + /* { */ { 6, 0, 0, 0, 13, 15, 0 }, + /* } */ { 0, 9, 0, 12, 0, 0, 0 }, + /* [ */ { 7, 0, 0, 0, 13, 15, 0 }, + /* ] */ { 0, 0, 0, 0, 14, 0, 17 }, + /* : */ { 0, 0, 0, 0, 0, 0, 0 }, + /* , */ { 0, 0, 0, 11, 0, 0, 16 } + }; + + static const GrammarRule rules[17] = + { + /* 1. */ { T_NONE, T_NONE, 0, EMIT_NULL }, + /* 2. */ { T_NONE, T_NONE, 0, EMIT_BOOLEAN }, + /* 3. */ { T_NONE, T_NONE, 0, EMIT_STRING }, + /* 4. */ { T_NONE, T_NONE, 0, EMIT_NUMBER }, + /* 5. */ { T_NONE, T_NONE, 0, EMIT_SPECIAL_NUMBER }, + /* 6. */ { T_RIGHT_CURLY, NT_MEMBERS, 0, EMIT_START_OBJECT }, + /* 7. 
*/ { T_RIGHT_SQUARE, NT_ITEMS, 0, EMIT_START_ARRAY }, + /* 8. */ { NT_MORE_MEMBERS, NT_MEMBER, 1, EMIT_NOTHING }, + /* 9. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT }, + /* 10. */ { NT_VALUE, T_COLON, 0, EMIT_OBJECT_MEMBER }, + /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER, 0, EMIT_NOTHING }, + /* 12. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT }, + /* 13. */ { NT_MORE_ITEMS, NT_ITEM, 1, EMIT_NOTHING }, + /* 14. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY }, + /* 15. */ { NT_VALUE, T_NONE, 1, EMIT_ARRAY_ITEM }, + /* 16. */ { NT_MORE_ITEMS, NT_ITEM, 0, EMIT_NOTHING }, + /* 17. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY } + }; + + GrammarEvent emit = EMIT_NOTHING; + + /* If the stack is empty, no more tokens were expected. */ + if (Grammarian_FinishedDocument(grammarian)) + { + return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING); + } + + for (;;) + { + Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1]; + if (IS_TOKEN(topSymbol)) + { + if (topSymbol != token) + { + return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING); + } + grammarian->stackUsed--; + break; + } + else + { + byte ruleNumber = ruleLookup[token][BOTTOM_4_BITS(topSymbol)]; + if (ruleNumber == 0) + { + return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING); + } + else + { + const GrammarRule* pRule = &rules[ruleNumber - 1]; + if (pRule->symbolToPush1 == T_NONE) + { + /* The rule removes the top symbol and does not replace it. */ + grammarian->stackUsed--; + } + else + { + /* The rule replaces the top symbol with 1 or 2 symbols. */ + grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1; + if (pRule->symbolToPush2 != T_NONE) + { + /* The rule replaces the top symbol with 2 symbols. + Make sure the stack has room for the second one. 
*/ + if (grammarian->stackUsed == grammarian->stackSize) + { + Symbol* pBiggerStack = DoubleBuffer(pMemorySuite, grammarian->defaultStack, grammarian->pStack, grammarian->stackSize); + if (!pBiggerStack) + { + return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING); + } + grammarian->pStack = pBiggerStack; + grammarian->stackSize *= 2; + } + grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2; + grammarian->stackUsed++; + } + } + emit |= pRule->emit; + if (!pRule->reprocess) + { + break; + } + } + } + } + + return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit); +} + +/******************** JSON Parser ********************/ + +#ifndef JSON_NO_PARSER + +/* Combinable parser state flags. */ +#define PARSER_RESET 0x00 +#define PARSER_STARTED 0x01 +#define PARSER_FINISHED 0x02 +#define PARSER_IN_PROTECTED_API 0x04 +#define PARSER_IN_TOKEN_HANDLER 0x08 +#define PARSER_AFTER_CARRIAGE_RETURN 0x10 +typedef byte ParserState; + +/* Combinable parser settings flags. */ +#define PARSER_DEFAULT_FLAGS 0x00 +#define PARSER_ALLOW_BOM 0x01 +#define PARSER_ALLOW_COMMENTS 0x02 +#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04 +#define PARSER_ALLOW_HEX_NUMBERS 0x08 +#define PARSER_REPLACE_INVALID 0x10 +#define PARSER_TRACK_OBJECT_MEMBERS 0x20 +#define PARSER_ALLOW_CONTROL_CHARS 0x40 +#define PARSER_EMBEDDED_DOCUMENT 0x80 +typedef byte ParserFlags; + +/* Sentinel value for parser error location offset. */ +#define ERROR_LOCATION_IS_TOKEN_START 0xFF + +/* An object member name stored in an unordered, singly-linked-list, used for + detecting duplicate member names. Note that the name string is not null- + terminated. */ +typedef struct tag_MemberName +{ + struct tag_MemberName* pNextName; + size_t length; + byte pBytes[1]; /* variable-size buffer */ +} MemberName; + +/* An object's list of member names, and a pointer to the object's + nearest ancestor object, if any. This is used as a stack. Because arrays + do not have named items, they do not need to be recorded in the stack. 
*/ +typedef struct tag_MemberNames +{ + struct tag_MemberNames* pAncestor; + MemberName* pFirstName; +} MemberNames; + +/* A parser instance. */ +struct JSON_Parser_Data +{ + JSON_MemorySuite memorySuite; + void* userData; + ParserState state; + ParserFlags flags; + Encoding inputEncoding; + Encoding stringEncoding; + Encoding numberEncoding; + Symbol token; + TokenAttributes tokenAttributes; + Error error; + byte errorOffset; + LexerState lexerState; + uint32_t lexerBits; + size_t codepointLocationByte; + size_t codepointLocationLine; + size_t codepointLocationColumn; + size_t tokenLocationByte; + size_t tokenLocationLine; + size_t tokenLocationColumn; + size_t depth; + byte* pTokenBytes; + size_t tokenBytesLength; + size_t tokenBytesUsed; + size_t maxStringLength; + size_t maxNumberLength; + MemberNames* pMemberNames; + DecoderData decoderData; + GrammarianData grammarianData; + JSON_Parser_EncodingDetectedHandler encodingDetectedHandler; + JSON_Parser_NullHandler nullHandler; + JSON_Parser_BooleanHandler booleanHandler; + JSON_Parser_StringHandler stringHandler; + JSON_Parser_NumberHandler numberHandler; + JSON_Parser_SpecialNumberHandler specialNumberHandler; + JSON_Parser_StartObjectHandler startObjectHandler; + JSON_Parser_EndObjectHandler endObjectHandler; + JSON_Parser_ObjectMemberHandler objectMemberHandler; + JSON_Parser_StartArrayHandler startArrayHandler; + JSON_Parser_EndArrayHandler endArrayHandler; + JSON_Parser_ArrayItemHandler arrayItemHandler; + byte defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH]; +}; + +/* Parser internal functions. */ + +static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error) +{ + parser->error = error; +} + +static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(JSON_Parser parser, Error error, int codepointsAgo) +{ + /* Note that backtracking from the current codepoint requires us to make + three assumptions, which are always valid in the context of a string + escape sequence: + + 1. 
The input encoding is not JSON_UnknownEncoding. + + 2 The codepoints we are backing up across are all in the range + U+0000 - U+007F, aka ASCII, so we can assume the number of + bytes comprising them based on the input encoding. + + 3. The codepoints we are backing up across do not include any + line breaks, so we can assume that the line number stays the + same and the column number can simply be decremented. + */ + parser->error = error; + parser->errorOffset = (byte)codepointsAgo; +} + +static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error) +{ + parser->error = error; + parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START; +} + +static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser) +{ + MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberNames)); + if (!pNames) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory); + return JSON_Failure; + } + pNames->pAncestor = parser->pMemberNames; + pNames->pFirstName = NULL; + parser->pMemberNames = pNames; + return JSON_Success; +} + +static void JSON_Parser_PopMemberNameList(JSON_Parser parser) +{ + MemberNames* pAncestor = parser->pMemberNames->pAncestor; + while (parser->pMemberNames->pFirstName) + { + MemberName* pNextName = parser->pMemberNames->pFirstName->pNextName; + parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames->pFirstName); + parser->pMemberNames->pFirstName = pNextName; + } + parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames); + parser->pMemberNames = pAncestor; +} + +static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject) +{ + if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) && + !JSON_Parser_PushMemberNameList(parser)) + { + return JSON_Failure; + } + parser->depth++; + return JSON_Success; +} + +static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject) +{ + parser->depth--; + if (isObject 
&& GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) + { + JSON_Parser_PopMemberNameList(parser); + } +} + +/* When PARSER_TRACK_OBJECT_MEMBERS is set, check the current token bytes against every name in the innermost member-name list. An exact match (same length, same bytes) fails with JSON_Error_DuplicateObjectMember at the token; otherwise a copy of the name is prepended to the list. Returns JSON_Success without doing anything when tracking is off. */ static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser) +{ + if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) + { + MemberName* pName; + for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName) + { + if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length)) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember); + return JSON_Failure; + } + } + /* NOTE(review): the `- 1` presumably offsets a one-byte pBytes array declared inside MemberName - confirm against the struct definition. */ pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1); + if (!pName) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory); + return JSON_Failure; + } + /* Prepend the new name to the innermost list. */ pName->pNextName = parser->pMemberNames->pFirstName; + pName->length = parser->tokenBytesUsed; + memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed); + parser->pMemberNames->pFirstName = pName; + } + return JSON_Success; +} + +/* Put every parser field back to its default. isInitialized == 0 is the first-time path: the token buffer is pointed at the built-in defaultTokenBytes array and pMemberNames is simply set to NULL. Otherwise the current token buffer is kept and every remaining member-name list is popped and freed. All handlers are cleared, and state is set to PARSER_RESET as the final step. */ static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized) +{ + parser->userData = NULL; + parser->flags = PARSER_DEFAULT_FLAGS; + parser->inputEncoding = JSON_UnknownEncoding; + parser->stringEncoding = JSON_UTF8; + parser->numberEncoding = JSON_UTF8; + parser->token = T_NONE; + parser->tokenAttributes = 0; + parser->error = JSON_Error_None; + parser->errorOffset = 0; + parser->lexerState = LEXING_WHITESPACE; + parser->lexerBits = 0; + parser->codepointLocationByte = 0; + parser->codepointLocationLine = 0; + parser->codepointLocationColumn = 0; + parser->tokenLocationByte = 0; + parser->tokenLocationLine = 0; + parser->tokenLocationColumn = 0; + parser->depth = 0; + if (!isInitialized) + { + parser->pTokenBytes = parser->defaultTokenBytes; + parser->tokenBytesLength = sizeof(parser->defaultTokenBytes); + } + else + { + /* When we reset the parser, we keep the output buffer and the symbol + stack that 
have already been allocated, if any. If the client wants + to reclaim the memory used by those buffers, he needs to free + the parser and create a new one. */ + } + parser->tokenBytesUsed = 0; + parser->maxStringLength = SIZE_MAX; + parser->maxNumberLength = SIZE_MAX; + if (!isInitialized) + { + /* First-time setup: only clear the pointer; nothing is freed. */ parser->pMemberNames = NULL; + } + else + { + while (parser->pMemberNames) + { + JSON_Parser_PopMemberNameList(parser); + } + } + Decoder_Reset(&parser->decoderData); + Grammarian_Reset(&parser->grammarianData, isInitialized); + parser->encodingDetectedHandler = NULL; + parser->nullHandler = NULL; + parser->booleanHandler = NULL; + parser->stringHandler = NULL; + parser->numberHandler = NULL; + parser->specialNumberHandler = NULL; + parser->startObjectHandler = NULL; + parser->endObjectHandler = NULL; + parser->objectMemberHandler = NULL; + parser->startArrayHandler = NULL; + parser->endArrayHandler = NULL; + parser->arrayItemHandler = NULL; + parser->state = PARSER_RESET; /* do this last! */ +} + +/* Write an encoded null terminator directly after the token bytes. Its width is SHORTEST_ENCODING_SEQUENCE of the token's output encoding (numberEncoding for T_NUMBER, stringEncoding otherwise). tokenBytesUsed is not modified. */ static void JSON_Parser_NullTerminateToken(JSON_Parser parser) +{ + /* Because we always ensure that there are LONGEST_ENCODING_SEQUENCE bytes + available at the end of the token buffer when we record codepoints, we + can write the null terminator to the buffer with impunity. */ + static const byte nullTerminatorBytes[LONGEST_ENCODING_SEQUENCE] = { 0 }; + Encoding encoding = (Encoding)((parser->token == T_NUMBER) ? parser->numberEncoding : parser->stringEncoding); + memcpy(parser->pTokenBytes + parser->tokenBytesUsed, nullTerminatorBytes, SHORTEST_ENCODING_SEQUENCE(encoding)); +} + +/* Final grammar check: returns JSON_Success only if the grammarian reports the document as finished; otherwise records JSON_Error_ExpectedMoreTokens and returns JSON_Failure. */ static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser) +{ + /* The symbol stack should be empty when parsing finishes. 
*/ + if (!Grammarian_FinishedDocument(&parser->grammarianData)) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens); + return JSON_Failure; + } + return JSON_Success; +} + +/* Signature of the handlers that receive only the parser. */ typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser); +/* Invoke `handler` if it is non-NULL, with PARSER_IN_TOKEN_HANDLER set in parser->state for the duration of the call. Any result other than JSON_Parser_Continue is recorded as JSON_Error_AbortedByHandler at the token start and returns JSON_Failure. A NULL handler is treated as success. */ static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler) +{ + if (handler) + { + JSON_Parser_HandlerResult result; + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + result = handler(parser); + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + if (result != JSON_Parser_Continue) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +/* Invoke parser->booleanHandler, if set, passing JSON_True when the current token is T_TRUE and JSON_False otherwise. The in-handler flag and abort handling match JSON_Parser_CallSimpleTokenHandler. */ static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser) +{ + if (parser->booleanHandler) + { + JSON_Parser_HandlerResult result; + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False); + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + if (result != JSON_Parser_Continue) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +/* Deliver the current token bytes to objectMemberHandler (isObjectMember nonzero) or stringHandler (isObjectMember zero), if that handler is set. The token is null-terminated before the call. A JSON_Parser_TreatAsDuplicateObjectMember result from an object-member handler is reported as JSON_Error_DuplicateObjectMember; any other non-Continue result as JSON_Error_AbortedByHandler. */ static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember) +{ + JSON_Parser_StringHandler handler = isObjectMember ? 
parser->objectMemberHandler : parser->stringHandler; + if (handler) + { + JSON_Parser_HandlerResult result; + /* Terminate the token bytes before handing the buffer to the client. */ JSON_Parser_NullTerminateToken(parser); + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes); + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + if (result != JSON_Parser_Continue) + { + /* Only an object-member handler may request the duplicate-member error; everything else is an abort. */ JSON_Parser_SetErrorAtToken(parser, (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember) + ? JSON_Error_DuplicateObjectMember : JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +/* Invoke parser->numberHandler, if set, with the null-terminated token bytes, their length (tokenBytesUsed) and the token attributes. Any result other than JSON_Parser_Continue is recorded as JSON_Error_AbortedByHandler at the token start. */ static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser) +{ + if (parser->numberHandler) + { + JSON_Parser_HandlerResult result; + JSON_Parser_NullTerminateToken(parser); + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + result = parser->numberHandler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes); + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + if (result != JSON_Parser_Continue) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +/* Invoke parser->specialNumberHandler, if set. The current token selects the value passed: T_NAN gives JSON_NaN, T_INFINITY gives JSON_Infinity, and any other token gives JSON_NegativeInfinity. Any result other than JSON_Parser_Continue is recorded as JSON_Error_AbortedByHandler at the token start. */ static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser) +{ + if (parser->specialNumberHandler) + { + JSON_Parser_HandlerResult result; + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN : + (parser->token == T_INFINITY ? 
JSON_Infinity : JSON_NegativeInfinity)); + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER); + if (result != JSON_Parser_Continue) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +/* Dispatch the grammar event(s) in `emit` to the client handlers. EMIT_ARRAY_ITEM is tested as a flag that can accompany another event: arrayItemHandler runs first and the flag is cleared before the switch on the remaining value. Start handlers run before the container is entered; end handlers run after it has been left. After the event, if depth is 0 and PARSER_EMBEDDED_DOCUMENT is set, the function fails with JSON_Error_StoppedAfterEmbeddedDocument. Returns JSON_Failure as soon as any handler or helper fails. */ static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit) +{ + if (GET_FLAGS(emit, EMIT_ARRAY_ITEM)) + { + if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler)) + { + return JSON_Failure; + } + SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM); + } + switch (emit) + { + case EMIT_NULL: + if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler)) + { + return JSON_Failure; + } + break; + + case EMIT_BOOLEAN: + if (!JSON_Parser_CallBooleanHandler(parser)) + { + return JSON_Failure; + } + break; + + case EMIT_STRING: + if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */)) + { + return JSON_Failure; + } + break; + + case EMIT_NUMBER: + if (!JSON_Parser_CallNumberHandler(parser)) + { + return JSON_Failure; + } + break; + + case EMIT_SPECIAL_NUMBER: + if (!JSON_Parser_CallSpecialNumberHandler(parser)) + { + return JSON_Failure; + } + break; + + case EMIT_START_OBJECT: + /* The client handler is called first; the container is entered only if it returns Continue. */ if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) || + !JSON_Parser_StartContainer(parser, 1/*isObject*/)) + { + return JSON_Failure; + } + break; + + case EMIT_END_OBJECT: + /* The container is left (depth decremented, member names freed) before the client handler runs. */ JSON_Parser_EndContainer(parser, 1/*isObject*/); + if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler)) + { + return JSON_Failure; + } + break; + + case EMIT_OBJECT_MEMBER: + if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */ + !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */)) + { + return JSON_Failure; + } + break; + + case EMIT_START_ARRAY: + if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) || + !JSON_Parser_StartContainer(parser, 0/*isObject*/)) + { + return JSON_Failure; 
+ } + break; + + case EMIT_END_ARRAY: + JSON_Parser_EndContainer(parser, 0/*isObject*/); + if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler)) + { + return JSON_Failure; + } + break; + } + /* Back at depth 0 in embedded-document mode: report the stop as a failure with its own error code. */ if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument); + return JSON_Failure; + } + return JSON_Success; +} + +/* Feed the current token to the grammarian and act on its verdict: an accepted token has its grammar events dispatched; a rejected token fails with JSON_Error_UnexpectedToken at the token; a full symbol stack fails with JSON_Error_OutOfMemory. On success the lexer state, lexer bits, token, token attributes and token length are reset for the next token. */ static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser) +{ + GrammarianOutput output; + output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite); + switch (GRAMMARIAN_RESULT_CODE(output)) + { + case ACCEPTED_TOKEN: + if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output))) + { + return JSON_Failure; + } + break; + + case REJECTED_TOKEN: + JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken); + return JSON_Failure; + + case SYMBOL_STACK_FULL: + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory); + return JSON_Failure; + } + + /* Reset the lexer to prepare for the next token. */ + parser->lexerState = LEXING_WHITESPACE; + parser->lexerBits = 0; + parser->token = T_NONE; + parser->tokenAttributes = 0; + parser->tokenBytesUsed = 0; + return JSON_Success; +} + +/* Lexer functions. */ + +/* The characters expected after the first character of each literal (null, true, false, NaN, Infinity), stored back to back with a 0 terminating each run. The *_START_INDEX macros below give the offset at which each run begins. */ static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 }; + +#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX 0 +#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX 4 +#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX 8 +#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX 13 +#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16 + +/* Forward declaration. 
*/ +static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser); +static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength); + +/* Decide how to react to an undecodable input sequence of `encodedLength` bytes. Three outcomes, tried in order: (1) inside a string token with PARSER_REPLACE_INVALID set, flag the token with JSON_ContainsReplacedCharacter and process U+FFFD in place of the sequence; (2) at depth 0 with PARSER_EMBEDDED_DOCUMENT set, flush the lexer and parser as if the input had ended; (3) otherwise fail with JSON_Error_InvalidEncodingSequence. */ static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(JSON_Parser parser, size_t encodedLength) { + if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) + { + /* Since we're inside a string token, replacing the invalid sequence + with the Unicode replacement character as requested by the client + is a viable way to avoid a parse failure. Outside a string token, + such a replacement would simply trigger JSON_Error_UnknownToken + when we tried to process the replacement character, so it's less + confusing to stick with JSON_Error_InvalidEncodingSequence in that + case. */ + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter); + return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength); + } + else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) + { + /* Since we're parsing the top-level value of an embedded + document, assume that the invalid encoding sequence we've + encountered does not actually belong to the document, and + finish parsing by pretending that we've encountered EOF + instead of an invalid sequence. If the content is valid, + this will fail with JSON_Error_StoppedAfterEmbeddedDocument; + otherwise, it will fail with an appropriate error. 
*/ + return JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser); + } + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence); + return JSON_Failure; +} + +/* Handle a number token that turned out to be invalid at codepoint `c`. `attributesToRemove` is cleared from the token attributes first. At depth 0 with PARSER_EMBEDDED_DOCUMENT set, the byte/column counters and the token length are rewound by `codepointsSinceValidNumber` codepoints and the remaining valid prefix is processed as the token. Otherwise, when `c` is EOF_CODEPOINT the function returns JSON_Success without recording an error, and for any other `c` it fails with JSON_Error_InvalidNumber at the token start. */ static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser, Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove) +{ + SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove); + if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) + { + /* The invalid number is the top-level value of an embedded document, + and it has a prefix that can be interpreted as a valid number. + We want to backtrack so that we are at the end of that prefix, + and then process the valid token. + + Note that backtracking requires us to make three assumptions, which + are always valid in the context of a number token: + + 1. The input encoding is not JSON_UnknownEncoding. + + 2. The codepoints we are backing up across are all in the range + U+0000 - U+007F, aka ASCII, so we can assume the number of + bytes comprising them based on the input encoding. + + 3. The codepoints we are backing up across do not include any + line breaks, so we can assume that the line number stays the + same and the column number can simply be decremented. + + For example: + + "01" => "0" + "123.!" => "123" + "123e!" => "123" + "123e+!" => "123" + "123e-!" => "123" + "1.2e!" => "1.2" + "1.2e+!" => "1.2" + "1.2e-!" => "1.2" + */ + /* Input position rewinds in units of the input encoding; the token buffer rewinds in units of the number output encoding. */ parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber * SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding); + parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber; + parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber * SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding); + return JSON_Parser_ProcessToken(parser); /* always fails */ + } + else if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. 
*/ + return JSON_Success; + } + JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber); + return JSON_Failure; +} + +/* Begin a new token of kind `token`: store it as the current token and snapshot the current codepoint location (byte, line, column) as the token location. */ static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token) +{ + parser->token = token; + parser->tokenLocationByte = parser->codepointLocationByte; + parser->tokenLocationLine = parser->codepointLocationLine; + parser->tokenLocationColumn = parser->codepointLocationColumn; +} + +static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength) +{ + Codepoint codepointToRecord = EOF_CODEPOINT; + Encoding tokenEncoding; + size_t maxTokenLength; + int tokenFinished = 0; + + /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current + codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single + line break. */ + if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN)) + { + if (c == LINE_FEED_CODEPOINT) + { + parser->codepointLocationLine--; + } + SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN); + } + +reprocess: + + switch (parser->lexerState) + { + case LEXING_WHITESPACE: + if (c == '{') + { + JSON_Parser_StartToken(parser, T_LEFT_CURLY); + tokenFinished = 1; + } + else if (c == '}') + { + JSON_Parser_StartToken(parser, T_RIGHT_CURLY); + tokenFinished = 1; + } + else if (c == '[') + { + JSON_Parser_StartToken(parser, T_LEFT_SQUARE); + tokenFinished = 1; + } + else if (c == ']') + { + JSON_Parser_StartToken(parser, T_RIGHT_SQUARE); + tokenFinished = 1; + } + else if (c == ':') + { + JSON_Parser_StartToken(parser, T_COLON); + tokenFinished = 1; + } + else if (c == ',') + { + JSON_Parser_StartToken(parser, T_COMMA); + tokenFinished = 1; + } + else if (c == 'n') + { + JSON_Parser_StartToken(parser, T_NULL); + parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX; + parser->lexerState = LEXING_LITERAL; + } + else if (c == 't') + { + JSON_Parser_StartToken(parser, T_TRUE); + parser->lexerBits = TRUE_LITERAL_EXPECTED_CHARS_START_INDEX; + 
parser->lexerState = LEXING_LITERAL; + } + else if (c == 'f') + { + JSON_Parser_StartToken(parser, T_FALSE); + parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX; + parser->lexerState = LEXING_LITERAL; + } + else if (c == '"') + { + JSON_Parser_StartToken(parser, T_STRING); + parser->lexerState = LEXING_STRING; + } + else if (c == '-') + { + JSON_Parser_StartToken(parser, T_NUMBER); + parser->tokenAttributes = JSON_IsNegative; + codepointToRecord = '-'; + parser->lexerState = LEXING_NUMBER_AFTER_MINUS; + goto recordNumberCodepointAndAdvance; + } + else if (c == '0') + { + JSON_Parser_StartToken(parser, T_NUMBER); + codepointToRecord = '0'; + parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO; + goto recordNumberCodepointAndAdvance; + } + else if (c >= '1' && c <= '9') + { + JSON_Parser_StartToken(parser, T_NUMBER); + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT || + c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT) + { + /* Ignore whitespace between tokens. */ + } + else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0) + { + if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) + { + /* OK, we'll allow the BOM. */ + } + else + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed); + return JSON_Failure; + } + } + else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) + { + /* Comments are not real tokens, but we save the location + of the comment as the token location in case of an error. 
*/ + parser->tokenLocationByte = parser->codepointLocationByte; + parser->tokenLocationLine = parser->codepointLocationLine; + parser->tokenLocationColumn = parser->codepointLocationColumn; + parser->lexerState = LEXING_COMMENT_AFTER_SLASH; + } + else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) + { + JSON_Parser_StartToken(parser, T_NAN); + parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX; + parser->lexerState = LEXING_LITERAL; + } + else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) + { + JSON_Parser_StartToken(parser, T_INFINITY); + parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX; + parser->lexerState = LEXING_LITERAL; + } + else + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken); + return JSON_Failure; + } + goto advance; + + case LEXING_LITERAL: + /* While lexing a literal we store an index into expectedLiteralChars + in lexerBits. */ + if (expectedLiteralChars[parser->lexerBits]) + { + /* The codepoint should match the next character in the literal. */ + if (c != expectedLiteralChars[parser->lexerBits]) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); + return JSON_Failure; + } + parser->lexerBits++; + + /* If the literal is the top-level value of an embedded document, + process it as soon as we consume its last expected codepoint. + Normally we defer processing until the following codepoint + has been examined, so that we can treat sequences like "nullx" + as a single, unknown token rather than a null literal followed + by an unknown token. */ + if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) && + !expectedLiteralChars[parser->lexerBits]) + { + tokenFinished = 1; + } + } + else + { + /* The literal should be finished, so the codepoint should not be + a plausible JSON literal character, but rather EOF, whitespace, + or the first character of the next token. 
*/ + if ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + (c == '_')) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); + return JSON_Failure; + } + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_STRING: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else if (c == '"') + { + tokenFinished = 1; + } + else if (c == '\\') + { + parser->lexerState = LEXING_STRING_ESCAPE; + } + else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) + { + /* ASCII control characters (U+0000 - U+001F) are not allowed to + appear unescaped in string values unless specifically allowed. */ + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter); + return JSON_Failure; + } + else + { + codepointToRecord = c; + goto recordStringCodepointAndAdvance; + } + goto advance; + + case LEXING_STRING_ESCAPE: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else + { + if (c == 'u') + { + parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1; + } + else + { + if (c == '"' || c == '\\' || c == '/') + { + codepointToRecord = c; + } + else if (c == 'b') + { + codepointToRecord = BACKSPACE_CODEPOINT; + } + else if (c == 't') + { + codepointToRecord = TAB_CODEPOINT; + } + else if (c == 'n') + { + codepointToRecord = LINE_FEED_CODEPOINT; + } + else if (c == 'f') + { + codepointToRecord = FORM_FEED_CODEPOINT; + } + else if (c == 'r') + { + codepointToRecord = CARRIAGE_RETURN_CODEPOINT; + } + else + { + /* The current codepoint location is the first character after + the backslash that started the escape sequence. The error + location should be the beginning of the escape sequence, 1 + character earlier. 
*/ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1); + return JSON_Failure; + } + parser->lexerState = LEXING_STRING; + goto recordStringCodepointAndAdvance; + } + } + goto advance; + + case LEXING_STRING_HEX_ESCAPE_BYTE_1: + case LEXING_STRING_HEX_ESCAPE_BYTE_2: + case LEXING_STRING_HEX_ESCAPE_BYTE_3: + case LEXING_STRING_HEX_ESCAPE_BYTE_4: + case LEXING_STRING_HEX_ESCAPE_BYTE_5: + case LEXING_STRING_HEX_ESCAPE_BYTE_6: + case LEXING_STRING_HEX_ESCAPE_BYTE_7: + case LEXING_STRING_HEX_ESCAPE_BYTE_8: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else + { + /* While lexing a string hex escape sequence we store the bytes + of the escaped codepoint in the low 2 bytes of lexerBits. If + the escape sequence represents a leading surrogate, we shift + the leading surrogate into the high 2 bytes and lex a second + hex escape sequence (which should be a trailing surrogate). */ + int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3; + uint32_t nibble; + if (c >= '0' && c <= '9') + { + nibble = c - '0'; + } + else if (c >= 'A' && c <= 'F') + { + nibble = c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') + { + nibble = c - 'a' + 10; + } + else + { + /* The current codepoint location is one of the 4 hex digit + character slots in the hex escape sequence. The error + location should be the beginning of the hex escape + sequence, between 2 and 5 bytes earlier. */ + int codepointsAgo = 2 /* for "\u" */ + byteNumber; + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, codepointsAgo); + return JSON_Failure; + } + /* Store the hex digit's bits in the appropriate byte of lexerBits. */ + nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ; + parser->lexerBits |= nibble; + if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4) + { + /* The escape sequence is complete. 
We need to check whether + it represents a leading surrogate (which implies that it + will be immediately followed by a hex-escaped trailing + surrogate), a trailing surrogate (which is invalid), or a + valid codepoint (which should simply be appended to the + string token value). */ + if (IS_LEADING_SURROGATE(parser->lexerBits)) + { + /* Shift the leading surrogate into the high 2 bytes of + lexerBits so that the trailing surrogate can be stored + in the low 2 bytes. */ + parser->lexerBits <<= 16; + parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH; + } + else if (IS_TRAILING_SURROGATE(parser->lexerBits)) + { + /* The current codepoint location is the last hex digit + of the hex escape sequence. The error location should + be the beginning of the hex escape sequence, 5 + characters earlier. */ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5); + return JSON_Failure; + } + else + { + /* The escape sequence represents a BMP codepoint. */ + codepointToRecord = parser->lexerBits; + parser->lexerBits = 0; + parser->lexerState = LEXING_STRING; + goto recordStringCodepointAndAdvance; + } + } + else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8) + { + /* The second hex escape sequence is complete. We need to + check whether it represents a trailing surrogate as + expected. If so, the surrogate pair represents a single + non-BMP codepoint. */ + if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF)) + { + /* The current codepoint location is the last hex digit of + the second hex escape sequence. The error location + should be the beginning of the leading surrogate + hex escape sequence, 11 characters earlier. */ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11); + return JSON_Failure; + } + /* The escape sequence represents a non-BMP codepoint. 
*/ + codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits); + parser->lexerBits = 0; + parser->lexerState = LEXING_STRING; + goto recordStringCodepointAndAdvance; + } + else + { + parser->lexerState++; + } + } + goto advance; + + case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else + { + if (c != '\\') + { + /* The current codepoint location is the first character after + the leading surrogate hex escape sequence. The error + location should be the beginning of the leading surrogate + hex escape sequence, 6 characters earlier. */ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6); + return JSON_Failure; + } + parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U; + } + goto advance; + + case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else + { + if (c != 'u') + { + /* Distinguish between a totally bogus escape sequence + and a valid one that just isn't the hex escape kind + that we require for a trailing surrogate. The current + codepoint location is the first character after the + backslash that should have introduced the trailing + surrogate hex escape sequence. */ + if (c == '"' || c == '\\' || c == '/' || c == 'b' || + c == 't' || c == 'n' || c == 'f' || c == 'r') + { + /* The error location should be at that beginning of the + leading surrogate's hex escape sequence, 7 characters + earlier. */ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7); + } + else + { + /* The error location should be at that backslash, 1 + character earlier. 
*/ + JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1); + } + return JSON_Failure; + } + parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5; + } + goto advance; + + case LEXING_NUMBER_AFTER_MINUS: + if (c == EOF_CODEPOINT) + { + /* Allow JSON_Parser_FlushLexer() to fail. */ + } + else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) + { + parser->token = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */ + parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX; + parser->lexerState = LEXING_LITERAL; + } + else + { + if (c == '0') + { + codepointToRecord = '0'; + parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO; + goto recordNumberCodepointAndAdvance; + } + else if (c >= '1' && c <= '9') + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else + { + /* We trigger an unknown token error rather than an invalid number + error so that "Foo" and "-Foo" trigger the same error. */ + JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); + return JSON_Failure; + } + } + goto advance; + + case LEXING_NUMBER_AFTER_LEADING_ZERO: + case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO: + if (c == '.') + { + codepointToRecord = '.'; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint); + parser->lexerState = LEXING_NUMBER_AFTER_DOT; + goto recordNumberCodepointAndAdvance; + } + else if (c == 'e' || c == 'E') + { + codepointToRecord = c; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); + parser->lexerState = LEXING_NUMBER_AFTER_E; + goto recordNumberCodepointAndAdvance; + } + else if (c >= '0' && c <= '9') + { + /* JSON does not allow the integer part of a number to have any + digits after a leading zero. 
*/ + if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0)) + { + return JSON_Failure; + } + } + else if ((c == 'x' || c == 'X') && + parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO && + GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) + { + codepointToRecord = c; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex); + parser->lexerState = LEXING_NUMBER_AFTER_X; + goto recordNumberCodepointAndAdvance; + } + else + { + /* The number is finished. */ + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_NUMBER_AFTER_X: + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_HEX_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex)) + { + return JSON_Failure; + } + goto advance; + + case LEXING_NUMBER_HEX_DIGITS: + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + { + codepointToRecord = c; + goto recordNumberCodepointAndAdvance; + } + else + { + /* The number is finished. */ + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_NUMBER_DECIMAL_DIGITS: + if (c >= '0' && c <= '9') + { + codepointToRecord = c; + goto recordNumberCodepointAndAdvance; + } + else if (c == '.') + { + codepointToRecord = '.'; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint); + parser->lexerState = LEXING_NUMBER_AFTER_DOT; + goto recordNumberCodepointAndAdvance; + } + else if (c == 'e' || c == 'E') + { + codepointToRecord = c; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); + parser->lexerState = LEXING_NUMBER_AFTER_E; + goto recordNumberCodepointAndAdvance; + } + else + { + /* The number is finished. 
*/ + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_NUMBER_AFTER_DOT: + if (c >= '0' && c <= '9') + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint)) + { + return JSON_Failure; + } + goto advance; + + case LEXING_NUMBER_FRACTIONAL_DIGITS: + if (c >= '0' && c <= '9') + { + codepointToRecord = c; + goto recordNumberCodepointAndAdvance; + } + else if (c == 'e' || c == 'E') + { + codepointToRecord = c; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); + parser->lexerState = LEXING_NUMBER_AFTER_E; + goto recordNumberCodepointAndAdvance; + } + else + { + /* The number is finished. */ + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_NUMBER_AFTER_E: + if (c == '+') + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN; + goto recordNumberCodepointAndAdvance; + } + else if (c == '-') + { + codepointToRecord = c; + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent); + parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN; + goto recordNumberCodepointAndAdvance; + } + else if (c >= '0' && c <= '9') + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent)) + { + return JSON_Failure; + } + goto advance; + + case LEXING_NUMBER_AFTER_EXPONENT_SIGN: + if (c >= '0' && c <= '9') + { + codepointToRecord = c; + parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS; + goto recordNumberCodepointAndAdvance; + } + else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent)) + { + 
return JSON_Failure; + } + goto advance; + + case LEXING_NUMBER_EXPONENT_DIGITS: + if (c >= '0' && c <= '9') + { + codepointToRecord = c; + goto recordNumberCodepointAndAdvance; + } + else + { + /* The number is finished. */ + if (!JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + goto reprocess; + } + goto advance; + + case LEXING_COMMENT_AFTER_SLASH: + if (c == '/') + { + parser->lexerState = LEXING_SINGLE_LINE_COMMENT; + } + else if (c == '*') + { + parser->lexerState = LEXING_MULTI_LINE_COMMENT; + } + else + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); + return JSON_Failure; + } + goto advance; + + case LEXING_SINGLE_LINE_COMMENT: + if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT) + { + parser->lexerState = LEXING_WHITESPACE; + } + goto advance; + + case LEXING_MULTI_LINE_COMMENT: + if (c == '*') + { + parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR; + } + goto advance; + + case LEXING_MULTI_LINE_COMMENT_AFTER_STAR: + if (c == '/') + { + parser->lexerState = LEXING_WHITESPACE; + } + else if (c != '*') + { + parser->lexerState = LEXING_MULTI_LINE_COMMENT; + } + goto advance; + } + +recordStringCodepointAndAdvance: + + tokenEncoding = parser->stringEncoding; + maxTokenLength = parser->maxStringLength; + if (!codepointToRecord) + { + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter); + } + else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT) + { + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter); + } + else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT) + { + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter); + } + else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT) + { + SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter); + } + goto recordCodepointAndAdvance; + 
+recordNumberCodepointAndAdvance: + + tokenEncoding = parser->numberEncoding; + maxTokenLength = parser->maxNumberLength; + goto recordCodepointAndAdvance; + +recordCodepointAndAdvance: + + /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes + available in the buffer for the next codepoint, so we don't have to + check whether there is room when we decode a new codepoint, and if + there isn't another codepoint, we have space already allocated for + the encoded null terminator.*/ + parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed); + if (parser->tokenBytesUsed > maxTokenLength) + { + JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString); + return JSON_Failure; + } + if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE) + { + byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength); + if (!pBiggerBuffer) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory); + return JSON_Failure; + } + parser->pTokenBytes = pBiggerBuffer; + parser->tokenBytesLength *= 2; + } + goto advance; + +advance: + + /* The current codepoint has been accepted, so advance the codepoint + location counters accordingly. Note that the one time we don't + do this is when the codepoint is EOF, which doesn't actually + appear in the input stream. */ + if (c == CARRIAGE_RETURN_CODEPOINT) + { + SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN); + } + if (c != EOF_CODEPOINT) + { + parser->codepointLocationByte += encodedLength; + if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT) + { + /* The next character will begin a new line. */ + parser->codepointLocationLine++; + parser->codepointLocationColumn = 0; + } + else + { + /* The next character will be on the same line. 
*/ + parser->codepointLocationColumn++; + } + } + + if (tokenFinished && !JSON_Parser_ProcessToken(parser)) + { + return JSON_Failure; + } + + return JSON_Success; +} + +static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser) +{ + /* Push the EOF codepoint to the lexer so that it can finish the pending + token, if any. The EOF codepoint is never emitted by the decoder + itself, since it is outside the Unicode range and therefore cannot + be encoded in any of the possible input encodings. */ + if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0)) + { + return JSON_Failure; + } + + /* The lexer should be idle when parsing finishes. */ + if (parser->lexerState != LEXING_WHITESPACE) + { + JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken); + return JSON_Failure; + } + return JSON_Success; +} + +/* Parser's decoder functions. */ + +static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser) +{ + if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue) + { + JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + return JSON_Success; +} + +/* Forward declaration. */ +static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length); + +static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b) +{ + /* When the input encoding is unknown, the first 4 bytes of input are + recorded in decoder.bits. 
*/ + + byte bytes[LONGEST_ENCODING_SEQUENCE]; + switch (parser->decoderData.state) + { + case DECODER_RESET: + parser->decoderData.state = DECODED_1_OF_4; + parser->decoderData.bits = (uint32_t)b << 24; + break; + + case DECODED_1_OF_4: + parser->decoderData.state = DECODED_2_OF_4; + parser->decoderData.bits |= (uint32_t)b << 16; + break; + + case DECODED_2_OF_4: + parser->decoderData.state = DECODED_3_OF_4; + parser->decoderData.bits |= (uint32_t)b << 8; + break; + + case DECODED_3_OF_4: + bytes[0] = (byte)(parser->decoderData.bits >> 24); + bytes[1] = (byte)(parser->decoderData.bits >> 16); + bytes[2] = (byte)(parser->decoderData.bits >> 8); + bytes[3] = (byte)(b); + + /* We try to match the following patterns in order, where .. is any + byte value and nz is any non-zero byte value: + EF BB BF .. => UTF-8 with BOM + FF FE 00 00 => UTF-32LE with BOM + FF FE nz 00 => UTF-16LE with BOM + 00 00 FE FF -> UTF-32BE with BOM + FE FF .. .. => UTF-16BE with BOM + nz nz .. .. => UTF-8 + nz 00 nz .. => UTF-16LE + nz 00 00 00 => UTF-32LE + 00 nz .. .. => UTF-16BE + 00 00 00 nz => UTF-32BE + .. .. .. .. => unknown encoding */ + if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) + { + /* EF BB BF .. */ + parser->inputEncoding = JSON_UTF8; + } + else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00) + { + /* FF FE 00 00 or + FF FE nz 00 */ + parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE; + } + else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF) + { + /* 00 00 FE FF */ + parser->inputEncoding = JSON_UTF32BE; + } + else if (bytes[0] == 0xFE && bytes[1] == 0xFF) + { + /* FE FF .. .. */ + parser->inputEncoding = JSON_UTF16BE; + } + else if (bytes[0] != 0x00) + { + /* nz .. .. .. */ + if (bytes[1] != 0x00) + { + /* nz nz .. .. */ + parser->inputEncoding = JSON_UTF8; + } + else if (bytes[2] != 0x00) + { + /* nz 00 nz .. 
*/ + parser->inputEncoding = JSON_UTF16LE; + } + else if (bytes[3] == 0x00) + { + /* nz 00 00 00 */ + parser->inputEncoding = JSON_UTF32LE; + } + else + { + /* nz 00 00 nz => error */ + } + } + else if (bytes[1] != 0x00) + { + /* 00 nz .. .. */ + parser->inputEncoding = JSON_UTF16BE; + } + else if (bytes[2] == 0x00 && bytes[3] != 0x00) + { + /* 00 00 00 nz */ + parser->inputEncoding = JSON_UTF32BE; + } + else + { + /* 00 00 nz .. or + 00 00 00 00 => error */ + } + + if (parser->inputEncoding == JSON_UnknownEncoding) + { + return JSON_Parser_HandleInvalidEncodingSequence(parser, 4); + } + + if (!JSON_Parser_CallEncodingDetectedHandler(parser)) + { + return JSON_Failure; + } + + /* Reset the decoder before reprocessing the bytes. */ + Decoder_Reset(&parser->decoderData); + return JSON_Parser_ProcessInputBytes(parser, bytes, 4); + } + + /* We don't have 4 bytes yet. */ + return JSON_Success; +} + +JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length) +{ + /* Note that if length is 0, pBytes is allowed to be NULL. 
*/ + size_t i = 0; + while (parser->inputEncoding == JSON_UnknownEncoding && i < length) + { + if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i])) + { + return JSON_Failure; + } + i++; + } + while (i < length) + { + DecoderOutput output = Decoder_ProcessByte(&parser->decoderData, parser->inputEncoding, pBytes[i]); + DecoderResultCode result = DECODER_RESULT_CODE(output); + switch (result) + { + case SEQUENCE_PENDING: + i++; + break; + + case SEQUENCE_COMPLETE: + if (!JSON_Parser_ProcessCodepoint(parser, DECODER_CODEPOINT(output), DECODER_SEQUENCE_LENGTH(output))) + { + return JSON_Failure; + } + i++; + break; + + case SEQUENCE_INVALID_INCLUSIVE: + i++; + /* fallthrough */ + case SEQUENCE_INVALID_EXCLUSIVE: + if (!JSON_Parser_HandleInvalidEncodingSequence(parser, DECODER_SEQUENCE_LENGTH(output))) + { + return JSON_Failure; + } + break; + } + } + return JSON_Success; +} + +static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser) +{ + /* If the input was 1, 2, or 3 bytes long, and the input encoding was not + explicitly specified by the client, we can sometimes make a reasonable + guess. If the input was 1 or 3 bytes long, the only encoding that could + possibly be valid JSON is UF-8. If the input was 2 bytes long, we try + to match the following patterns in order, where .. is any byte value + and nz is any non-zero byte value: + FF FE => UTF-16LE with BOM + FE FF => UTF-16BE with BOM + nz nz => UTF-8 + nz 00 => UTF-16LE + 00 nz => UTF-16BE + .. .. 
=> unknown encoding + */ + if (parser->inputEncoding == JSON_UnknownEncoding && + parser->decoderData.state != DECODER_RESET) + { + byte bytes[3]; + size_t length = 0; + bytes[0] = (byte)(parser->decoderData.bits >> 24); + bytes[1] = (byte)(parser->decoderData.bits >> 16); + bytes[2] = (byte)(parser->decoderData.bits >> 8); + switch (parser->decoderData.state) + { + case DECODED_1_OF_4: + parser->inputEncoding = JSON_UTF8; + length = 1; + break; + + case DECODED_2_OF_4: + if (bytes[0] == 0xFF && bytes[1] == 0xFE) + { + /* FF FE */ + parser->inputEncoding = JSON_UTF16LE; + } + else if (bytes[0] == 0xFE && bytes[1] == 0xFF) + { + /* FE FF */ + parser->inputEncoding = JSON_UTF16BE; + } + else if (bytes[0] != 0x00) + { + /* nz nz or + nz 00 */ + parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE; + } + else if (bytes[1] != 0x00) + { + /* 00 nz */ + parser->inputEncoding = JSON_UTF16BE; + } + else + { + /* 00 00 */ + return JSON_Parser_HandleInvalidEncodingSequence(parser, 2); + } + length = 2; + break; + + case DECODED_3_OF_4: + parser->inputEncoding = JSON_UTF8; + length = 3; + break; + } + + if (!JSON_Parser_CallEncodingDetectedHandler(parser)) + { + return JSON_Failure; + } + + /* Reset the decoder before reprocessing the bytes. */ + parser->decoderData.state = DECODER_RESET; + parser->decoderData.bits = 0; + if (!JSON_Parser_ProcessInputBytes(parser, bytes, length)) + { + return JSON_Failure; + } + } + + /* The decoder should be idle when parsing finishes. */ + if (Decoder_SequencePending(&parser->decoderData)) + { + return JSON_Parser_HandleInvalidEncodingSequence(parser, DECODER_STATE_BYTES(parser->decoderData.state)); + } + return JSON_Success; +} + +/* Parser API functions. 
*/ + +JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite) +{ + JSON_Parser parser; + JSON_MemorySuite memorySuite; + if (pMemorySuite) + { + memorySuite = *pMemorySuite; + if (!memorySuite.realloc || !memorySuite.free) + { + /* The full memory suite must be specified. */ + return NULL; + } + } + else + { + memorySuite = defaultMemorySuite; + } + parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data)); + if (!parser) + { + return NULL; + } + parser->memorySuite = memorySuite; + JSON_Parser_ResetData(parser, 0/* isInitialized */); + return parser; +} + +JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API)) + { + return JSON_Failure; + } + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API); + if (parser->pTokenBytes != parser->defaultTokenBytes) + { + parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes); + } + while (parser->pMemberNames) + { + JSON_Parser_PopMemberNameList(parser); + } + Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite); + parser->memorySuite.free(parser->memorySuite.userData, parser); + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API)) + { + return JSON_Failure; + } + SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API); + JSON_Parser_ResetData(parser, 1/* isInitialized */); + /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */ + return JSON_Success; +} + +void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser) +{ + return parser ? 
parser->userData : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData) +{ + if (!parser) + { + return JSON_Failure; + } + parser->userData = userData; + return JSON_Success; +} + +JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser) +{ + return parser ? (JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding; +} + +JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding) +{ + if (!parser || encoding < JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + parser->inputEncoding = (Encoding)encoding; + return JSON_Success; +} + +JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser) +{ + return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8; +} + +JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding) +{ + if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + parser->stringEncoding = (Encoding)encoding; + return JSON_Success; +} + +size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser) +{ + return parser ? parser->maxStringLength : SIZE_MAX; +} + +JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + parser->maxStringLength = maxLength; + return JSON_Success; +} + +JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser) +{ + return parser ? 
(JSON_Encoding)parser->numberEncoding : JSON_UTF8; +} + +JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding) +{ + if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + parser->numberEncoding = (Encoding)encoding; + return JSON_Success; +} + +size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser) +{ + return parser ? parser->maxNumberLength : SIZE_MAX; +} + +JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + parser->maxNumberLength = maxLength; + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? 
JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? 
JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser) +{ + return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument) +{ + if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument); + return JSON_Success; +} + +JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser) +{ + return parser ? 
(JSON_Error)parser->error : JSON_Error_None; +} + +JSON_Status JSON_CALL JSON_Parser_GetErrorLocation(JSON_Parser parser, JSON_Location* pLocation) +{ + if (!pLocation || !parser || parser->error == JSON_Error_None) + { + return JSON_Failure; + } + if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START) + { + pLocation->byte = parser->tokenLocationByte; + pLocation->line = parser->tokenLocationLine; + pLocation->column = parser->tokenLocationColumn; + } + else + { + pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset); + pLocation->line = parser->codepointLocationLine; + pLocation->column = parser->codepointLocationColumn - parser->errorOffset; + } + pLocation->depth = parser->depth; + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Parser_GetTokenLocation(JSON_Parser parser, JSON_Location* pLocation) +{ + if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER)) + { + return JSON_Failure; + } + pLocation->byte = parser->tokenLocationByte; + pLocation->line = parser->tokenLocationLine; + pLocation->column = parser->tokenLocationColumn; + pLocation->depth = parser->depth; + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation(JSON_Parser parser, JSON_Location* pLocation) +{ + if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER)) + { + return JSON_Failure; + } + pLocation->byte = parser->codepointLocationByte; + pLocation->line = parser->codepointLocationLine; + pLocation->column = parser->codepointLocationColumn; + pLocation->depth = parser->depth; + return JSON_Success; +} + +JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser) +{ + return parser ? 
parser->encodingDetectedHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->encodingDetectedHandler = handler; + return JSON_Success; +} + +JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser) +{ + return parser ? parser->nullHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetNullHandler(JSON_Parser parser, JSON_Parser_NullHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->nullHandler = handler; + return JSON_Success; +} + +JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser) +{ + return parser ? parser->booleanHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler(JSON_Parser parser, JSON_Parser_BooleanHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->booleanHandler = handler; + return JSON_Success; +} + +JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser) +{ + return parser ? parser->stringHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetStringHandler(JSON_Parser parser, JSON_Parser_StringHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->stringHandler = handler; + return JSON_Success; +} + +JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser) +{ + return parser ? parser->numberHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetNumberHandler(JSON_Parser parser, JSON_Parser_NumberHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->numberHandler = handler; + return JSON_Success; +} + +JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser) +{ + return parser ? 
parser->specialNumberHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->specialNumberHandler = handler; + return JSON_Success; +} + +JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser) +{ + return parser ? parser->startObjectHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler(JSON_Parser parser, JSON_Parser_StartObjectHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->startObjectHandler = handler; + return JSON_Success; +} + +JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser) +{ + return parser ? parser->endObjectHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler(JSON_Parser parser, JSON_Parser_EndObjectHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->endObjectHandler = handler; + return JSON_Success; +} + +JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser) +{ + return parser ? parser->objectMemberHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler(JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->objectMemberHandler = handler; + return JSON_Success; +} + +JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser) +{ + return parser ? parser->startArrayHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler(JSON_Parser parser, JSON_Parser_StartArrayHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->startArrayHandler = handler; + return JSON_Success; +} + +JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser) +{ + return parser ? 
parser->endArrayHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler(JSON_Parser parser, JSON_Parser_EndArrayHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->endArrayHandler = handler; + return JSON_Success; +} + +JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser) +{ + return parser ? parser->arrayItemHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler(JSON_Parser parser, JSON_Parser_ArrayItemHandler handler) +{ + if (!parser) + { + return JSON_Failure; + } + parser->arrayItemHandler = handler; + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal) +{ + JSON_Status status = JSON_Failure; + if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API)) + { + int finishedParsing = 0; + SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API); + if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length)) + { + /* New input was parsed successfully. */ + if (isFinal) + { + /* Make sure there is nothing pending in the decoder, lexer, + or parser. */ + if (JSON_Parser_FlushDecoder(parser) && + JSON_Parser_FlushLexer(parser) && + JSON_Parser_FlushParser(parser)) + { + status = JSON_Success; + } + finishedParsing = 1; + } + else + { + status = JSON_Success; + } + } + else + { + /* New input failed to parse. */ + finishedParsing = 1; + } + if (finishedParsing) + { + SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED); + } + SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API); + } + return status; +} + +#endif /* JSON_NO_PARSER */ + +/******************** JSON Writer ********************/ + +#ifndef JSON_NO_WRITER + +/* Combinable writer state flags. 
*/ +#define WRITER_RESET 0x0 +#define WRITER_STARTED 0x1 +#define WRITER_IN_PROTECTED_API 0x2 +typedef byte WriterState; + +/* Combinable writer settings flags. */ +#define WRITER_DEFAULT_FLAGS 0x0 +#define WRITER_USE_CRLF 0x1 +#define WRITER_REPLACE_INVALID 0x2 +#define WRITER_ESCAPE_NON_ASCII 0x4 +typedef byte WriterFlags; + +/* A writer instance. */ +struct JSON_Writer_Data +{ + JSON_MemorySuite memorySuite; + void* userData; + WriterState state; + WriterFlags flags; + Encoding outputEncoding; + Error error; + GrammarianData grammarianData; + JSON_Writer_OutputHandler outputHandler; +}; + +/* Writer internal functions. */ + +static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized) +{ + writer->userData = NULL; + writer->flags = WRITER_DEFAULT_FLAGS; + writer->outputEncoding = JSON_UTF8; + writer->error = JSON_Error_None; + Grammarian_Reset(&writer->grammarianData, isInitialized); + writer->outputHandler = NULL; + writer->state = WRITER_RESET; /* do this last! */ +} + +static void JSON_Writer_SetError(JSON_Writer writer, Error error) +{ + writer->error = error; +} + +static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token) +{ + GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite); + switch (GRAMMARIAN_RESULT_CODE(output)) + { + case REJECTED_TOKEN: + JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken); + return JSON_Failure; + + case SYMBOL_STACK_FULL: + JSON_Writer_SetError(writer, JSON_Error_OutOfMemory); + return JSON_Failure; + } + return JSON_Success; +} + +static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length) +{ + if (writer->outputHandler && length) + { + if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue) + { + JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler); + return JSON_Failure; + } + } + return JSON_Success; +} + +static Codepoint 
JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c) +{ + switch (c) + { + case BACKSPACE_CODEPOINT: + return 'b'; + + case TAB_CODEPOINT: + return 't'; + + case LINE_FEED_CODEPOINT: + return 'n'; + + case FORM_FEED_CODEPOINT: + return 'f'; + + case CARRIAGE_RETURN_CODEPOINT: + return 'r'; + + case '"': + return '"'; + + case '/': + return '/'; + + case '\\': + return '\\'; + + case DELETE_CODEPOINT: + case LINE_SEPARATOR_CODEPOINT: + case PARAGRAPH_SEPARATOR_CODEPOINT: + return 'u'; + + default: + if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) || + (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c > FIRST_NON_ASCII_CODEPOINT)) + { + return 'u'; + } + break; + } + return 0; +} + +typedef struct tag_WriteBufferData +{ + size_t used; + byte bytes[256]; +} WriteBufferData; +typedef WriteBufferData* WriteBuffer; + +static void WriteBuffer_Reset(WriteBuffer buffer) +{ + buffer->used = 0; +} + +static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, JSON_Writer writer) +{ + JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used); + buffer->used = 0; + return status; +} + +static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length) +{ + if (buffer->used + length > sizeof(buffer->bytes) && + !WriteBuffer_Flush(buffer, writer)) + { + return JSON_Failure; + } + memcpy(&buffer->bytes[buffer->used], pBytes, length); + buffer->used += length; + return JSON_Success; +} + +static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c) +{ + if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) && + !WriteBuffer_Flush(buffer, writer)) + { + return JSON_Failure; + } + buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]); + return JSON_Success; +} + +static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c) +{ + if (c >= 
FIRST_NON_BMP_CODEPOINT) + { + /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16 + surrogate pair for the codepoint. We put the leading surrogate + in the low 16 bits of c so that it gets written first, then + the second pass through the loop will write out the trailing + surrogate. x*/ + c = SURROGATES_FROM_CODEPOINT(c); + c = (c << 16) | (c >> 16); + } + do + { + static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + byte escapeSequence[6]; + int i; + escapeSequence[0] = '\\'; + escapeSequence[1] = 'u'; + escapeSequence[2] = hexDigits[(c >> 12) & 0xF]; + escapeSequence[3] = hexDigits[(c >> 8) & 0xF]; + escapeSequence[4] = hexDigits[(c >> 4) & 0xF]; + escapeSequence[5] = hexDigits[c & 0xF]; + for (i = 0; i < sizeof(escapeSequence); i++) + { + if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i])) + { + return JSON_Failure; + } + } + c >>= 16; + } while (c); + return JSON_Success; +} + +static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding) +{ + static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 }; + static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF }; + + const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1]; + size_t minSequenceLength = SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); + DecoderData decoderData; + WriteBufferData bufferData; + size_t i = 0; + + WriteBuffer_Reset(&bufferData); + + /* Start quote. */ + if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength)) + { + return JSON_Failure; + } + + /* String contents. 
*/ + Decoder_Reset(&decoderData); + while (i < length) + { + DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]); + DecoderResultCode result = DECODER_RESULT_CODE(output); + Codepoint c; + Codepoint escapeCharacter; + switch (result) + { + case SEQUENCE_PENDING: + i++; + break; + + case SEQUENCE_COMPLETE: + c = DECODER_CODEPOINT(output); + escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c); + switch (escapeCharacter) + { + case 0: + /* Output the codepoint as a normal encoding sequence. */ + if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c)) + { + return JSON_Failure; + } + break; + + case 'u': + /* Output the codepoint as 1 or 2 hex escape sequences. */ + if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c)) + { + return JSON_Failure; + } + break; + + default: + /* Output the codepoint as a simple escape sequence. */ + if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') || + !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter)) + { + return JSON_Failure; + } + break; + } + i++; + break; + + case SEQUENCE_INVALID_INCLUSIVE: + i++; + /* fallthrough */ + case SEQUENCE_INVALID_EXCLUSIVE: + if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) + { + if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT)) + { + return JSON_Failure; + } + } + else + { + /* Output whatever valid bytes we've accumulated before failing. */ + if (WriteBuffer_Flush(&bufferData, writer)) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); + } + return JSON_Failure; + } + break; + } + } + if (Decoder_SequencePending(&decoderData)) + { + if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) + { + if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT)) + { + return JSON_Failure; + } + } + else + { + /* Output whatever valid bytes we've accumulated before failing. 
*/ + if (WriteBuffer_Flush(&bufferData, writer)) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); + } + return JSON_Failure; + } + } + + /* End quote. */ + if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) || + !WriteBuffer_Flush(&bufferData, writer)) + { + return JSON_Failure; + } + return JSON_Success; +} + +static LexerState LexNumberCharacter(LexerState state, Codepoint c) +{ + switch (state) + { + case LEXING_WHITESPACE: + if (c == '-') + { + state = LEXING_NUMBER_AFTER_MINUS; + } + else if (c == '0') + { + state = LEXING_NUMBER_AFTER_LEADING_ZERO; + } + else if (c >= '1' && c <= '9') + { + state = LEXING_NUMBER_DECIMAL_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_MINUS: + if (c == '0') + { + state = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO; + } + else if (c >= '1' && c <= '9') + { + state = LEXING_NUMBER_DECIMAL_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_LEADING_ZERO: + case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO: + if (c == '.') + { + state = LEXING_NUMBER_AFTER_DOT; + } + else if (c == 'e' || c == 'E') + { + state = LEXING_NUMBER_AFTER_E; + } + else if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO) + { + state = LEXING_NUMBER_AFTER_X; + } + else if (c == EOF_CODEPOINT) + { + state = LEXING_WHITESPACE; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_X: + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + { + state = LEXING_NUMBER_HEX_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_HEX_DIGITS: + if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + { + /* Still LEXING_NUMBER_HEX_DIGITS. 
*/ + } + else if (c == EOF_CODEPOINT) + { + state = LEXING_WHITESPACE; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_DECIMAL_DIGITS: + if (c >= '0' && c <= '9') + { + /* Still LEXING_NUMBER_DECIMAL_DIGITS. */ + } + else if (c == '.') + { + state = LEXING_NUMBER_AFTER_DOT; + } + else if (c == 'e' || c == 'E') + { + state = LEXING_NUMBER_AFTER_E; + } + else if (c == EOF_CODEPOINT) + { + state = LEXING_WHITESPACE; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_DOT: + if (c >= '0' && c <= '9') + { + state = LEXING_NUMBER_FRACTIONAL_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_FRACTIONAL_DIGITS: + if (c >= '0' && c <= '9') + { + /* Still LEXING_NUMBER_FRACTIONAL_DIGITS. */ + } + else if (c == 'e' || c == 'E') + { + state = LEXING_NUMBER_AFTER_E; + } + else if (c == EOF_CODEPOINT) + { + state = LEXING_WHITESPACE; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_E: + if (c == '+' || c == '-') + { + state = LEXING_NUMBER_AFTER_EXPONENT_SIGN; + } + else if (c >= '0' && c <= '9') + { + state = LEXING_NUMBER_EXPONENT_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_AFTER_EXPONENT_SIGN: + if (c >= '0' && c <= '9') + { + state = LEXING_NUMBER_EXPONENT_DIGITS; + } + else + { + state = LEXER_ERROR; + } + break; + + case LEXING_NUMBER_EXPONENT_DIGITS: + if (c >= '0' && c <= '9') + { + /* Still LEXING_NUMBER_EXPONENT_DIGITS. 
*/ + } + else if (c == EOF_CODEPOINT) + { + state = LEXING_WHITESPACE; + } + else + { + state = LEXER_ERROR; + } + break; + } + return state; +} + +static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding) +{ + DecoderData decoderData; + WriteBufferData bufferData; + LexerState lexerState = LEXING_WHITESPACE; + size_t i; + Decoder_Reset(&decoderData); + WriteBuffer_Reset(&bufferData); + for (i = 0; i < length; i++) + { + DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]); + DecoderResultCode result = DECODER_RESULT_CODE(output); + Codepoint c; + switch (result) + { + case SEQUENCE_PENDING: + break; + + case SEQUENCE_COMPLETE: + c = DECODER_CODEPOINT(output); + lexerState = LexNumberCharacter(lexerState, c); + if (lexerState == LEXER_ERROR) + { + /* Output whatever valid bytes we've accumulated before failing. */ + if (WriteBuffer_Flush(&bufferData, writer)) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidNumber); + } + return JSON_Failure; + } + if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c)) + { + return JSON_Failure; + } + break; + + case SEQUENCE_INVALID_INCLUSIVE: + case SEQUENCE_INVALID_EXCLUSIVE: + /* Output whatever valid bytes we've accumulated before failing. 
*/ + if (WriteBuffer_Flush(&bufferData, writer)) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); + } + return JSON_Failure; + } + } + if (!WriteBuffer_Flush(&bufferData, writer)) + { + return JSON_Failure; + } + if (Decoder_SequencePending(&decoderData)) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); + return JSON_Failure; + } + if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR) + { + JSON_Writer_SetError(writer, JSON_Error_InvalidNumber); + return JSON_Failure; + } + return JSON_Success; +} + +#define SPACES_PER_CHUNK 8 +static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces) +{ + static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }; + static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 }; + static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 }; + static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 }; + + size_t encodedLength = SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); + const byte* encoded = spacesEncodings[writer->outputEncoding - 1]; + while (numberOfSpaces > SPACES_PER_CHUNK) + { + if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength)) + { + return JSON_Failure; + } + numberOfSpaces -= SPACES_PER_CHUNK; + } + if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength)) + { + return JSON_Failure; + } + return JSON_Success; +} + +static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length) +{ + JSON_Status status = JSON_Failure; + if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) + { + size_t 
encodedLength = length * SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); + SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); + if (JSON_Writer_ProcessToken(writer, token) && + JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength)) + { + status = JSON_Success; + } + SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); + } + return status; +} + +/* Writer API functions. */ + +JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite) +{ + JSON_Writer writer; + JSON_MemorySuite memorySuite; + if (pMemorySuite) + { + memorySuite = *pMemorySuite; + if (!memorySuite.realloc || !memorySuite.free) + { + /* The full memory suite must be specified. */ + return NULL; + } + } + else + { + memorySuite = defaultMemorySuite; + } + writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data)); + if (!writer) + { + return NULL; + } + writer->memorySuite = memorySuite; + JSON_Writer_ResetData(writer, 0/* isInitialized */); + return writer; +} + +JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API)) + { + return JSON_Failure; + } + SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API); + Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite); + writer->memorySuite.free(writer->memorySuite.userData, writer); + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API)) + { + return JSON_Failure; + } + SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API); + JSON_Writer_ResetData(writer, 1/* isInitialized */); + /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */ + return JSON_Success; +} + +void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer) +{ + return writer ? 
writer->userData : NULL; +} + +JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData) +{ + if (!writer) + { + return JSON_Failure; + } + writer->userData = userData; + return JSON_Success; +} + +JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer) +{ + return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8; +} + +JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE) + { + return JSON_Failure; + } + writer->outputEncoding = (Encoding)encoding; + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer) +{ + return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer) +{ + return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences); + return JSON_Success; +} + +JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer) +{ + return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? 
JSON_True : JSON_False; +} + +JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters) +{ + if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) + { + return JSON_Failure; + } + SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters); + return JSON_Success; +} + +JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer) +{ + return writer ? (JSON_Error)writer->error : JSON_Error_None; +} + +JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer) +{ + return writer ? writer->outputHandler : NULL; +} + +JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler) +{ + if (!writer) + { + return JSON_Failure; + } + writer->outputHandler = handler; + return JSON_Success; +} + +JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer) +{ + static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' }; + static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 }; + static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 }; + static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 }; + + return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8)); +} + +JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value) +{ + static const byte trueUTF8[] = { 't', 'r', 'u', 'e' }; + static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 }; + static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 }; + static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 }; + + static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' }; + static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 }; + static const byte falseUTF32[] = { 0, 
0, 0, 'f', 0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 }; + static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 }; + + Symbol token; + const byte* const* encodings; + size_t length; + if (value) + { + token = T_TRUE; + encodings = trueEncodings; + length = sizeof(trueUTF8); + } + else + { + token = T_FALSE; + encodings = falseEncodings; + length = sizeof(falseUTF8); + } + return JSON_Writer_WriteSimpleToken(writer, token, encodings, length); +} + +JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding) +{ + JSON_Status status = JSON_Failure; + if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE && + !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) + { + SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); + if (JSON_Writer_ProcessToken(writer, T_STRING)) + { + status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding); + } + SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); + } + return status; +} + +JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding) +{ + JSON_Status status = JSON_Failure; + if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE && + !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) + { + SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); + if (JSON_Writer_ProcessToken(writer, T_NUMBER)) + { + status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding); + } + SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); + } + return status; +} + +JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, 
JSON_SpecialNumber value) +{ + static const byte nanUTF8[] = { 'N', 'a', 'N' }; + static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 }; + static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 }; + static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 }; + + static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' }; + static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 }; + static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 }; + static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 }; + static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 }; + + Symbol token; + const byte* const* encodings; + size_t length; + if (value == JSON_Infinity) + { + token = T_INFINITY; + encodings = infinityEncodings; + length = sizeof(ninfUTF8) - 1/* - */; + } + else if (value == JSON_NegativeInfinity) + { + token = T_NEGATIVE_INFINITY; + encodings = negativeInfinityEncodings; + length = sizeof(ninfUTF8); + } + else + { + token = T_NAN; + encodings = nanEncodings; + length = sizeof(nanUTF8); + } + return JSON_Writer_WriteSimpleToken(writer, token, encodings, length); +} + +JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + 
return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer) +{ + static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 }; + static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; + + return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1); +} + +JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces) +{ + JSON_Status status = JSON_Failure; + if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) + { + SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); + status = JSON_Writer_OutputSpaces(writer, numberOfSpaces); + SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); + } + return status; +} + +JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer) +{ + static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 }; + static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF }; + + static const byte 
crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT }; + static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 }; + static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 }; + static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 }; + + JSON_Status status = JSON_Failure; + if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) + { + const byte* const* encodings; + size_t length; + size_t encodedLength; + SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); + if (GET_FLAGS(writer->flags, WRITER_USE_CRLF)) + { + encodings = crlfEncodings; + length = 2; + } + else + { + encodings = lfEncodings; + length = 1; + } + encodedLength = length * SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); + if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength)) + { + status = JSON_Success; + } + SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); + } + return status; +} + +#endif /* JSON_NO_WRITER */ + +/******************** Miscellaneous API ********************/ + +const JSON_Version* JSON_CALL JSON_LibraryVersion(void) +{ + static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION }; + return &version; +} + +const char* JSON_CALL JSON_ErrorString(JSON_Error error) +{ + /* This array must match the order and number of the JSON_Error enum. 
*/ + static const char* errorStrings[] = + { + /* JSON_Error_None */ "no error", + /* JSON_Error_OutOfMemory */ "could not allocate enough memory", + /* JSON_Error_AbortedByHandler */ "the operation was aborted by a handler", + /* JSON_Error_BOMNotAllowed */ "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627", + /* JSON_Error_InvalidEncodingSequence */ "the input contains a byte or sequence of bytes that is not valid for the input encoding", + /* JSON_Error_UnknownToken */ "the input contains an unknown token", + /* JSON_Error_UnexpectedToken */ "the input contains an unexpected token", + /* JSON_Error_IncompleteToken */ "the input ends in the middle of a token", + /* JSON_Error_ExpectedMoreTokens */ "the input ends when more tokens are expected", + /* JSON_Error_UnescapedControlCharacter */ "the input contains a string containing an unescaped control character (U+0000 - U+001F)", + /* JSON_Error_InvalidEscapeSequence */ "the input contains a string containing an invalid escape sequence", + /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint", + /* JSON_Error_TooLongString */ "the input contains a string that is too long", + /* JSON_Error_InvalidNumber */ "the input contains an invalid number", + /* JSON_Error_TooLongNumber */ "the input contains a number that is too long", + /* JSON_Error_DuplicateObjectMember */ "the input contains an object with duplicate members", + /* JSON_Error_StoppedAfterEmbeddedDocument */ "the end of the embedded document was reached" + }; + /* Codes outside the table fall through to the second arm of the conditional. */ + return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0]))) + ? 
errorStrings[error] + : ""; +} + +static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE); + +JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void) +{ + return (JSON_Encoding)(((byte*)&endianEncodings)[1]); +} + +JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void) +{ + return (JSON_Encoding)(((byte*)&endianEncodings)[0]); +} diff --git a/libretro-common/include/formats/jsonsax_full.h b/libretro-common/include/formats/jsonsax_full.h new file mode 100644 index 0000000000..b89798e3c6 --- /dev/null +++ b/libretro-common/include/formats/jsonsax_full.h @@ -0,0 +1,1040 @@ +/* + Copyright (c) 2012 John-Anthony Owens + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. 
+*/ + +#ifndef JSONSAX_H_INCLUDED +#define JSONSAX_H_INCLUDED + +/* The library version */ +#define JSON_MAJOR_VERSION 1 +#define JSON_MINOR_VERSION 4 +#define JSON_MICRO_VERSION 5 + +/* JSON_NO_PARSER and JSON_NO_WRITER, if defined, remove the corresponding + * APIs and functionality from the library. + */ +#if defined(JSON_NO_PARSER) && defined(JSON_NO_WRITER) +#error JSON_NO_PARSER and JSON_NO_WRITER cannot both be defined! +#endif + +#include /* for size_t and NULL */ + +/* The library API is C and should not be subjected to C++ name mangling. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* JSON_EXPORT controls the library's public API import/export linkage + * specifiers. By default, the library will be compiled to support dynamic + * linkage. In order to build the library for static linkage, the JSON_STATIC + * macro must be defined when the library itself is built AND when the client + * includes jsonsax.h. + */ +#if defined(JSON_STATIC) +#define JSON_EXPORT /* nothing */ +#else +#if defined(_MSC_VER) +#if defined(JSON_BUILDING) +#define JSON_EXPORT __declspec(dllexport) +#else +#define JSON_EXPORT __declspec(dllimport) +#endif +#else +#if defined(JSON_BUILDING) +#define JSON_EXPORT __attribute__ ((visibility("default"))) +#else +#define JSON_EXPORT /* nothing */ +#endif +#endif +#endif + +/* JSON_CALL controls the library's public API calling-convention. Clients' + * handler functions should be declared with JSON_CALL in order to ensure + * that the calling convention matches. + */ +#ifndef JSON_CALL +#if defined(_MSC_VER) +#define JSON_CALL __cdecl +#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) +#define JSON_CALL __attribute__((cdecl)) +#else +#define JSON_CALL /* nothing */ +#endif +#endif + +#define JSON_API(t) JSON_EXPORT t JSON_CALL + +/* Boolean values used by the library. 
*/ +typedef enum tag_JSON_Boolean +{ + JSON_False = 0, + JSON_True = 1 +} JSON_Boolean; + +/* Values returned by library APIs to indicate success or failure. */ +typedef enum tag_JSON_Status +{ + JSON_Failure = 0, + JSON_Success = 1 +} JSON_Status; + +/* Error codes. */ +typedef enum tag_JSON_Error +{ + JSON_Error_None = 0, + JSON_Error_OutOfMemory = 1, + JSON_Error_AbortedByHandler = 2, + JSON_Error_BOMNotAllowed = 3, + JSON_Error_InvalidEncodingSequence = 4, + JSON_Error_UnknownToken = 5, + JSON_Error_UnexpectedToken = 6, + JSON_Error_IncompleteToken = 7, + JSON_Error_ExpectedMoreTokens = 8, + JSON_Error_UnescapedControlCharacter = 9, + JSON_Error_InvalidEscapeSequence = 10, + JSON_Error_UnpairedSurrogateEscapeSequence = 11, + JSON_Error_TooLongString = 12, + JSON_Error_InvalidNumber = 13, + JSON_Error_TooLongNumber = 14, + JSON_Error_DuplicateObjectMember = 15, + JSON_Error_StoppedAfterEmbeddedDocument = 16 +} JSON_Error; + +/* Text encodings. */ +typedef enum tag_JSON_Encoding +{ + JSON_UnknownEncoding = 0, + JSON_UTF8 = 1, + JSON_UTF16LE = 2, + JSON_UTF16BE = 3, + JSON_UTF32LE = 4, + JSON_UTF32BE = 5 +} JSON_Encoding; + +/* Attributes of a string value. */ +typedef enum tag_JSON_StringAttribute +{ + JSON_SimpleString = 0, + JSON_ContainsNullCharacter = 1 << 0, /* U+0000 */ + JSON_ContainsControlCharacter = 1 << 1, /* U+0000 - U+001F */ + JSON_ContainsNonASCIICharacter = 1 << 2, /* U+0080 - U+10FFFF */ + JSON_ContainsNonBMPCharacter = 1 << 3, /* U+10000 - U+10FFFF */ + JSON_ContainsReplacedCharacter = 1 << 4 /* an invalid encoding sequence was replaced by U+FFFD */ +} JSON_StringAttribute; +typedef unsigned int JSON_StringAttributes; + +/* Attributes of a number value. 
*/ +typedef enum tag_JSON_NumberAttribute +{ + JSON_SimpleNumber = 0, + JSON_IsNegative = 1 << 0, + JSON_IsHex = 1 << 1, + JSON_ContainsDecimalPoint = 1 << 2, + JSON_ContainsExponent = 1 << 3, + JSON_ContainsNegativeExponent = 1 << 4 +} JSON_NumberAttribute; +typedef unsigned int JSON_NumberAttributes; + +/* Types of "special" number. */ +typedef enum tag_JSON_SpecialNumber +{ + JSON_NaN = 0, + JSON_Infinity = 1, + JSON_NegativeInfinity = 2 +} JSON_SpecialNumber; + +/* Information identifying a location in a parser instance's input stream. */ +typedef struct tag_JSON_Location +{ + /* The zero-based index of the byte in the input stream. Note that this + * is the only value that unambiguously identifies the location, since + * line and column refer to characters (which may be encoded in the input + * as multi-byte sequences) rather than bytes. + */ + size_t byte; + + /* The zero-based line number of the character in the input stream. Note + * that the parser treats each of the following character sequences as a + * single line break for purposes of computing line numbers: + * + * U+000A (LINE FEED) + * U+000D (CARRIAGE RETURN) + * U+000D U+000A (CARRIAGE RETURN, LINE FEED) + * + */ + size_t line; + + /* The zero-based column number of the character in the input stream. */ + size_t column; + + /* The zero-based depth in the JSON document structure at the location. */ + size_t depth; +} JSON_Location; + +/* Custom memory management handlers. + * + * The semantics of these handlers correspond exactly to those of standard + * realloc(), and free(). The handlers also receive the value of the memory + * suite's user data parameter, which clients can use to implement memory + * pools or impose custom allocation limits, if desired. + */ +typedef void* (JSON_CALL * JSON_ReallocHandler)(void* userData, void* ptr, size_t size); +typedef void (JSON_CALL * JSON_FreeHandler)(void* userData, void* ptr); + +/* A suite of custom memory management functions. 
*/ +typedef struct tag_JSON_MemorySuite +{ + void* userData; + JSON_ReallocHandler realloc; + JSON_FreeHandler free; +} JSON_MemorySuite; + +/******************** JSON Parser ********************/ + +#ifndef JSON_NO_PARSER + +/* Parser instance. */ +struct JSON_Parser_Data; /* opaque data */ +typedef struct JSON_Parser_Data* JSON_Parser; + +/* Create a parser instance. + * + * If pMemorySuite is null, the library will use the C runtime realloc() and + * free() as the parser's memory management suite. Otherwise, all the + * handlers in the memory suite must be non-null or the call will fail and + * return null. + */ +JSON_API(JSON_Parser) JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite); + +/* Free a parser instance. + * + * Every successful call to JSON_Parser_Create() must eventually be paired + * with a call to JSON_Parser_Free() in order to avoid leaking memory. + * + * This function returns failure if the parser parameter is null or if the + * function was called reentrantly from inside a handler. + */ +JSON_API(JSON_Status) JSON_Parser_Free(JSON_Parser parser); + +/* Reset a parser instance so that it can be used to parse a new input stream. + * + * This function returns failure if the parser parameter is null or if the + * function was called reentrantly from inside a handler. + * + * After a parser is reset, its state is indistinguishable from its state + * when it was returned by JSON_Parser_Create(). The parser's custom memory + * suite, if any, is preserved; all other settings, state, and handlers are + * restored to their default values. + */ +JSON_API(JSON_Status) JSON_Parser_Reset(JSON_Parser parser); + +/* Get and set the user data value associated with a parser instance. + * + * This setting allows clients to associate additional data with a + * parser instance. The parser itself does not use the value. + * + * The default value of this setting is null. + * + * This setting can be changed at any time, even inside handlers. 
+ */ +JSON_API(void*) JSON_Parser_GetUserData(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetUserData(JSON_Parser parser, void* userData); + +/* Get and set the input encoding for a parser instance. + * + * If the client does not explicitly set the input encoding before calling + * JSON_Parser_Parse() on the parser instance, the parser will use the first + * 4 bytes of input to detect the input encoding automatically. Once the + * parser has detected the encoding, calls to JSON_Parser_GetInputEncoding() + * will return the detected value. + * + * The default value of this setting is JSON_UnknownEncoding. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Encoding) JSON_Parser_GetInputEncoding(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding); + +/* Get and set the string encoding for a parser instance. + * + * This setting controls the encoding of the string values that are + * passed to the string and object member handlers. + * + * The default value of this setting is JSON_UTF8. + * + * This setting cannot be set to JSON_UnknownEncoding. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Encoding) JSON_Parser_GetStringEncoding(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding); + +/* Get and set the maximum length of strings that a parser instance allows. + * + * This setting controls the maximum length, in bytes (NOT characters), of + * the encoded strings that are passed to the string and object member + * handlers. If the parser encounters a string that, when encoded in the + * string encoding, is longer than the maximum string length, it triggers + * the JSON_Error_TooLongString error. + * + * The default value of this setting is SIZE_MAX. + * + * This setting cannot be changed once the parser has started parsing.
+ */ +JSON_API(size_t) JSON_Parser_GetMaxStringLength(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength); + +/* Get and set the number encoding for a parser instance. + * + * This setting controls the encoding of the number values that are + * passed to the number handler. + * + * The default value of this setting is JSON_UTF8. + * + * This setting cannot be set to JSON_UnknownEncoding. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Encoding) JSON_Parser_GetNumberEncoding(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding); + +/* Get and set the maximum length of numbers that a parser instance allows. + * + * This setting controls the maximum length, in bytes (NOT characters), of + * the encoded numbers that are passed to the number handler. If the parser + * encounters a number that, when encoded in the number encoding, is longer + * than the maximum number length, it triggers the JSON_Error_TooLongNumber error. + * + * The default value of this setting is SIZE_MAX. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(size_t) JSON_Parser_GetMaxNumberLength(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength); + +/* Get and set whether a parser instance allows the input to begin with a + * byte-order-mark (BOM). + * + * RFC 4627 does not allow JSON text to begin with a BOM, but some clients + * may find it convenient to be lenient in this regard; for example, if the + * JSON text is being read from a file that has a BOM. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing.
+ */ +JSON_API(JSON_Boolean) JSON_Parser_GetAllowBOM(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM); + +/* Get and set whether a parser instance allows Javascript-style comments to + * appear in the JSON text. + * + * RFC 4627 does not allow JSON text to contain comments, but some clients + * may find it useful to allow them. + * + * Both types of comment described by ECMA-262 (multi-line and single-line) + * are supported. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetAllowComments(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments); + +/* Get and set whether a parser instance allows the "special" number literals + * NaN, Infinity, and -Infinity. + * + * RFC 4627 does not provide any way to represent NaN, Infinity, or -Infinity, + * but some clients may find it convenient to recognize these as literals, + * since they are emitted by many common JSON generators. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers); + +/* Get and set whether a parser instance allows hexadecimal notation to be + * used for specifying number values. + * + * RFC 4627 does not allow hexadecimal numbers, but some clients may find it + * convenient to allow them, in order to represent binary bit patterns more + * easily. + * + * The parser recognizes hexadecimal numbers that conform to the syntax of + * HexIntegerLiteral, as described in section 7.8.3 of ECMA-262. 
That is, a + * valid hexadecimal number must comprise the prefix '0x' or '0X', followed + * by a sequence of one or more of the following characters: '0' - '9', + * 'a' - 'f', and 'A' - 'F'. + * + * Hexadecimal numbers cannot be prefixed by a minus sign. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetAllowHexNumbers(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers); + +/* Get and set whether a parser instance allows unescaped control characters + * (U+0000 - U+001F) to appear inside string values. + * + * RFC 4627 does not allow JSON text to contain unescaped control characters, + * but some clients may find it useful to allow them. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters); + +/* Get and set whether a parser instance replaces invalid encoding sequences + * it encounters inside string tokens with the Unicode replacement character + * (U+FFFD) rather than triggering an error. + * + * By default, the parser is strict when decoding the input stream, and will + * fail if it encounters an encoding sequence that is not valid for the input + * encoding. Note especially that this includes (but is not limited to) the + * following: + * + * - Overlong encoding sequences in UTF-8. + * - Surrogate codepoints encoded in UTF-8 or UTF-32. + * - Unpaired or improperly-paired surrogates in UTF-16. + * - Codepoints outside the Unicode range encoded in UTF-8 or UTF-32. 
+ * + * The replacement follows the rules and recommendations described in section + * 3.9 of version 5.2.0 of [the Unicode Standard](http://www.unicode.org/versions/Unicode5.2.0/). + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences); + +/* Get and set whether a parser instance tracks object member names for all + * open objects and detects duplicate members if any occur in the input. + * + * RFC 4627 stipulates that JSON parsers SHOULD check for duplicates, but + * may opt not to in light of reasonable implementation considerations. + * Checking for duplicate members necessarily incurs non-trivial memory + * overhead, and is therefore not enabled by default. Most clients use + * their parse handlers to build some sort of in-memory DOM representation + * of the JSON text and therefore already have the means to check for + * duplicate member names without incurring additional memory overhead; it + * is recommended that these clients implement duplicate member checking + * in their object member handler (refer to JSON_Parser_SetObjectMemberHandler() for + * details) and leave this setting disabled. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetTrackObjectMembers(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers); + +/* Get and set whether a parser instance stops parsing as soon as the end of + * the top-level JSON document is parsed. + * + * This setting allows the client to parse JSON content that is embedded + * inside a larger data stream.
If this setting is enabled, the parser will, + * upon successfully parsing the end of the embedded JSON document, set its + * error to JSON_Error_StoppedAfterEmbeddedDocument, set its error location + * to the location in the input stream immediately following the end of the + * document, and return JSON_Failure from JSON_Parser_Parse(). + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the parser has started parsing. + */ +JSON_API(JSON_Boolean) JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument); + +/* Get the type of error, if any, encountered by a parser instance. + * + * If the parser encountered an error while parsing input, this function + * returns the type of the error. Otherwise, this function returns + * JSON_Error_None. + */ +JSON_API(JSON_Error) JSON_Parser_GetError(JSON_Parser parser); + +/* Get the location in the input stream at which a parser instance + * encountered an error. + * + * If the parser encountered an error while parsing input, this function + * sets the members of the structure pointed to by pLocation to the location + * in the input stream at which the error occurred and returns success. + * Otherwise, it leaves the members unchanged and returns failure. + */ +JSON_API(JSON_Status) JSON_Parser_GetErrorLocation(JSON_Parser parser, JSON_Location* pLocation); + +/* Get the location in the input stream of the beginning of the token + * that is currently being handled by one of a parser instance's parse + * handlers. + * + * If the parser is inside a parse handler, this function sets the members + * of the structure pointed to by pLocation to the location and returns + * success. Otherwise, it leaves the members unchanged and returns failure. 
+ */ +JSON_API(JSON_Status) JSON_Parser_GetTokenLocation(JSON_Parser parser, JSON_Location* pLocation); + +/* Get the location in the input stream that immediately follows the end of + * the token that is currently being handled by one of a parser instance's + * parse handlers. + * + * If the parser is inside a parse handler, this function sets the members + * of the structure pointed to by pLocation to the location and returns + * success. Otherwise, it leaves the members unchanged and returns failure. + */ +JSON_API(JSON_Status) JSON_Parser_GetAfterTokenLocation(JSON_Parser parser, JSON_Location* pLocation); + +/* Parse handlers are callbacks that the client provides in order to + * be notified about the structure of the JSON document as it is being + * parsed. The following notes apply equally to all parse handlers: + * + * 1. Parse handlers are optional. In fact, a parser with no parse + * handlers at all can be used to simply validate that the input + * is valid JSON. + * + * 2. Parse handlers can be set, unset, or changed at any time, even + * from inside a parse handler. + * + * 3. If a parse handler returns JSON_Parser_Abort, the parser will + * abort the parse, set its error to JSON_Error_AbortedByHandler, + * set its error location to the start of the token that triggered + * the handler, and return JSON_Failure from the outer call to + * JSON_Parser_Parse(). + * + * 4. A parse handler can get the location in the input stream of the + * token that triggered the handler by calling + * JSON_Parser_GetTokenLocation(). + */ + +/* Values returned by parse handlers to indicate whether parsing should + * continue or be aborted. + * + * Note that JSON_Parser_TreatAsDuplicateObjectMember should only be returned by + * object member handlers. Refer to JSON_Parser_SetObjectMemberHandler() + * for details.
+ */ +typedef enum tag_JSON_Parser_HandlerResult +{ + JSON_Parser_Continue = 0, /* Continue parsing. */ + JSON_Parser_Abort = 1, /* Abort the parse. */ + JSON_Parser_TreatAsDuplicateObjectMember = 2 /* Object member handlers only: the current object already contains a member with this name. */ +} JSON_Parser_HandlerResult; + +/* Get and set the handler that is called when a parser instance detects the + * input encoding. + * + * If the parser instance's input encoding was set to JSON_UnknownEncoding + * when parsing began, this handler will be called as soon as the actual + * input encoding has been detected. + * + * Note that JSON_Parser_GetTokenLocation() will return failure if called + * from inside this handler, since there is no token associated with this + * event. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EncodingDetectedHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_EncodingDetectedHandler) JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * a JSON null literal value. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_NullHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_NullHandler) JSON_Parser_GetNullHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetNullHandler(JSON_Parser parser, JSON_Parser_NullHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * a JSON boolean value (true or false). + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_BooleanHandler)(JSON_Parser parser, JSON_Boolean value); +JSON_API(JSON_Parser_BooleanHandler) JSON_Parser_GetBooleanHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetBooleanHandler(JSON_Parser parser, JSON_Parser_BooleanHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * a JSON string value.
+ * + * The pValue parameter points to a buffer containing the string value, + * encoded according to the parser instance's string encoding setting. The + * buffer is null-terminated (the null terminator character is also encoded). + * Note, however, that JSON strings may contain embedded null characters, + * which are specifiable using the escape sequence \u0000. The client is + * free to modify the contents of the buffer during the handler. + * + * The length parameter specifies the number of bytes (NOT characters) in + * the encoded string, not including the encoded null terminator. + * + * The attributes parameter provides information about the characters + * that comprise the string. If the option to replace invalid encoding + * sequences is enabled and the string contains any Unicode replacement + * characters (U+FFFD) that were the result of replacing invalid encoding + * sequences in the input, the attributes will include the value + * JSON_ContainsReplacedCharacter. Note that the absence of this attribute + * does not imply that the string does not contain any U+FFFD characters, + * since such characters may have been present in the original input, and + * not inserted by a replacement operation. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StringHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_StringAttributes attributes); +JSON_API(JSON_Parser_StringHandler) JSON_Parser_GetStringHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetStringHandler(JSON_Parser parser, JSON_Parser_StringHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * a JSON number value. + * + * JSON numbers do not have a defined binary representation or precision, + * and different clients may wish to interpret them differently, for + * example, as IEEE 754 doubles, 64-bit integers, or arbitrary-precision + * bignums. 
For this reason, the parser does not attempt to interpret + * number values, but leaves this to the client. + * + * The pValue parameter points to a buffer containing the number value, + * encoded according to the parser instance's number encoding setting. The + * buffer is null-terminated (the null terminator character is also encoded). + * The buffer is guaranteed to contain only characters allowed in JSON number + * values, that is: '0' - '9', '+', '-', '.', 'e', and 'E'; if the option + * to allow hex numbers is enabled, the text may also contain the characters + * 'x', 'X', 'a' - 'f', and 'A' - 'F'. The client is free to modify the + * contents of the buffer during the handler. This is especially useful + * to clients that wish to convert the number to a double using the C + * standard library's strtod() function, which is locale-sensitive; in this + * case, the client should modify the buffer to replace the '.' character + * with localeconv()->decimal_point[0] before passing the buffer to strtod(). + * + * The length parameter specifies the number of bytes (NOT characters) in + * the encoded number, not including the encoded null terminator. + * + * The attributes parameter provides information about the number. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_NumberHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_NumberAttributes attributes); +JSON_API(JSON_Parser_NumberHandler) JSON_Parser_GetNumberHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetNumberHandler(JSON_Parser parser, JSON_Parser_NumberHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * one of the "special" number literals NaN, Infinity, and -Infinity.
+ */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SpecialNumberHandler)(JSON_Parser parser, JSON_SpecialNumber value); +JSON_API(JSON_Parser_SpecialNumberHandler) JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * the left curly brace that starts an object. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StartObjectHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_StartObjectHandler) JSON_Parser_GetStartObjectHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetStartObjectHandler(JSON_Parser parser, JSON_Parser_StartObjectHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * the right curly brace that ends an object. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EndObjectHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_EndObjectHandler) JSON_Parser_GetEndObjectHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetEndObjectHandler(JSON_Parser parser, JSON_Parser_EndObjectHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * an object member name. + * + * The pValue parameter points to a buffer containing the member name, + * encoded according to the parser instance's string encoding setting. The + * buffer is null-terminated (the null terminator character is also encoded). + * Note, however, that JSON strings may contain embedded null characters, + * which are specifiable using the escape sequence \u0000. The client is + * free to modify the contents of the buffer during the handler. + * + * The length parameter specifies the number of bytes (NOT characters) in + * the encoded string, not including the encoded null terminator. 
+ * + * The attributes parameter provides information about the characters + * that comprise the string. If the option to replace invalid encoding + * sequences is enabled and the string contains any Unicode replacement + * characters (U+FFFD) that were the result of replacing invalid encoding + * sequences in the input, the attributes will include the value + * JSON_ContainsReplacedCharacter. Note that the absence of this attribute + * does not imply that the string does not contain any U+FFFD characters, + * since such characters may have been present in the original input, and + * not inserted by a replacement operation. + * + * The handler can return JSON_Parser_TreatAsDuplicateObjectMember to + * indicate that the current object already contains a member with the + * specified name. This allows clients to implement duplicate member + * checking without incurring the additional memory overhead associated + * with enabling the TrackObjectMembers setting. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_ObjectMemberHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_StringAttributes attributes); +JSON_API(JSON_Parser_ObjectMemberHandler) JSON_Parser_GetObjectMemberHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetObjectMemberHandler(JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * the left square brace that starts an array. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StartArrayHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_StartArrayHandler) JSON_Parser_GetStartArrayHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetStartArrayHandler(JSON_Parser parser, JSON_Parser_StartArrayHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * the right square brace that ends an array. 
+ */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EndArrayHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_EndArrayHandler) JSON_Parser_GetEndArrayHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetEndArrayHandler(JSON_Parser parser, JSON_Parser_EndArrayHandler handler); + +/* Get and set the handler that is called when a parser instance encounters + * an array item. + * + * This event is always immediately followed by a null, boolean, string, + * number, special number, start object, or start array event. + */ +typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_ArrayItemHandler)(JSON_Parser parser); +JSON_API(JSON_Parser_ArrayItemHandler) JSON_Parser_GetArrayItemHandler(JSON_Parser parser); +JSON_API(JSON_Status) JSON_Parser_SetArrayItemHandler(JSON_Parser parser, JSON_Parser_ArrayItemHandler handler); + +/* Push zero or more bytes of input to a parser instance. + * + * The pBytes parameter points to a buffer containing the bytes to be + * parsed, if any. pBytes may be NULL if and only if the length parameter + * is 0. + * + * The length parameter specifies the number of bytes (NOT characters) + * pointed to by pBytes. + * + * The isFinal parameter specifies whether the parser should treat the + * input to the call as the last chunk of input in the JSON document. + * If this parameter is JSON_False, the parser will assume that more + * input may be forthcoming. + * + * The parser adheres to [RFC 4627](http://www.ietf.org/rfc/rfc4627.txt), + * except that any JSON value (null, true, false, string, number, object, + * or array) is accepted as a valid top-level entity in the parsed text. + * + * This function returns failure if the parser parameter is null, if the + * function was called reentrantly from inside a handler, or if the + * parser instance has already finished parsing. 
+ */ +JSON_API(JSON_Status) JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal); + +#endif /* JSON_NO_PARSER */ + +/******************** JSON Writer ********************/ + +#ifndef JSON_NO_WRITER + +/* Writer instance. */ +struct JSON_Writer_Data; /* opaque data */ +typedef struct JSON_Writer_Data* JSON_Writer; + +/* Create a writer instance. + * + * If pMemorySuite is null, the library will use the C runtime realloc() and + * free() as the writer's memory management suite. Otherwise, all the + * handlers in the memory suite must be non-null or the call will fail and + * return null. + */ +JSON_API(JSON_Writer) JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite); + +/* Free a writer instance. + * + * Every successful call to JSON_Writer_Create() must eventually be paired + * with a call to JSON_Writer_Free() in order to avoid leaking memory. + * + * This function returns failure if the writer parameter is null or if the + * function was called reentrantly from inside a handler. + */ +JSON_API(JSON_Status) JSON_Writer_Free(JSON_Writer writer); + +/* Reset a writer instance so that it can be used to write a new output + * stream. + * + * This function returns failure if the writer parameter is null or if the + * function was called reentrantly from inside a handler. + * + * After a writer is reset, its state is indistinguishable from its state + * when it was returned by JSON_Writer_Create(). The writer's custom memory + * suite, if any, is preserved; all other settings, state, and handlers are + * restored to their default values. + */ +JSON_API(JSON_Status) JSON_Writer_Reset(JSON_Writer writer); + +/* Get and set the user data value associated with a writer instance. + * + * This setting allows clients to associate additional data with a + * writer instance. The writer itself does not use the value. + * + * The default value of this setting is NULL. 
+ * + * This setting can be changed at any time, even inside handlers. + */ +JSON_API(void*) JSON_Writer_GetUserData(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetUserData(JSON_Writer writer, void* userData); + +/* Get and set the output encoding for a writer instance. + * + * The default value of this setting is JSON_UTF8. + * + * This setting cannot be set to JSON_UnknownEncoding. + * + * This setting cannot be changed once the writer has started writing. + */ +JSON_API(JSON_Encoding) JSON_Writer_GetOutputEncoding(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding); + +/* Get and set whether a writer instance uses CARRIAGE RETURN, LINE FEED + * (CRLF) as the new line sequence generated by JSON_Writer_WriteNewLine(). + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the writer has started writing. + */ +JSON_API(JSON_Boolean) JSON_Writer_GetUseCRLF(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF); + +/* Get and set whether a writer instance replaces invalid encoding sequences + * it encounters in string tokens with the Unicode replacement character + * (U+FFFD) rather than triggering an error. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the writer has started writing. + */ +JSON_API(JSON_Boolean) JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences); + +/* Get and set whether a writer instance escapes all non-ASCII characters + * that it outputs. This can be useful for debugging, or when the output + * will be consumed by a parser that does not support UTF-encoded input. 
+ * It is not recommended as a general practice, since it bloats the size + * of non-ASCII strings considerably, compared to UTF encoding. + * + * The default value of this setting is JSON_False. + * + * This setting cannot be changed once the writer has started writing. + */ +JSON_API(JSON_Boolean) JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters); + +/* Get the type of error, if any, encountered by a writer instance. + * + * If the writer encountered an error while writing output, this function + * returns the type of the error. Otherwise, this function returns + * JSON_Error_None. + */ +JSON_API(JSON_Error) JSON_Writer_GetError(JSON_Writer writer); + +/* The JSON_Writer_WriteXXX() family of functions cause JSON text to be + * sent to a writer instance's output handler. The following notes apply + * equally to all these functions: + * + * 1. The output handler is optional, and can be set, unset, or changed + * at any time, even from inside the output handler. + * + * 2. A single call to JSON_Writer_WriteXXX() may trigger multiple calls + * to the output handler. + * + * 3. All output generated by a call to JSON_Writer_WriteXXX() is sent + * to the output handler before the call returns; that is, the writer + * does not aggregate output from multiple writes before sending it to + * the output handler. + * + * 4. A call to JSON_Writer_WriteXXX() will fail if the writer has + * already encountered an error. + * + * 5. A call to JSON_Writer_WriteXXX() will fail if the call was made + * reentrantly from inside a handler. + * + * 6. A call to JSON_Writer_WriteXXX() will fail if it would cause the + * writer to output grammatically-incorrect JSON text. + * + * 7.
If an output handler returns JSON_Writer_Abort, the writer will + * abort the write, set its error to JSON_Error_AbortedByHandler, + * set its error location to the location in the output stream prior + * to the call to the handler, and return JSON_Failure from the outer + * call to JSON_Writer_WriteXXX(). + */ + +/* Values returned by write handlers to indicate whether writing should + * continue or be aborted. + */ +typedef enum tag_JSON_Writer_HandlerResult +{ + JSON_Writer_Continue = 0, /* Continue writing. */ + JSON_Writer_Abort = 1 /* Abort the write. */ +} JSON_Writer_HandlerResult; + +/* Get and set the handler that is called when a writer instance has output + * ready to be written. + * + * The pBytes parameter points to a buffer containing the bytes to be written, + * encoded according to the writer instance's output encoding setting. The + * buffer is NOT null-terminated. + * + * The length parameter specifies the number of bytes (NOT characters) in + * the encoded output. + */ +typedef JSON_Writer_HandlerResult (JSON_CALL * JSON_Writer_OutputHandler)(JSON_Writer writer, const char* pBytes, size_t length); +JSON_API(JSON_Writer_OutputHandler) JSON_Writer_GetOutputHandler(JSON_Writer writer); +JSON_API(JSON_Status) JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler); + +/* Write the JSON null literal to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteNull(JSON_Writer writer); + +/* Write a JSON boolean value to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value); + +/* Write a JSON string value to the output. + * + * The pValue parameter points to a buffer containing the string to be + * written. The buffer does NOT need to be null-terminated. This + * parameter can be null if and only if the length parameter is zero. + * + * The length parameter specifies the number of bytes (NOT characters) + * in the buffer. If the buffer is null-terminated, the length should + * NOT include the null terminator.
+ * + * The encoding parameter specifies the encoding of the text pointed + * to by pValue. This parameter cannot be JSON_UnknownEncoding. + * + * If the string contains invalid encoding sequences and the option to + * replace invalid encoding sequences with the Unicode replacement + * character (U+FFFD) is not enabled for the writer instance, the writer + * sets its error to JSON_Error_InvalidEncodingSequence and returns + * failure. + * + * The writer escapes the following codepoints: + * + * - BACKSPACE (U+0008) => \b + * - TAB (U+0009) => \t + * - LINE FEED (U+000A) => \n + * - FORM FEED (U+000C) => \f + * - CARRIAGE RETURN (U+000D) => \r + * - QUOTATION MARK (U+0022) => \" + * - SOLIDUS (U+002F) => \/ + * - REVERSE SOLIDUS (U+005C) => \\ + * + * The writer also escapes the following codepoints using hex-style escape + * sequences: + * + * - All control characters (U+0000 - U+001F) except those covered by the + * list above. + * - DELETE (U+007F) + * - LINE SEPARATOR (U+2028) + * - PARAGRAPH SEPARATOR (U+2029) + * - All 34 Unicode "noncharacter" codepoints whose values end in FE or FF. + * - All 32 Unicode "noncharacter" codepoints in the range U+FDD0 - U+FDEF. + * - REPLACEMENT CHARACTER (U+FFFD), if it did not appear in the original + * string provided by the client; in other words, if the writer introduced + * it in the output as a replacement for an invalid encoding sequence in + * the original string. + * + * If the setting to escape all non-ASCII characters is enabled, ALL + * codepoints above U+0080 are escaped using hex-style escape sequences. + */ +JSON_API(JSON_Status) JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding); + +/* Write a JSON number value to the output. + * + * The pValue parameter points to a buffer containing the number to be + * written. The buffer does NOT need to be null-terminated. + * + * The length parameter specifies the number of bytes (NOT characters) + * in the buffer. 
If the buffer is null-terminated, the length should + * NOT include the null terminator. + * + * The encoding parameter specifies the encoding of the text pointed + * to by pValue. This parameter cannot be JSON_UnknownEncoding. + * + * If the number contains an invalid encoding sequence, the writer sets + * its error to JSON_Error_InvalidEncodingSequence and returns failure, + * regardless of whether the option to replace invalid encoding sequences + * with the Unicode replacement character (U+FFFD) is enabled (that + * setting only affects writing of string values). + * + * The number must be a valid JSON number as described by RFC 4627, or a + * hexadecimal number conforming to the syntax of HexIntegerLiteral, as + * described in section 7.8.3 of ECMA-262. Otherwise, the writer sets its + * error to JSON_Error_InvalidNumber and returns failure. + */ +JSON_API(JSON_Status) JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding); + +/* Write a JSON "special" number literal to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value); + +/* Write a left curly-brace character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteStartObject(JSON_Writer writer); + +/* Write a right curly-brace character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteEndObject(JSON_Writer writer); + +/* Write a left square-brace character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteStartArray(JSON_Writer writer); + +/* Write a right square-brace character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteEndArray(JSON_Writer writer); + +/* Write a colon character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteColon(JSON_Writer writer); + +/* Write a comma character to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteComma(JSON_Writer writer); + +/* Write space characters to the output. 
*/ +JSON_API(JSON_Status) JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces); + +/* Write a newline sequence to the output. */ +JSON_API(JSON_Status) JSON_Writer_WriteNewLine(JSON_Writer writer); + +#endif /* JSON_NO_WRITER */ + +/******************** Miscellaneous API ********************/ + +/* Information about the library version. */ +typedef struct tag_JSON_Version +{ + unsigned int major; + unsigned int minor; + unsigned int micro; +} JSON_Version; + +/* Get a pointer to the library version information. */ +JSON_API(const JSON_Version*) JSON_LibraryVersion(void); + +/* Get a constant, null-terminated, ASCII string describing an error code. */ +JSON_API(const char*) JSON_ErrorString(JSON_Error error); + +/* Get the UTF-16 encoding whose endianness matches the target platform. + * + * This function always returns either JSON_UTF16LE or JSON_UTF16BE. + */ +JSON_API(JSON_Encoding) JSON_NativeUTF16Encoding(void); + +/* Get the UTF-32 encoding whose endianness matches the target platform. + * + * This function always returns either JSON_UTF32LE or JSON_UTF32BE. + */ +JSON_API(JSON_Encoding) JSON_NativeUTF32Encoding(void); + +#ifdef __cplusplus +} +#endif + +#endif /* JSONSAX_H_INCLUDED */