diff --git a/libretro-common/formats/json/jsonsax.c b/libretro-common/formats/json/jsonsax.c deleted file mode 100644 index 649f413370..0000000000 --- a/libretro-common/formats/json/jsonsax.c +++ /dev/null @@ -1,316 +0,0 @@ -/* Copyright (C) 2010-2020 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this file (jsonsax.c). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include -#include - -#include -#include - -#ifdef JSONSAX_ERRORS -const char* jsonsax_errors[] = -{ - "Ok", - "Interrupted", - "Missing key", - "Unterminated key", - "Missing value", - "Unterminated object", - "Unterminated array", - "Unterminated string", - "Invalid value" -}; -#endif - -typedef struct -{ - const jsonsax_handlers_t* handlers; - - const char* json; - void* ud; - jmp_buf env; -} -state_t; - -static INLINE void skip_spaces( state_t* state ) -{ - while ( isspace( (unsigned char)*state->json ) ) - state->json++; -} - -static INLINE void skip_digits( state_t* state ) -{ - while ( isdigit( (unsigned char)*state->json ) ) - state->json++; -} - -#define HANDLE_0( event ) \ - do { \ - if ( state->handlers->event && state->handlers->event( state->ud ) ) \ - longjmp( state->env, JSONSAX_INTERRUPTED ); \ - } while ( 0 ) - -#define HANDLE_1( event, arg1 ) \ - do { \ - if ( state->handlers->event && state->handlers->event( state->ud, arg1 ) ) \ - longjmp( state->env, JSONSAX_INTERRUPTED ); \ - } while ( 0 ) - -#define HANDLE_2( event, arg1, arg2 ) \ - do { \ - if ( state->handlers->event && state->handlers->event( state->ud, arg1, arg2 ) ) \ - longjmp( state->env, JSONSAX_INTERRUPTED ); \ - } while ( 0 ) - -static void jsonx_parse_value(state_t* state); - -static void jsonx_parse_object( state_t* state ) -{ - state->json++; /* we're sure the current character is a '{' */ - skip_spaces( state ); - HANDLE_0( start_object ); - - while ( *state->json != '}' ) - { - const char *name = NULL; - if ( *state->json != '"' ) - longjmp( state->env, JSONSAX_MISSING_KEY ); - - name = ++state->json; - - for ( ;; ) - { - const char* quote = strchr( state->json, '"' ); - - if ( !quote ) - longjmp( state->env, JSONSAX_UNTERMINATED_KEY ); - - state->json = quote + 1; - - if ( quote[ -1 ] != '\\' ) - break; - } - - HANDLE_2( key, name, state->json - name - 1 ); - skip_spaces( state ); - - if ( *state->json != ':' ) - longjmp( state->env, JSONSAX_MISSING_VALUE ); 
- - state->json++; - skip_spaces( state ); - jsonx_parse_value( state ); - skip_spaces( state ); - - if ( *state->json != ',' ) - break; - - state->json++; - skip_spaces( state ); - } - - if ( *state->json != '}' ) - longjmp( state->env, JSONSAX_UNTERMINATED_OBJECT ); - - state->json++; - HANDLE_0( end_object ); -} - -static void jsonx_parse_array(state_t* state) -{ - unsigned int ndx = 0; - - state->json++; /* we're sure the current character is a '[' */ - skip_spaces( state ); - HANDLE_0( start_array ); - - while ( *state->json != ']' ) - { - HANDLE_1( array_index, ndx++ ); - jsonx_parse_value( state ); - skip_spaces( state ); - - if ( *state->json != ',' ) - break; - - state->json++; - skip_spaces( state ); - } - - if ( *state->json != ']' ) - longjmp( state->env, JSONSAX_UNTERMINATED_ARRAY ); - - state->json++; - HANDLE_0( end_array ); -} - -static void jsonx_parse_string(state_t* state) -{ - const char* string = ++state->json; - - for ( ;; ) - { - const char* quote = strchr( state->json, '"' ); - - if ( !quote ) - longjmp( state->env, JSONSAX_UNTERMINATED_STRING ); - - state->json = quote + 1; - - if ( quote[ -1 ] != '\\' ) - break; - } - - HANDLE_2( string, string, state->json - string - 1 ); -} - -static void jsonx_parse_boolean(state_t* state) -{ - if ( !strncmp( state->json, "true", 4 ) ) - { - state->json += 4; - HANDLE_1( boolean, 1 ); - } - else if ( !strncmp( state->json, "false", 5 ) ) - { - state->json += 5; - HANDLE_1( boolean, 0 ); - } - else - longjmp( state->env, JSONSAX_INVALID_VALUE ); -} - -static void jsonx_parse_null(state_t* state) -{ - if ( !strncmp( state->json + 1, "ull", 3 ) ) /* we're sure the current character is a 'n' */ - { - state->json += 4; - HANDLE_0( null ); - } - else - longjmp( state->env, JSONSAX_INVALID_VALUE ); -} - -static void jsonx_parse_number(state_t* state) -{ - const char* number = state->json; - - if ( *state->json == '-' ) - state->json++; - - if ( !isdigit( (unsigned char)*state->json ) ) - longjmp( state->env, 
JSONSAX_INVALID_VALUE ); - - skip_digits( state ); - - if ( *state->json == '.' ) - { - state->json++; - - if ( !isdigit( (unsigned char)*state->json ) ) - longjmp( state->env, JSONSAX_INVALID_VALUE ); - - skip_digits( state ); - } - - if ( *state->json == 'e' || *state->json == 'E' ) - { - state->json++; - - if ( *state->json == '-' || *state->json == '+' ) - state->json++; - - if ( !isdigit( (unsigned char)*state->json ) ) - longjmp( state->env, JSONSAX_INVALID_VALUE ); - - skip_digits( state ); - } - - HANDLE_2( number, number, state->json - number ); -} - -static void jsonx_parse_value(state_t* state) -{ - skip_spaces( state ); - - switch ( *state->json ) - { - case '{': - jsonx_parse_object(state); - break; - case '[': - jsonx_parse_array( state ); - break; - case '"': - jsonx_parse_string( state ); - break; - case 't': - case 'f': - jsonx_parse_boolean( state ); - break; - case 'n': - jsonx_parse_null( state ); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '-': - jsonx_parse_number( state ); - break; - - default: - longjmp( state->env, JSONSAX_INVALID_VALUE ); - } -} - -int jsonsax_parse( const char* json, const jsonsax_handlers_t* handlers, void* userdata ) -{ - state_t state; - int res; - - state.json = json; - state.handlers = handlers; - state.ud = userdata; - - if ( ( res = setjmp( state.env ) ) == 0 ) - { - if ( handlers->start_document ) - handlers->start_document( userdata ); - - jsonx_parse_value(&state); - - if ( handlers->end_document ) - handlers->end_document( userdata ); - - res = JSONSAX_OK; - } - - return res; -} diff --git a/libretro-common/formats/json/jsonsax_full.c b/libretro-common/formats/json/jsonsax_full.c deleted file mode 100644 index 6ec0aa298d..0000000000 --- a/libretro-common/formats/json/jsonsax_full.c +++ /dev/null @@ -1,3894 +0,0 @@ -/* - Copyright (c) 2012 John-Anthony Owens - - Permission is hereby granted, free of charge, to any person 
obtaining a - copy of this software and associated documentation files (the "Software"), - to deal in the Software without restriction, including without limitation - the rights to use, copy, modify, merge, publish, distribute, sublicense, - and/or sell copies of the Software, and to permit persons to whom the - Software is furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. -*/ - -#include -#include - -/* Ensure uint32_t type (compiler-dependent). */ -#if defined(_MSC_VER) -typedef unsigned __int32 uint32_t; -#else -#include -#endif - -/* Ensure SIZE_MAX defined. */ -#ifndef SIZE_MAX -#define SIZE_MAX ((size_t)-1) -#endif - -/* Mark APIs for export (as opposed to import) when we build this file. */ -#define JSON_BUILDING -#include - -/* Default allocation constants. */ -#define DEFAULT_TOKEN_BYTES_LENGTH 64 /* MUST be a power of 2 */ -#define DEFAULT_SYMBOL_STACK_SIZE 32 /* MUST be a power of 2 */ - -/* Types for readability. */ -typedef unsigned char byte; -typedef uint32_t Codepoint; - -/* Especially-relevant Unicode codepoints. 
*/ -#define U_(x) ((Codepoint)(x)) -#define NULL_CODEPOINT U_(0x0000) -#define BACKSPACE_CODEPOINT U_(0x0008) -#define TAB_CODEPOINT U_(0x0009) -#define LINE_FEED_CODEPOINT U_(0x000A) -#define FORM_FEED_CODEPOINT U_(0x000C) -#define CARRIAGE_RETURN_CODEPOINT U_(0x000D) -#define FIRST_NON_CONTROL_CODEPOINT U_(0x0020) -#define DELETE_CODEPOINT U_(0x007F) -#define FIRST_NON_ASCII_CODEPOINT U_(0x0080) -#define FIRST_2_BYTE_UTF8_CODEPOINT U_(0x0080) -#define FIRST_3_BYTE_UTF8_CODEPOINT U_(0x0800) -#define LINE_SEPARATOR_CODEPOINT U_(0x2028) -#define PARAGRAPH_SEPARATOR_CODEPOINT U_(0x2029) -#define BOM_CODEPOINT U_(0xFEFF) -#define REPLACEMENT_CHARACTER_CODEPOINT U_(0xFFFD) -#define FIRST_NON_BMP_CODEPOINT U_(0x10000) -#define FIRST_4_BYTE_UTF8_CODEPOINT U_(0x10000) -#define MAX_CODEPOINT U_(0x10FFFF) -#define EOF_CODEPOINT U_(0xFFFFFFFF) - -/* Bit-masking macros. */ -#define BOTTOM_3_BITS(x) ((x) & 0x7) -#define BOTTOM_4_BITS(x) ((x) & 0xF) -#define BOTTOM_5_BITS(x) ((x) & 0x1F) -#define BOTTOM_6_BITS(x) ((x) & 0x3F) - -/* Bit-flag macros. */ -#define GET_FLAGS(x, f) ((x) & (f)) -#define SET_FLAGS_ON(flagstype, x, f) do { (x) |= (flagstype)(f); } while (0) -#define SET_FLAGS_OFF(flagstype, x, f) do { (x) &= (flagstype)~(f); } while (0) -#define SET_FLAGS(flagstype, x, f, cond) do { if (cond) (x) |= (flagstype)(f); else (x) &= (flagstype)~(f); } while (0) - -/* UTF-8 byte-related macros. */ -#define IS_UTF8_SINGLE_BYTE(b) (((b) & 0x80) == 0) -#define IS_UTF8_CONTINUATION_BYTE(b) (((b) & 0xC0) == 0x80) -#define IS_UTF8_FIRST_BYTE_OF_2(b) (((b) & 0xE0) == 0xC0) -#define IS_UTF8_FIRST_BYTE_OF_3(b) (((b) & 0xF0) == 0xE0) -#define IS_UTF8_FIRST_BYTE_OF_4(b) (((b) & 0xF8) == 0xF0) - -/* Unicode codepoint-related macros. 
*/ -#define IS_NONCHARACTER(c) ((((c) & 0xFE) == 0xFE) || (((c) >= 0xFDD0) && ((c) <= 0xFDEF))) -#define IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800) -#define IS_LEADING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800) -#define IS_TRAILING_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00) -#define CODEPOINT_FROM_SURROGATES(hi_lo) ((((hi_lo) >> 16) << 10) + ((hi_lo) & 0xFFFF) + 0xFCA02400) -#define SURROGATES_FROM_CODEPOINT(c) ((((c) << 6) & 0x7FF0000) + ((c) & 0x3FF) + 0xD7C0DC00) -#define SHORTEST_ENCODING_SEQUENCE(enc) (UINT32_C(1) << ((enc) >> 1)) -#define LONGEST_ENCODING_SEQUENCE 4 - -/* Internal types that alias enum types in the public API. - By using byte to represent these values internally, - we can guarantee minimal storage size and avoid compiler - warnings when using values of the type in switch statements - that don't have (or need) a default case. */ -typedef byte Encoding; -typedef byte Error; -typedef byte TokenAttributes; - -/******************** Default Memory Suite ********************/ - -static void* JSON_CALL DefaultReallocHandler(void* userData, void* ptr, size_t size) -{ - (void)userData; /* unused */ - return realloc(ptr, size); -} - -static void JSON_CALL DefaultFreeHandler(void* userData, void* ptr) -{ - (void)userData; /* unused */ - free(ptr); -} - -static const JSON_MemorySuite defaultMemorySuite = { NULL, &DefaultReallocHandler, &DefaultFreeHandler }; - -static byte* DoubleBuffer(const JSON_MemorySuite* pMemorySuite, byte* pDefaultBuffer, byte* pBuffer, size_t length) -{ - size_t newLength = length * 2; - if (newLength < length) - { - pBuffer = NULL; - } - else if (pBuffer == pDefaultBuffer) - { - pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, NULL, newLength); - if (pBuffer) - { - memcpy(pBuffer, pDefaultBuffer, length); - } - } - else - { - pBuffer = (byte*)pMemorySuite->realloc(pMemorySuite->userData, pBuffer, newLength); - } - return pBuffer; -} - -/******************** Unicode Decoder ********************/ - -/* 
Mutually-exclusive decoder states. */ -/* The bits of DecoderState are layed out as follows: - - ---lllnn - - - = unused (3 bits) - l = expected total sequence length (3 bits) - d = number of bytes decoded so far (2 bits) - */ - -#define DECODER_RESET 0x00 -#define DECODED_1_OF_2 0x09 /* 00001001 */ -#define DECODED_1_OF_3 0x0D /* 00001101 */ -#define DECODED_2_OF_3 0x0E /* 00001110 */ -#define DECODED_1_OF_4 0x11 /* 00010001 */ -#define DECODED_2_OF_4 0x12 /* 00010010 */ -#define DECODED_3_OF_4 0x13 /* 00010011 */ -typedef byte DecoderState; - -#define DECODER_STATE_BYTES(s) (size_t)((s) & 0x3) - -/* Decoder data. */ -typedef struct tag_DecoderData -{ - uint32_t bits; - DecoderState state; /* byte alignment */ -} DecoderData; -typedef DecoderData* Decoder; - -/* The bits of DecoderOutput are layed out as follows: - - ------rrlllccccccccccccccccccccc - - - = unused (6 bits) - r = result code (2 bits) - l = sequence length (3 bits) - c = codepoint (21 bits) - */ -#define SEQUENCE_PENDING 0 -#define SEQUENCE_COMPLETE 1 -#define SEQUENCE_INVALID_INCLUSIVE 2 -#define SEQUENCE_INVALID_EXCLUSIVE 3 -typedef uint32_t DecoderResultCode; - -#define DECODER_OUTPUT(r, l, c) (DecoderOutput)(((r) << 24) | ((l) << 21) | (c)) -#define DECODER_RESULT_CODE(o) (DecoderResultCode)((DecoderOutput)(o) >> 24) -#define DECODER_SEQUENCE_LENGTH(o) (size_t)(((DecoderOutput)(o) >> 21) & 0x7) -#define DECODER_CODEPOINT(o) (Codepoint)((DecoderOutput)(o) & 0x001FFFFF) -typedef uint32_t DecoderOutput; - -/* Decoder functions. 
*/ - -static void Decoder_Reset(Decoder decoder) -{ - decoder->state = DECODER_RESET; - decoder->bits = 0; -} - -static int Decoder_SequencePending(Decoder decoder) -{ - return decoder->state != DECODER_RESET; -} - -static DecoderOutput Decoder_ProcessByte(Decoder decoder, Encoding encoding, byte b) -{ - DecoderOutput output = DECODER_OUTPUT(SEQUENCE_PENDING, 0, 0); - switch (encoding) - { - case JSON_UTF8: - /* When the input encoding is UTF-8, the decoded codepoint's bits are - recorded in the bottom 3 bytes of bits as they are decoded. - The top byte is not used. */ - switch (decoder->state) - { - case DECODER_RESET: - if (IS_UTF8_SINGLE_BYTE(b)) - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 1, b); - else if (IS_UTF8_FIRST_BYTE_OF_2(b)) - { - /* UTF-8 2-byte sequences that are overlong encodings can be - detected from just the first byte (C0 or C1). */ - decoder->bits = (uint32_t)BOTTOM_5_BITS(b) << 6; - if (decoder->bits < FIRST_2_BYTE_UTF8_CODEPOINT) - output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); - else - { - decoder->state = DECODED_1_OF_2; - goto noreset; - } - } - else if (IS_UTF8_FIRST_BYTE_OF_3(b)) - { - decoder->bits = (uint32_t)BOTTOM_4_BITS(b) << 12; - decoder->state = DECODED_1_OF_3; - goto noreset; - } - else if (IS_UTF8_FIRST_BYTE_OF_4(b)) - { - /* Some UTF-8 4-byte sequences that encode out-of-range - codepoints can be detected from the first byte (F5 - FF). */ - decoder->bits = (uint32_t)BOTTOM_3_BITS(b) << 18; - if (decoder->bits > MAX_CODEPOINT) - output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); - else - { - decoder->state = DECODED_1_OF_4; - goto noreset; - } - } - else - /* The byte is of the form 11111xxx or 10xxxxxx, and is not - a valid first byte for a UTF-8 sequence. 
*/ - output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 1, 0); - break; - - case DECODED_1_OF_2: - if (IS_UTF8_CONTINUATION_BYTE(b)) - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits | BOTTOM_6_BITS(b)); - else - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); - break; - - case DECODED_1_OF_3: - if (IS_UTF8_CONTINUATION_BYTE(b)) - { - /* UTF-8 3-byte sequences that are overlong - * encodings or encode surrogate codepoints - * can be detected after 2 bytes. */ - decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6; - if ((decoder->bits < FIRST_3_BYTE_UTF8_CODEPOINT) || - IS_SURROGATE(decoder->bits)) - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); - else - { - decoder->state = DECODED_2_OF_3; - goto noreset; - } - } - else - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); - break; - - case DECODED_2_OF_3: - if (IS_UTF8_CONTINUATION_BYTE(b)) - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 3, decoder->bits | BOTTOM_6_BITS(b)); - else - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); - break; - - case DECODED_1_OF_4: - if (IS_UTF8_CONTINUATION_BYTE(b)) - { - /* UTF-8 4-byte sequences that are overlong encodings or encode - out-of-range codepoints can be detected after 2 bytes. 
*/ - decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 12; - if ( (decoder->bits < FIRST_4_BYTE_UTF8_CODEPOINT) || - (decoder->bits > MAX_CODEPOINT)) - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); - else - { - decoder->state = DECODED_2_OF_4; - goto noreset; - } - } - else - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 1, 0); - break; - - case DECODED_2_OF_4: - if (IS_UTF8_CONTINUATION_BYTE(b)) - { - decoder->bits |= (uint32_t)BOTTOM_6_BITS(b) << 6; - decoder->state = DECODED_3_OF_4; - goto noreset; - } - - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); - break; - - case DECODED_3_OF_4: - if (IS_UTF8_CONTINUATION_BYTE(b)) - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits | BOTTOM_6_BITS(b)); - else - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 3, 0); - break; - } - break; - - case JSON_UTF16LE: - /* When the input encoding is UTF-16, the decoded codepoint's bits are - recorded in the bottom 2 bytes of bits as they are decoded. - If those 2 bytes form a leading surrogate, the decoder treats the - surrogate pair as a single 4-byte sequence, shifts the leading - surrogate into the high 2 bytes of bits, and decodes the - trailing surrogate's bits in the bottom 2 bytes of bits. */ - switch (decoder->state) - { - case DECODER_RESET: - decoder->bits = b; - decoder->state = DECODED_1_OF_2; - goto noreset; - - case DECODED_1_OF_2: - decoder->bits |= (uint32_t)b << 8; - /* A trailing surrogate cannot appear on its own. */ - if (IS_TRAILING_SURROGATE(decoder->bits)) - output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0); - else if (IS_LEADING_SURROGATE(decoder->bits)) - { - /* A leading surrogate implies a 4-byte surrogate pair. 
*/ - decoder->bits <<= 16; - decoder->state = DECODED_2_OF_4; - goto noreset; - } - else - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits); - break; - - case DECODED_2_OF_4: - decoder->bits |= b; - decoder->state = DECODED_3_OF_4; - goto noreset; - - case DECODED_3_OF_4: - decoder->bits |= (uint32_t)b << 8; - if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF)) - { - /* A leading surrogate must be followed by a trailing one. - Treat the previous 3 bytes as an invalid 2-byte sequence - followed by the first byte of a new sequence. */ - decoder->bits &= 0xFF; - decoder->state = DECODED_1_OF_2; - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); - goto noreset; - } - - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, CODEPOINT_FROM_SURROGATES(decoder->bits)); - break; - } - break; - - case JSON_UTF16BE: - /* When the input encoding is UTF-16, the decoded codepoint's bits are - recorded in the bottom 2 bytes of bits as they are decoded. - If those 2 bytes form a leading surrogate, the decoder treats the - surrogate pair as a single 4-byte sequence, shifts the leading - surrogate into the high 2 bytes of bits, and decodes the - trailing surrogate's bits in the bottom 2 bytes of bits. */ - switch (decoder->state) - { - case DECODER_RESET: - decoder->bits = (uint32_t)b << 8; - decoder->state = DECODED_1_OF_2; - goto noreset; - - case DECODED_1_OF_2: - decoder->bits |= b; - /* A trailing surrogate cannot appear on its own. */ - if (IS_TRAILING_SURROGATE(decoder->bits)) - output = DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 2, 0); - else if (IS_LEADING_SURROGATE(decoder->bits)) - { - /* A leading surrogate implies a 4-byte surrogate pair. 
*/ - decoder->bits <<= 16; - decoder->state = DECODED_2_OF_4; - goto noreset; - } - else - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 2, decoder->bits); - break; - - case DECODED_2_OF_4: - decoder->bits |= (uint32_t)b << 8; - decoder->state = DECODED_3_OF_4; - goto noreset; - - case DECODED_3_OF_4: - decoder->bits |= b; - if (!IS_TRAILING_SURROGATE(decoder->bits & 0xFFFF)) - { - /* A leading surrogate must be followed by a trailing one. - Treat the previous 3 bytes as an invalid 2-byte sequence - followed by the first byte of a new sequence. */ - decoder->bits &= 0xFF00; - decoder->state = DECODED_1_OF_2; - output = DECODER_OUTPUT(SEQUENCE_INVALID_EXCLUSIVE, 2, 0); - goto noreset; - } - - output = DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, - CODEPOINT_FROM_SURROGATES(decoder->bits)); - break; - } - break; - - case JSON_UTF32LE: - /* When the input encoding is UTF-32, the decoded codepoint's bits are - recorded in bits as they are decoded. */ - switch (decoder->state) - { - case DECODER_RESET: - decoder->state = DECODED_1_OF_4; - decoder->bits = (uint32_t)b; - goto noreset; - - case DECODED_1_OF_4: - decoder->state = DECODED_2_OF_4; - decoder->bits |= (uint32_t)b << 8; - goto noreset; - - case DECODED_2_OF_4: - decoder->state = DECODED_3_OF_4; - decoder->bits |= (uint32_t)b << 16; - goto noreset; - - case DECODED_3_OF_4: - decoder->bits |= (uint32_t)b << 24; - output = ( - IS_SURROGATE(decoder->bits) || - (decoder->bits > MAX_CODEPOINT)) - ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0) - : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits); - break; - } - break; - - case JSON_UTF32BE: - /* When the input encoding is UTF-32, the decoded codepoint's bits are - recorded in bits as they are decoded. 
*/ - switch (decoder->state) - { - case DECODER_RESET: - decoder->state = DECODED_1_OF_4; - decoder->bits = (uint32_t)b << 24; - goto noreset; - - case DECODED_1_OF_4: - decoder->state = DECODED_2_OF_4; - decoder->bits |= (uint32_t)b << 16; - goto noreset; - - case DECODED_2_OF_4: - decoder->state = DECODED_3_OF_4; - decoder->bits |= (uint32_t)b << 8; - goto noreset; - - case DECODED_3_OF_4: - decoder->bits |= b; - output = (IS_SURROGATE(decoder->bits) || - (decoder->bits > MAX_CODEPOINT)) - ? DECODER_OUTPUT(SEQUENCE_INVALID_INCLUSIVE, 4, 0) - : DECODER_OUTPUT(SEQUENCE_COMPLETE, 4, decoder->bits); - break; - } - break; - } - - /* Reset the decoder for the next sequence. */ - Decoder_Reset(decoder); - -noreset: - return output; -} - -/******************** Unicode Encoder ********************/ - -/* This function makes the following assumptions about its input: - - 1. The c argument is a valid codepoint (U+0000 - U+10FFFF). - 2. The encoding argument is not JSON_UnknownEncoding. - 3. The pBytes argument points to an array of at least 4 bytes. 
- */ -static size_t EncodeCodepoint(Codepoint c, Encoding encoding, byte* pBytes) -{ - size_t length = 0; - switch (encoding) - { - case JSON_UTF8: - if (c < FIRST_2_BYTE_UTF8_CODEPOINT) - { - pBytes[0] = (byte)c; - length = 1; - } - else if (c < FIRST_3_BYTE_UTF8_CODEPOINT) - { - pBytes[0] = (byte)(0xC0 | (c >> 6)); - pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c)); - length = 2; - } - else if (c < FIRST_4_BYTE_UTF8_CODEPOINT) - { - pBytes[0] = (byte)(0xE0 | (c >> 12)); - pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6)); - pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c)); - length = 3; - } - else - { - pBytes[0] = (byte)(0xF0 | (c >> 18)); - pBytes[1] = (byte)(0x80 | BOTTOM_6_BITS(c >> 12)); - pBytes[2] = (byte)(0x80 | BOTTOM_6_BITS(c >> 6)); - pBytes[3] = (byte)(0x80 | BOTTOM_6_BITS(c)); - length = 4; - } - break; - - case JSON_UTF16LE: - if (c < FIRST_NON_BMP_CODEPOINT) - { - pBytes[0] = (byte)(c); - pBytes[1] = (byte)(c >> 8); - length = 2; - } - else - { - uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c); - - /* Leading surrogate. */ - pBytes[0] = (byte)(surrogates >> 16); - pBytes[1] = (byte)(surrogates >> 24); - - /* Trailing surrogate. */ - pBytes[2] = (byte)(surrogates); - pBytes[3] = (byte)(surrogates >> 8); - length = 4; - } - break; - - case JSON_UTF16BE: - if (c < FIRST_NON_BMP_CODEPOINT) - { - pBytes[1] = (byte)(c); - pBytes[0] = (byte)(c >> 8); - length = 2; - } - else - { - /* The codepoint requires a surrogate pair in UTF-16. */ - uint32_t surrogates = SURROGATES_FROM_CODEPOINT(c); - - /* Leading surrogate. */ - pBytes[1] = (byte)(surrogates >> 16); - pBytes[0] = (byte)(surrogates >> 24); - - /* Trailing surrogate. 
*/ - pBytes[3] = (byte)(surrogates); - pBytes[2] = (byte)(surrogates >> 8); - length = 4; - } - break; - - case JSON_UTF32LE: - pBytes[0] = (byte)(c); - pBytes[1] = (byte)(c >> 8); - pBytes[2] = (byte)(c >> 16); - pBytes[3] = (byte)(c >> 24); - length = 4; - break; - - case JSON_UTF32BE: - pBytes[3] = (byte)(c); - pBytes[2] = (byte)(c >> 8); - pBytes[1] = (byte)(c >> 16); - pBytes[0] = (byte)(c >> 24); - length = 4; - break; - } - return length; -} - -/******************** JSON Lexer States ********************/ - -/* Mutually-exclusive lexer states. */ -#define LEXING_WHITESPACE 0 -#define LEXING_LITERAL 1 -#define LEXING_STRING 2 -#define LEXING_STRING_ESCAPE 3 -#define LEXING_STRING_HEX_ESCAPE_BYTE_1 4 -#define LEXING_STRING_HEX_ESCAPE_BYTE_2 5 -#define LEXING_STRING_HEX_ESCAPE_BYTE_3 6 -#define LEXING_STRING_HEX_ESCAPE_BYTE_4 7 -#define LEXING_STRING_HEX_ESCAPE_BYTE_5 8 -#define LEXING_STRING_HEX_ESCAPE_BYTE_6 9 -#define LEXING_STRING_HEX_ESCAPE_BYTE_7 10 -#define LEXING_STRING_HEX_ESCAPE_BYTE_8 11 -#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH 12 -#define LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U 13 -#define LEXING_NUMBER_AFTER_MINUS 14 -#define LEXING_NUMBER_AFTER_LEADING_ZERO 15 -#define LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO 16 -#define LEXING_NUMBER_AFTER_X 17 -#define LEXING_NUMBER_HEX_DIGITS 18 -#define LEXING_NUMBER_DECIMAL_DIGITS 19 -#define LEXING_NUMBER_AFTER_DOT 20 -#define LEXING_NUMBER_FRACTIONAL_DIGITS 21 -#define LEXING_NUMBER_AFTER_E 22 -#define LEXING_NUMBER_AFTER_EXPONENT_SIGN 23 -#define LEXING_NUMBER_EXPONENT_DIGITS 24 -#define LEXING_COMMENT_AFTER_SLASH 25 -#define LEXING_SINGLE_LINE_COMMENT 26 -#define LEXING_MULTI_LINE_COMMENT 27 -#define LEXING_MULTI_LINE_COMMENT_AFTER_STAR 28 -#define LEXER_ERROR 255 -typedef byte LexerState; - -/******************** JSON Grammarian ********************/ - -/* The JSON grammar comprises the following productions: - - 1. VALUE => null - 2. VALUE => boolean - 3. 
VALUE => string - 4. VALUE => number - 5. VALUE => specialnumber - 6. VALUE => { MEMBERS } - 7. VALUE => [ ITEMS ] - 8. MEMBERS => MEMBER MORE_MEMBERS - 9. MEMBERS => e - 10. MEMBER => string : VALUE - 11. MORE_MEMBERS => , MEMBER MORE_MEMBERS - 12. MORE_MEMBERS => e - 13. ITEMS => ITEM MORE_ITEMS - 14. ITEMS => e - 15. ITEM => VALUE - 16. MORE_ITEMS => , ITEM MORE_ITEMS - 17. MORE_ITEMS => e - - We implement a simple LL(1) parser based on this grammar, with events - emitted when certain non-terminals are replaced. - */ - -/* Mutually-exclusive grammar tokens and non-terminals. The values are defined - so that the bottom 4 bits of a value can be used as an index into the - grammar production rule table. */ -#define T_NONE 0x00 /* tokens are in the form 0x0X */ -#define T_NULL 0x01 -#define T_TRUE 0x02 -#define T_FALSE 0x03 -#define T_STRING 0x04 -#define T_NUMBER 0x05 -#define T_NAN 0x06 -#define T_INFINITY 0x07 -#define T_NEGATIVE_INFINITY 0x08 -#define T_LEFT_CURLY 0x09 -#define T_RIGHT_CURLY 0x0A -#define T_LEFT_SQUARE 0x0B -#define T_RIGHT_SQUARE 0x0C -#define T_COLON 0x0D -#define T_COMMA 0x0E -#define NT_VALUE 0x10 /* non-terminals are in the form 0x1X */ -#define NT_MEMBERS 0x11 -#define NT_MEMBER 0x12 -#define NT_MORE_MEMBERS 0x13 -#define NT_ITEMS 0x14 -#define NT_ITEM 0x15 -#define NT_MORE_ITEMS 0x16 -typedef byte Symbol; - -#define IS_NONTERMINAL(s) ((s) & 0x10) -#define IS_TOKEN(s) !IS_NONTERMINAL(s) - -/* Grammarian data. */ -typedef struct tag_GrammarianData -{ - Symbol* pStack; /* initially set to defaultStack */ - size_t stackSize; - size_t stackUsed; - Symbol defaultStack[DEFAULT_SYMBOL_STACK_SIZE]; -} GrammarianData; -typedef GrammarianData* Grammarian; - -/* Mutually-exclusive result codes returned by the grammarian - after processing a token. */ -#define ACCEPTED_TOKEN 0 -#define REJECTED_TOKEN 1 -#define SYMBOL_STACK_FULL 2 -typedef uint32_t GrammarianResultCode; - -/* Events emitted by the grammarian as a result of processing a - token. 
Note that EMIT_ARRAY_ITEM always appears bitwise OR-ed - with one of the other values. */ -#define EMIT_NOTHING 0x00 -#define EMIT_NULL 0x01 -#define EMIT_BOOLEAN 0x02 -#define EMIT_STRING 0x03 -#define EMIT_NUMBER 0x04 -#define EMIT_SPECIAL_NUMBER 0x05 -#define EMIT_START_OBJECT 0x06 -#define EMIT_END_OBJECT 0x07 -#define EMIT_OBJECT_MEMBER 0x08 -#define EMIT_START_ARRAY 0x09 -#define EMIT_END_ARRAY 0x0A -#define EMIT_ARRAY_ITEM 0x10 /* may be combined with other values */ -typedef byte GrammarEvent; - -/* The bits of GrammarianOutput are layed out as follows: - - -rreeeee - - - = unused (1 bit) - r = result code (2 bits) - e = event (5 bits) - */ -#define GRAMMARIAN_OUTPUT(r, e) (GrammarianOutput)(((GrammarianResultCode)(r) << 5) | (GrammarEvent)(e)) -#define GRAMMARIAN_RESULT_CODE(o) (GrammarianResultCode)((GrammarianOutput)(o) >> 5) -#define GRAMMARIAN_EVENT(o) (GrammarEvent)((GrammarianOutput)(o) & 0x1F) -typedef byte GrammarianOutput; - -/* Grammar rule used by the grammarian to process a token. */ -typedef struct tag_GrammarRule -{ - Symbol symbolToPush1; /* byte alignment */ - Symbol symbolToPush2; /* byte alignment */ - byte reprocess; - GrammarEvent emit; /* byte alignment */ -} GrammarRule; - -/* Grammarian functions. */ - -static void Grammarian_Reset(Grammarian grammarian, int isInitialized) -{ - /* When we reset the grammarian, we keep the symbol stack that has - already been allocated, if any. If the client wants to reclaim the - memory used by the that buffer, he needs to free the grammarian - and create a new one. */ - if (!isInitialized) - { - grammarian->pStack = grammarian->defaultStack; - grammarian->stackSize = sizeof(grammarian->defaultStack); - } - - /* The grammarian always starts with NT_VALUE on the symbol stack. 
*/ - grammarian->pStack[0] = NT_VALUE; - grammarian->stackUsed = 1; -} - -static void Grammarian_FreeAllocations(Grammarian grammarian, - const JSON_MemorySuite* pMemorySuite) -{ - if (grammarian->pStack != grammarian->defaultStack) - pMemorySuite->free(pMemorySuite->userData, grammarian->pStack); -} - -static int Grammarian_FinishedDocument(Grammarian grammarian) -{ - return !grammarian->stackUsed; -} - -static GrammarianOutput Grammarian_ProcessToken(Grammarian grammarian, - Symbol token, const JSON_MemorySuite* pMemorySuite) -{ - /* The order and number of the rows and columns in this table must - match the defined token and non-terminal symbol values. - - The row index is the incoming token's Symbol value. - - The column index is the bottom 4 bits of Symbol value of - the non-terminal at the top of the processing stack. - Since non-terminal Symbol values start at 0x10, taking - the bottom 4 bits yields a 0-based index. */ - static const byte ruleLookup[15][7] = - { - /* V MS M MM IS I MI */ - /* ---- */ { 0, 0, 0, 0, 0, 0, 0 }, - /* null */ { 1, 0, 0, 0, 13, 15, 0 }, - /* true */ { 2, 0, 0, 0, 13, 15, 0 }, - /* false */ { 2, 0, 0, 0, 13, 15, 0 }, - /* string */ { 3, 8, 10, 0, 13, 15, 0 }, - /* number */ { 4, 0, 0, 0, 13, 15, 0 }, - /* NaN */ { 5, 0, 0, 0, 13, 15, 0 }, - /* Inf */ { 5, 0, 0, 0, 13, 15, 0 }, - /* -Inf */ { 5, 0, 0, 0, 13, 15, 0 }, - /* { */ { 6, 0, 0, 0, 13, 15, 0 }, - /* } */ { 0, 9, 0, 12, 0, 0, 0 }, - /* [ */ { 7, 0, 0, 0, 13, 15, 0 }, - /* ] */ { 0, 0, 0, 0, 14, 0, 17 }, - /* : */ { 0, 0, 0, 0, 0, 0, 0 }, - /* , */ { 0, 0, 0, 11, 0, 0, 16 } - }; - - static const GrammarRule rules[17] = - { - /* 1. */ { T_NONE, T_NONE, 0, EMIT_NULL }, - /* 2. */ { T_NONE, T_NONE, 0, EMIT_BOOLEAN }, - /* 3. */ { T_NONE, T_NONE, 0, EMIT_STRING }, - /* 4. */ { T_NONE, T_NONE, 0, EMIT_NUMBER }, - /* 5. */ { T_NONE, T_NONE, 0, EMIT_SPECIAL_NUMBER }, - /* 6. */ { T_RIGHT_CURLY, NT_MEMBERS, 0, EMIT_START_OBJECT }, - /* 7. 
*/ { T_RIGHT_SQUARE, NT_ITEMS, 0, EMIT_START_ARRAY },
-      /* 8. */ { NT_MORE_MEMBERS, NT_MEMBER, 1, EMIT_NOTHING },
-      /* 9. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
-      /* 10. */ { NT_VALUE, T_COLON, 0, EMIT_OBJECT_MEMBER },
-      /* 11. */ { NT_MORE_MEMBERS, NT_MEMBER, 0, EMIT_NOTHING },
-      /* 12. */ { T_NONE, T_NONE, 1, EMIT_END_OBJECT },
-      /* 13. */ { NT_MORE_ITEMS, NT_ITEM, 1, EMIT_NOTHING },
-      /* 14. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY },
-      /* 15. reprocessed against NT_VALUE, which is how EMIT_ARRAY_ITEM
-             ends up OR-ed with the event of the item's own value */ { NT_VALUE, T_NONE, 1, EMIT_ARRAY_ITEM },
-      /* 16. */ { NT_MORE_ITEMS, NT_ITEM, 0, EMIT_NOTHING },
-      /* 17. */ { T_NONE, T_NONE, 1, EMIT_END_ARRAY }
-   };
-
-   GrammarEvent emit = EMIT_NOTHING; /* accumulates the emit value of every rule applied below */
-
-   /* If the stack is empty, no more tokens were expected. */
-   if (Grammarian_FinishedDocument(grammarian))
-      return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
-
-   for (;;)
-   {
-      Symbol topSymbol = grammarian->pStack[grammarian->stackUsed - 1];
-      if (IS_TOKEN(topSymbol))
-      { /* A specific terminal is expected: it must be exactly this token. */
-         if (topSymbol != token)
-            return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
-         grammarian->stackUsed--; /* the expected terminal has been consumed */
-         break;
-      }
-      else
-      { /* A non-terminal is on top: look up the rule for (token, non-terminal). */
-         const GrammarRule* pRule = NULL;
-         byte ruleNumber = ruleLookup[token][BOTTOM_4_BITS(topSymbol)];
-
-         if (ruleNumber == 0)
-            return GRAMMARIAN_OUTPUT(REJECTED_TOKEN, EMIT_NOTHING);
-
-         pRule = &rules[ruleNumber - 1]; /* rule numbers are 1-based, 0 being "no rule" */
-
-         /* The rule removes the top symbol and does not replace it. */
-         if (pRule->symbolToPush1 == T_NONE)
-            grammarian->stackUsed--;
-         else
-         {
-            /* The rule replaces the top symbol with 1 or 2 symbols. */
-            grammarian->pStack[grammarian->stackUsed - 1] = pRule->symbolToPush1;
-            if (pRule->symbolToPush2 != T_NONE)
-            {
-               /* The rule replaces the top symbol with 2 symbols. - Make sure the stack has room for the second one.
*/ - if (grammarian->stackUsed == grammarian->stackSize)
-               { /* The stack is full. NOTE(review): DoubleBuffer is defined
-                    elsewhere; this call site expects it to return a buffer of
-                    twice the current size that still holds the existing
-                    symbols, or NULL on failure - confirm against its
-                    definition. */
-                  Symbol* pBiggerStack = DoubleBuffer(pMemorySuite,
-                        grammarian->defaultStack, grammarian->pStack,
-                        grammarian->stackSize);
-
-                  if (!pBiggerStack)
-                     return GRAMMARIAN_OUTPUT(SYMBOL_STACK_FULL, EMIT_NOTHING);
-
-                  grammarian->pStack = pBiggerStack;
-                  grammarian->stackSize *= 2;
-               }
-               grammarian->pStack[grammarian->stackUsed] = pRule->symbolToPush2;
-               grammarian->stackUsed++;
-            }
-         }
-         emit |= pRule->emit;
-         if (!pRule->reprocess) /* the rule consumed the token; otherwise loop and match it against the new stack top */
-            break;
-      }
-   }
-
-   return GRAMMARIAN_OUTPUT(ACCEPTED_TOKEN, emit);
-}
-
-/******************** JSON Parser ********************/
-
-#ifndef JSON_NO_PARSER
-
-/* Combinable parser state flags (stored in JSON_Parser_Data.state). */
-#define PARSER_RESET 0x00
-#define PARSER_STARTED 0x01
-#define PARSER_FINISHED 0x02
-#define PARSER_IN_PROTECTED_API 0x04
-#define PARSER_IN_TOKEN_HANDLER 0x08 /* set for the duration of every client handler call */
-#define PARSER_AFTER_CARRIAGE_RETURN 0x10
-typedef byte ParserState;
-
-/* Combinable parser settings flags (stored in JSON_Parser_Data.flags). */
-#define PARSER_DEFAULT_FLAGS 0x00
-#define PARSER_ALLOW_BOM 0x01
-#define PARSER_ALLOW_COMMENTS 0x02
-#define PARSER_ALLOW_SPECIAL_NUMBERS 0x04
-#define PARSER_ALLOW_HEX_NUMBERS 0x08
-#define PARSER_REPLACE_INVALID 0x10
-#define PARSER_TRACK_OBJECT_MEMBERS 0x20
-#define PARSER_ALLOW_CONTROL_CHARS 0x40
-#define PARSER_EMBEDDED_DOCUMENT 0x80
-typedef byte ParserFlags;
-
-/* Sentinel value for parser error location offset: the error is located at
-   the start of the current token rather than a number of codepoints back. */
-#define ERROR_LOCATION_IS_TOKEN_START 0xFF
-
-/* An object member name stored in an unordered, singly-linked-list, used for
-   detecting duplicate member names. Note that the name string is not null-
-   terminated. */
-typedef struct tag_MemberName
-{
-   struct tag_MemberName* pNextName;
-   size_t length; /* number of bytes stored in pBytes */
-   byte pBytes[1]; /* variable-size buffer; nodes are allocated with sizeof(MemberName) + length - 1 bytes */
-} MemberName;
-
-/* An object's list of member names, and a pointer to the object's - nearest ancestor object, if any. This is used as a stack. Because arrays - do not have named items, they do not need to be recorded in the stack.
*/ -typedef struct tag_MemberNames
-{
-   struct tag_MemberNames* pAncestor; /* list of the nearest enclosing object, restored when this list is popped */
-   MemberName* pFirstName; /* head of this object's name list; new names are inserted here */
-} MemberNames;
-
-/* A parser instance. The trailing "alignment" remarks record the alignment
-   class by which the fields are grouped. */
-struct JSON_Parser_Data
-{
-   JSON_MemorySuite memorySuite; /* ptr alignment */
-   void* userData;
-   byte* pTokenBytes; /* bytes of the token being lexed; starts out pointing at defaultTokenBytes */
-   MemberNames* pMemberNames; /* top of the member-name-list stack, NULL when empty */
-   GrammarianData grammarianData; /* ptr alignment */
-   JSON_Parser_EncodingDetectedHandler encodingDetectedHandler; /* ptr alignment */
-   JSON_Parser_NullHandler nullHandler;
-   JSON_Parser_BooleanHandler booleanHandler;
-   JSON_Parser_StringHandler stringHandler;
-   JSON_Parser_NumberHandler numberHandler;
-   JSON_Parser_SpecialNumberHandler specialNumberHandler;
-   JSON_Parser_StartObjectHandler startObjectHandler;
-   JSON_Parser_EndObjectHandler endObjectHandler;
-   JSON_Parser_ObjectMemberHandler objectMemberHandler;
-   JSON_Parser_StartArrayHandler startArrayHandler;
-   JSON_Parser_EndArrayHandler endArrayHandler;
-   JSON_Parser_ArrayItemHandler arrayItemHandler;
-   uint32_t lexerBits; /* lexer scratch value whose meaning depends on lexerState */
-   DecoderData decoderData;
-   /* uint32 alignment */
-   size_t codepointLocationByte;
-   size_t codepointLocationLine;
-   size_t codepointLocationColumn;
-   size_t tokenLocationByte; /* the three tokenLocation fields are copied from the codepoint location when a token starts */
-   size_t tokenLocationLine;
-   size_t tokenLocationColumn;
-   size_t depth; /* number of currently open containers (objects and arrays) */
-   size_t tokenBytesLength; /* size of the buffer behind pTokenBytes */
-   size_t tokenBytesUsed; /* bytes of the current token recorded so far; reset to 0 after each token */
-   size_t maxStringLength;
-   size_t maxNumberLength;
-   ParserState state; /* byte alignment */
-   ParserFlags flags; /* byte alignment */
-   Encoding inputEncoding; /* byte alignment */
-   Encoding stringEncoding; /* byte alignment */
-   Encoding numberEncoding; /* byte alignment */
-   Symbol token; /* byte alignment; T_NONE between tokens */
-   TokenAttributes tokenAttributes; /* byte alignment */
-   Error error; /* byte alignment */
-   byte errorOffset; /* a count of codepoints before the current one, or ERROR_LOCATION_IS_TOKEN_START */
-   LexerState lexerState; /* byte alignment */
-   byte defaultTokenBytes[DEFAULT_TOKEN_BYTES_LENGTH]; /* built-in token buffer used until a larger one is needed */
-};
-
-/* Parser internal functions.
*/ - -static void JSON_Parser_SetErrorAtCodepoint(JSON_Parser parser, Error error)
-{ /* Records only the error code. errorOffset is not written here, so it
-     keeps the value it already had (JSON_Parser_ResetData sets it to 0). */
-   parser->error = error;
-}
-/* Records an error whose location is codepointsAgo codepoints before the
-   current codepoint. The count is stored truncated to a byte. */
-static void JSON_Parser_SetErrorAtStringEscapeSequenceStart(
-      JSON_Parser parser, Error error, int codepointsAgo)
-{
-   /* Note that backtracking from the current codepoint requires us to make
-      three assumptions, which are always valid in the context of a string
-      escape sequence:
-
-      1. The input encoding is not JSON_UnknownEncoding.
-
-      2. The codepoints we are backing up across are all in the range
-         U+0000 - U+007F, aka ASCII, so we can assume the number of
-         bytes comprising them based on the input encoding.
-
-      3. The codepoints we are backing up across do not include any
-         line breaks, so we can assume that the line number stays the
-         same and the column number can simply be decremented.
-   */
-   parser->error = error;
-   parser->errorOffset = (byte)codepointsAgo;
-}
-/* Records an error located at the start of the current token. */
-static void JSON_Parser_SetErrorAtToken(JSON_Parser parser, Error error)
-{
-   parser->error = error;
-   parser->errorOffset = ERROR_LOCATION_IS_TOKEN_START;
-}
-/* Allocates an empty member-name list through the memory suite and pushes it
-   on top of parser->pMemberNames. On allocation failure sets
-   JSON_Error_OutOfMemory and returns JSON_Failure. */
-static JSON_Status JSON_Parser_PushMemberNameList(JSON_Parser parser)
-{
-   MemberNames* pNames = (MemberNames*)parser->memorySuite.realloc(
-         parser->memorySuite.userData, NULL, sizeof(MemberNames));
-
-   if (!pNames)
-   {
-      JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
-      return JSON_Failure;
-   }
-
-   pNames->pAncestor = parser->pMemberNames;
-   pNames->pFirstName = NULL;
-   parser->pMemberNames = pNames;
-   return JSON_Success;
-}
-/* Frees every name in the top member-name list, then the list itself, and
-   makes its ancestor the new top. parser->pMemberNames is dereferenced
-   without a check, so the stack must not be empty. */
-static void JSON_Parser_PopMemberNameList(JSON_Parser parser)
-{
-   MemberNames* pAncestor = parser->pMemberNames->pAncestor; /* saved before the list is freed */
-   while (parser->pMemberNames->pFirstName)
-   {
-      MemberName* pNextName = parser->pMemberNames->pFirstName->pNextName;
-      parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames->pFirstName);
-      parser->pMemberNames->pFirstName = pNextName;
-   }
-   parser->memorySuite.free(parser->memorySuite.userData, parser->pMemberNames);
-
parser->pMemberNames = pAncestor;
-}
-/* Enters a container: increments parser->depth. For an object, when
-   PARSER_TRACK_OBJECT_MEMBERS is set, a fresh member-name list is pushed
-   first; if that fails, JSON_Failure is returned and depth is unchanged. */
-static JSON_Status JSON_Parser_StartContainer(JSON_Parser parser, int isObject)
-{
-   if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS) &&
-         !JSON_Parser_PushMemberNameList(parser))
-   {
-      return JSON_Failure;
-   }
-   parser->depth++;
-   return JSON_Success;
-}
-/* Leaves a container: decrements parser->depth and, for an object with
-   PARSER_TRACK_OBJECT_MEMBERS set, pops that object's member-name list. */
-static void JSON_Parser_EndContainer(JSON_Parser parser, int isObject)
-{
-   parser->depth--;
-   if (isObject && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
-   {
-      JSON_Parser_PopMemberNameList(parser);
-   }
-}
-/* Records the current token as a member name of the innermost object. Does
-   nothing and succeeds unless PARSER_TRACK_OBJECT_MEMBERS is set. Fails with
-   JSON_Error_DuplicateObjectMember when a name with the same bytes is
-   already in the list, or with JSON_Error_OutOfMemory when the new node
-   cannot be allocated. */
-static JSON_Status JSON_Parser_AddMemberNameToList(JSON_Parser parser)
-{
-   if (GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS))
-   {
-      MemberName* pName;
-      for (pName = parser->pMemberNames->pFirstName; pName; pName = pName->pNextName)
-      { /* linear scan; names are equal when length and bytes both match */
-         if (pName->length == parser->tokenBytesUsed && !memcmp(pName->pBytes, parser->pTokenBytes, pName->length))
-         {
-            JSON_Parser_SetErrorAtToken(parser, JSON_Error_DuplicateObjectMember);
-            return JSON_Failure;
-         }
-      }
-      pName = (MemberName*)parser->memorySuite.realloc(parser->memorySuite.userData, NULL, sizeof(MemberName) + parser->tokenBytesUsed - 1); /* the - 1 accounts for the byte already declared in pBytes[1] */
-      if (!pName)
-      {
-         JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
-         return JSON_Failure;
-      }
-      pName->pNextName = parser->pMemberNames->pFirstName; /* insert at the head of the list */
-      pName->length = parser->tokenBytesUsed;
-      memcpy(pName->pBytes, parser->pTokenBytes, parser->tokenBytesUsed);
-      parser->pMemberNames->pFirstName = pName;
-   }
-   return JSON_Success;
-}
-/* Restores every parser field to its default value and clears all handlers.
-   When isInitialized is zero, the token buffer is pointed at
-   defaultTokenBytes and pMemberNames is set to NULL without freeing
-   anything. Otherwise the existing token buffer is kept and every
-   member-name list still on the stack is popped and freed. */
-static void JSON_Parser_ResetData(JSON_Parser parser, int isInitialized)
-{
-   parser->userData = NULL;
-   parser->flags = PARSER_DEFAULT_FLAGS;
-   parser->inputEncoding = JSON_UnknownEncoding;
-   parser->stringEncoding = JSON_UTF8;
-   parser->numberEncoding = JSON_UTF8;
-   parser->token = T_NONE;
-   parser->tokenAttributes = 0;
-   parser->error = JSON_Error_None;
-   parser->errorOffset = 0;
-   parser->lexerState = LEXING_WHITESPACE;
-   parser->lexerBits = 0;
-   parser->codepointLocationByte = 0;
-
parser->codepointLocationLine = 0;
-   parser->codepointLocationColumn = 0;
-   parser->tokenLocationByte = 0;
-   parser->tokenLocationLine = 0;
-   parser->tokenLocationColumn = 0;
-   parser->depth = 0;
-
-   if (!isInitialized)
-   { /* first-time setup: start with the built-in token buffer */
-      parser->pTokenBytes = parser->defaultTokenBytes;
-      parser->tokenBytesLength = sizeof(parser->defaultTokenBytes);
-   }
-   else
-   {
-      /* When we reset the parser, we keep the output buffer and the symbol
-         stack that have already been allocated, if any. If the client wants
-         to reclaim the memory used by those buffers, they need to free
-         the parser and create a new one. */
-   }
-   parser->tokenBytesUsed = 0;
-   parser->maxStringLength = SIZE_MAX;
-   parser->maxNumberLength = SIZE_MAX;
-   if (!isInitialized)
-      parser->pMemberNames = NULL; /* nothing has been allocated yet, so there is nothing to free */
-   else
-   {
-      while (parser->pMemberNames) /* free the lists of any objects that were still open */
-         JSON_Parser_PopMemberNameList(parser);
-   }
-   Decoder_Reset(&parser->decoderData);
-   Grammarian_Reset(&parser->grammarianData, isInitialized);
-   parser->encodingDetectedHandler = NULL;
-   parser->nullHandler = NULL;
-   parser->booleanHandler = NULL;
-   parser->stringHandler = NULL;
-   parser->numberHandler = NULL;
-   parser->specialNumberHandler = NULL;
-   parser->startObjectHandler = NULL;
-   parser->endObjectHandler = NULL;
-   parser->objectMemberHandler = NULL;
-   parser->startArrayHandler = NULL;
-   parser->endArrayHandler = NULL;
-   parser->arrayItemHandler = NULL;
-   parser->state = PARSER_RESET; /* do this last! */
-}
-/* Writes a null terminator just past the bytes of the current token. The
-   terminator is SHORTEST_ENCODING_SEQUENCE(encoding) zero bytes long, where
-   encoding is numberEncoding for T_NUMBER tokens and stringEncoding for
-   every other token. tokenBytesUsed is not changed. */
-static void JSON_Parser_NullTerminateToken(JSON_Parser parser)
-{
-   /* Because we always ensure that there are LONGEST_ENCODING_SEQUENCE bytes
-      available at the end of the token buffer when we record codepoints, we
-      can write the null terminator to the buffer with impunity. */
-   static const byte nullTerminatorBytes[LONGEST_ENCODING_SEQUENCE] = { 0 };
-   Encoding encoding = (Encoding)((parser->token == T_NUMBER) ?
parser->numberEncoding : parser->stringEncoding);
-   memcpy(parser->pTokenBytes + parser->tokenBytesUsed, nullTerminatorBytes, (size_t)SHORTEST_ENCODING_SEQUENCE(encoding));
-}
-/* Checks that the grammar is complete at end of input. Fails with
-   JSON_Error_ExpectedMoreTokens when symbols remain on the stack. */
-static JSON_Status JSON_Parser_FlushParser(JSON_Parser parser)
-{
-   /* The symbol stack should be empty when parsing finishes. */
-   if (!Grammarian_FinishedDocument(&parser->grammarianData))
-   {
-      JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_ExpectedMoreTokens);
-      return JSON_Failure;
-   }
-   return JSON_Success;
-}
-/* Common shape of the handlers that receive only the parser, and the helper
-   that invokes one. A NULL handler is skipped and counts as success.
-   PARSER_IN_TOKEN_HANDLER is set in parser->state for the duration of the
-   call. Any result other than JSON_Parser_Continue sets
-   JSON_Error_AbortedByHandler at the current token and returns
-   JSON_Failure. */
-typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SimpleTokenHandler)(JSON_Parser parser);
-static JSON_Status JSON_Parser_CallSimpleTokenHandler(JSON_Parser parser, JSON_Parser_SimpleTokenHandler handler)
-{
-   if (handler)
-   {
-      JSON_Parser_HandlerResult result;
-      SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      result = handler(parser);
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      if (result != JSON_Parser_Continue)
-      {
-         JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
-         return JSON_Failure;
-      }
-   }
-   return JSON_Success;
-}
-/* Invokes booleanHandler, if set, with JSON_True when the current token is
-   T_TRUE and JSON_False otherwise. Abort handling is the same as for the
-   simple token handlers. */
-static JSON_Status JSON_Parser_CallBooleanHandler(JSON_Parser parser)
-{
-   if (parser->booleanHandler)
-   {
-      JSON_Parser_HandlerResult result;
-      SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      result = parser->booleanHandler(parser, parser->token == T_TRUE ? JSON_True : JSON_False);
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      if (result != JSON_Parser_Continue)
-      {
-         JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
-         return JSON_Failure;
-      }
-   }
-   return JSON_Success;
-}
-/* Invokes objectMemberHandler (isObjectMember non-zero) or stringHandler
-   with the null-terminated token bytes, their length and the token
-   attributes. A member handler may return
-   JSON_Parser_TreatAsDuplicateObjectMember, which is reported as
-   JSON_Error_DuplicateObjectMember; every other non-continue result is
-   reported as JSON_Error_AbortedByHandler. */
-static JSON_Status JSON_Parser_CallStringHandler(JSON_Parser parser, int isObjectMember)
-{
-   JSON_Parser_StringHandler handler = isObjectMember ?
parser->objectMemberHandler : parser->stringHandler;
-   if (handler)
-   {
-      JSON_Parser_HandlerResult result;
-      JSON_Parser_NullTerminateToken(parser); /* the handler receives the token as a terminated buffer */
-      SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      result = handler(parser, (char*)parser->pTokenBytes, parser->tokenBytesUsed, parser->tokenAttributes);
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-
-      if (result != JSON_Parser_Continue)
-      {
-         JSON_Parser_SetErrorAtToken(parser,
-               (isObjectMember && result == JSON_Parser_TreatAsDuplicateObjectMember)
-               ? JSON_Error_DuplicateObjectMember
-               : JSON_Error_AbortedByHandler);
-         return JSON_Failure;
-      }
-   }
-   return JSON_Success;
-}
-/* Invokes numberHandler, if set, with the null-terminated token bytes, their
-   length and the token attributes. Any result other than
-   JSON_Parser_Continue sets JSON_Error_AbortedByHandler at the current token
-   and returns JSON_Failure. */
-static JSON_Status JSON_Parser_CallNumberHandler(JSON_Parser parser)
-{
-   if (parser->numberHandler)
-   {
-      JSON_Parser_HandlerResult result;
-      JSON_Parser_NullTerminateToken(parser);
-      SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      result = parser->numberHandler(parser, (char*)parser->pTokenBytes,
-            parser->tokenBytesUsed, parser->tokenAttributes);
-
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-
-      if (result != JSON_Parser_Continue)
-      {
-         JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
-         return JSON_Failure;
-      }
-   }
-   return JSON_Success;
-}
-/* Invokes specialNumberHandler, if set, with JSON_NaN for T_NAN,
-   JSON_Infinity for T_INFINITY and JSON_NegativeInfinity for any other
-   token. Abort handling is the same as for the number handler. */
-static JSON_Status JSON_Parser_CallSpecialNumberHandler(JSON_Parser parser)
-{
-   if (parser->specialNumberHandler)
-   {
-      JSON_Parser_HandlerResult result;
-      SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-      result = parser->specialNumberHandler(parser, parser->token == T_NAN ? JSON_NaN :
-            (parser->token == T_INFINITY ?
JSON_Infinity : JSON_NegativeInfinity));
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_TOKEN_HANDLER);
-
-      if (result != JSON_Parser_Continue)
-      {
-         JSON_Parser_SetErrorAtToken(parser, JSON_Error_AbortedByHandler);
-         return JSON_Failure;
-      }
-   }
-   return JSON_Success;
-}
-/* Dispatches the event(s) produced by the grammarian for the current token.
-
-   If EMIT_ARRAY_ITEM is set, arrayItemHandler is called first and the bit is
-   cleared; the remaining value selects one handler in the switch.
-
-   For start events the handler runs before the container is entered; for
-   end events the container is left before the handler runs.
-
-   With PARSER_EMBEDDED_DOCUMENT set, reaching depth 0 after the event
-   returns JSON_Failure with JSON_Error_StoppedAfterEmbeddedDocument. */
-static JSON_Status JSON_Parser_HandleGrammarEvents(JSON_Parser parser, byte emit)
-{
-   if (GET_FLAGS(emit, EMIT_ARRAY_ITEM))
-   {
-      if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->arrayItemHandler))
-      {
-         return JSON_Failure;
-      }
-      SET_FLAGS_OFF(byte, emit, EMIT_ARRAY_ITEM);
-   }
-   switch (emit)
-   { /* EMIT_NOTHING matches no case and falls through to the depth check */
-      case EMIT_NULL:
-         if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->nullHandler))
-            return JSON_Failure;
-         break;
-
-      case EMIT_BOOLEAN:
-         if (!JSON_Parser_CallBooleanHandler(parser))
-            return JSON_Failure;
-         break;
-
-      case EMIT_STRING:
-         if (!JSON_Parser_CallStringHandler(parser, 0/* isObjectMember */))
-            return JSON_Failure;
-         break;
-
-      case EMIT_NUMBER:
-         if (!JSON_Parser_CallNumberHandler(parser))
-            return JSON_Failure;
-         break;
-
-      case EMIT_SPECIAL_NUMBER:
-         if (!JSON_Parser_CallSpecialNumberHandler(parser))
-            return JSON_Failure;
-         break;
-
-      case EMIT_START_OBJECT:
-         if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startObjectHandler) ||
-               !JSON_Parser_StartContainer(parser, 1/*isObject*/))
-            return JSON_Failure;
-         break;
-
-      case EMIT_END_OBJECT:
-         JSON_Parser_EndContainer(parser, 1/*isObject*/);
-         if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endObjectHandler))
-            return JSON_Failure;
-         break;
-      case EMIT_OBJECT_MEMBER:
-         if (!JSON_Parser_AddMemberNameToList(parser) || /* will fail if member is duplicate */
-               !JSON_Parser_CallStringHandler(parser, 1 /* isObjectMember */))
-            return JSON_Failure;
-         break;
-
-      case EMIT_START_ARRAY:
-         if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->startArrayHandler) ||
-               !JSON_Parser_StartContainer(parser, 0/*isObject*/))
-            return JSON_Failure;
-         break;
-
-      case EMIT_END_ARRAY:
-         JSON_Parser_EndContainer(parser,
0/*isObject*/);
-         if (!JSON_Parser_CallSimpleTokenHandler(parser, parser->endArrayHandler))
-            return JSON_Failure;
-         break;
-   }
-
-   if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
-   { /* the top-level value of an embedded document is complete: stop here */
-      JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_StoppedAfterEmbeddedDocument);
-      return JSON_Failure;
-   }
-   return JSON_Success;
-}
-/* Runs the current token through the grammarian and dispatches the resulting
-   events. A rejected token fails with JSON_Error_UnexpectedToken, a full
-   symbol stack with JSON_Error_OutOfMemory. On success the lexer state is
-   reset so that the next token starts from LEXING_WHITESPACE with an empty
-   token buffer. */
-static JSON_Status JSON_Parser_ProcessToken(JSON_Parser parser)
-{
-   GrammarianOutput output;
-   output = Grammarian_ProcessToken(&parser->grammarianData, parser->token, &parser->memorySuite);
-   switch (GRAMMARIAN_RESULT_CODE(output))
-   {
-      case ACCEPTED_TOKEN:
-         if (!JSON_Parser_HandleGrammarEvents(parser, GRAMMARIAN_EVENT(output)))
-            return JSON_Failure;
-         break;
-
-      case REJECTED_TOKEN:
-         JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnexpectedToken);
-         return JSON_Failure;
-
-      case SYMBOL_STACK_FULL:
-         JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory);
-         return JSON_Failure;
-   }
-
-   /* Reset the lexer to prepare for the next token. */
-   parser->lexerState = LEXING_WHITESPACE;
-   parser->lexerBits = 0;
-   parser->token = T_NONE;
-   parser->tokenAttributes = 0;
-   parser->tokenBytesUsed = 0;
-   return JSON_Success;
-}
-
-/* Lexer functions. */
-/* The characters that must follow the first character of each literal
-   ("null", "true", "false", "NaN", "Infinity"), each run terminated by 0.
-   The START_INDEX constants below give the offset of each run; while a
-   literal is being lexed, lexerBits holds the index of the next expected
-   character. */
-static const byte expectedLiteralChars[] = { 'u', 'l', 'l', 0, 'r', 'u', 'e', 0, 'a', 'l', 's', 'e', 0, 'a', 'N', 0, 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 };
-
-#define NULL_LITERAL_EXPECTED_CHARS_START_INDEX 0
-#define TRUE_LITERAL_EXPECTED_CHARS_START_INDEX 4
-#define FALSE_LITERAL_EXPECTED_CHARS_START_INDEX 8
-#define NAN_LITERAL_EXPECTED_CHARS_START_INDEX 13
-#define INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX 16
-
-/* Forward declaration.
*/ -static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser);
-static JSON_Status JSON_Parser_ProcessCodepoint(
-      JSON_Parser parser, Codepoint c, size_t encodedLength);
-/* Decides what to do with an invalid encoding sequence of encodedLength
-   bytes in the input:
-     - inside a string token with PARSER_REPLACE_INVALID set, the sequence is
-       processed as REPLACEMENT_CHARACTER_CODEPOINT and the token is marked
-       with JSON_ContainsReplacedCharacter;
-     - at depth 0 of an embedded document, the input is treated as if it had
-       ended, by flushing the lexer and then the parser;
-     - otherwise the parse fails with JSON_Error_InvalidEncodingSequence. */
-static JSON_Status JSON_Parser_HandleInvalidEncodingSequence(
-      JSON_Parser parser, size_t encodedLength)
-{
-   if (parser->token == T_STRING && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID))
-   {
-      /* Since we're inside a string token, replacing the invalid sequence
-         with the Unicode replacement character as requested by the client
-         is a viable way to avoid a parse failure. Outside a string token,
-         such a replacement would simply trigger JSON_Error_UnknownToken
-         when we tried to process the replacement character, so it's less
-         confusing to stick with JSON_Error_InvalidEncodingSequence in that
-         case. */
-      SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsReplacedCharacter);
-      return JSON_Parser_ProcessCodepoint(parser, REPLACEMENT_CHARACTER_CODEPOINT, encodedLength);
-   }
-   else if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
-   {
-      /* Since we're parsing the top-level value of an embedded - document, assume that the invalid encoding sequence we've - encountered does not actually belong to the document, and - finish parsing by pretending that we've encountered EOF - instead of an invalid sequence. If the content is valid, - this will fail with JSON_Error_StoppedAfterEmbeddedDocument; - otherwise, it will fail with an appropriate error.
*/ - return (JSON_Status)(JSON_Parser_FlushLexer(parser) && JSON_Parser_FlushParser(parser));
-   }
-   JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_InvalidEncodingSequence);
-   return JSON_Failure;
-}
-/* Handles a number token that turned out to be malformed at codepoint c.
-
-   attributesToRemove is always cleared from the token attributes first.
-   Then:
-     - at depth 0 of an embedded document, the location and the token length
-       are moved back by codepointsSinceValidNumber codepoints and the
-       shortened token is processed;
-     - if c is EOF_CODEPOINT, JSON_Success is returned without setting an
-       error;
-     - otherwise the parse fails with JSON_Error_InvalidNumber at the token. */
-static JSON_Status JSON_Parser_HandleInvalidNumber(JSON_Parser parser,
-      Codepoint c, int codepointsSinceValidNumber, TokenAttributes attributesToRemove)
-{
-   SET_FLAGS_OFF(TokenAttributes, parser->tokenAttributes, attributesToRemove);
-   if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT))
-   {
-      /* The invalid number is the top-level value of an embedded document,
-         and it has a prefix that can be interpreted as a valid number.
-         We want to backtrack so that we are at the end of that prefix,
-         and then process the valid token.
-
-         Note that backtracking requires us to make three assumptions, which
-         are always valid in the context of a number token:
-
-         1. The input encoding is not JSON_UnknownEncoding.
-
-         2. The codepoints we are backing up across are all in the range
-            U+0000 - U+007F, aka ASCII, so we can assume the number of
-            bytes comprising them based on the input encoding.
-
-         3. The codepoints we are backing up across do not include any
-            line breaks, so we can assume that the line number stays the
-            same and the column number can simply be decremented.
-
-         For example:
-
-         "01" => "0"
-         "123.!" => "123"
-         "123e!" => "123"
-         "123e+!" => "123"
-         "123e-!" => "123"
-         "1.2e!" => "1.2"
-         "1.2e+!" => "1.2"
-         "1.2e-!" => "1.2"
-      */
-      parser->codepointLocationByte -= (size_t)codepointsSinceValidNumber
-         * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding);
-      parser->codepointLocationColumn -= (size_t)codepointsSinceValidNumber;
-      parser->tokenBytesUsed -= (size_t)codepointsSinceValidNumber
-         * (size_t)SHORTEST_ENCODING_SEQUENCE(parser->numberEncoding);
-      return JSON_Parser_ProcessToken(parser); /* always fails: at depth 0 of an embedded document even an accepted value ends in JSON_Error_StoppedAfterEmbeddedDocument */
-   }
-   /* Allow JSON_Parser_FlushLexer() to fail.
*/ - else if (c == EOF_CODEPOINT)
-      return JSON_Success;
-
-   JSON_Parser_SetErrorAtToken(parser, JSON_Error_InvalidNumber);
-   return JSON_Failure;
-}
-/* Marks the beginning of a new token of the given kind: stores the token
-   symbol and copies the current codepoint location (byte, line, column)
-   into the token location fields. */
-static void JSON_Parser_StartToken(JSON_Parser parser, Symbol token)
-{
-   parser->token = token;
-   parser->tokenLocationByte = parser->codepointLocationByte;
-   parser->tokenLocationLine = parser->codepointLocationLine;
-   parser->tokenLocationColumn = parser->codepointLocationColumn;
-}
-
-static JSON_Status JSON_Parser_ProcessCodepoint(JSON_Parser parser, Codepoint c, size_t encodedLength)
-{
-   Encoding tokenEncoding;
-   size_t maxTokenLength;
-   int tokenFinished = 0;
-   Codepoint codepointToRecord = EOF_CODEPOINT;
-
-   /* If the previous codepoint was U+000D (CARRIAGE RETURN), and the current
-      codepoint is U+000A (LINE FEED), then treat the 2 codepoints as a single
-      line break. */
-   if (GET_FLAGS(parser->state, PARSER_AFTER_CARRIAGE_RETURN))
-   {
-      if (c == LINE_FEED_CODEPOINT)
-         parser->codepointLocationLine--;
-      SET_FLAGS_OFF(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN);
-   }
-
-reprocess:
-
-   switch (parser->lexerState)
-   {
-      case LEXING_WHITESPACE:
-         if (c == '{')
-         {
-            JSON_Parser_StartToken(parser, T_LEFT_CURLY);
-            tokenFinished = 1;
-         }
-         else if (c == '}')
-         {
-            JSON_Parser_StartToken(parser, T_RIGHT_CURLY);
-            tokenFinished = 1;
-         }
-         else if (c == '[')
-         {
-            JSON_Parser_StartToken(parser, T_LEFT_SQUARE);
-            tokenFinished = 1;
-         }
-         else if (c == ']')
-         {
-            JSON_Parser_StartToken(parser, T_RIGHT_SQUARE);
-            tokenFinished = 1;
-         }
-         else if (c == ':')
-         {
-            JSON_Parser_StartToken(parser, T_COLON);
-            tokenFinished = 1;
-         }
-         else if (c == ',')
-         {
-            JSON_Parser_StartToken(parser, T_COMMA);
-            tokenFinished = 1;
-         }
-         else if (c == 'n')
-         {
-            JSON_Parser_StartToken(parser, T_NULL);
-            parser->lexerBits = NULL_LITERAL_EXPECTED_CHARS_START_INDEX;
-            parser->lexerState = LEXING_LITERAL;
-         }
-         else if (c == 't')
-         {
-            JSON_Parser_StartToken(parser, T_TRUE);
-            parser->lexerBits =
TRUE_LITERAL_EXPECTED_CHARS_START_INDEX; - parser->lexerState = LEXING_LITERAL; - } - else if (c == 'f') - { - JSON_Parser_StartToken(parser, T_FALSE); - parser->lexerBits = FALSE_LITERAL_EXPECTED_CHARS_START_INDEX; - parser->lexerState = LEXING_LITERAL; - } - else if (c == '"') - { - JSON_Parser_StartToken(parser, T_STRING); - parser->lexerState = LEXING_STRING; - } - else if (c == '-') - { - JSON_Parser_StartToken(parser, T_NUMBER); - parser->tokenAttributes = JSON_IsNegative; - codepointToRecord = '-'; - parser->lexerState = LEXING_NUMBER_AFTER_MINUS; - goto recordNumberCodepointAndAdvance; - } - else if (c == '0') - { - JSON_Parser_StartToken(parser, T_NUMBER); - codepointToRecord = '0'; - parser->lexerState = LEXING_NUMBER_AFTER_LEADING_ZERO; - goto recordNumberCodepointAndAdvance; - } - else if (c >= '1' && c <= '9') - { - JSON_Parser_StartToken(parser, T_NUMBER); - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else if (c == ' ' || c == TAB_CODEPOINT || c == LINE_FEED_CODEPOINT || - c == CARRIAGE_RETURN_CODEPOINT || c == EOF_CODEPOINT) - { - /* Ignore whitespace between tokens. */ - } - else if (c == BOM_CODEPOINT && parser->codepointLocationByte == 0) - { - /* OK, we'll allow the BOM. */ - if (GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) { } - else - { - JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_BOMNotAllowed); - return JSON_Failure; - } - } - else if (c == '/' && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) - { - /* Comments are not real tokens, but we save the location - of the comment as the token location in case of an error. 
*/ - parser->tokenLocationByte = parser->codepointLocationByte; - parser->tokenLocationLine = parser->codepointLocationLine; - parser->tokenLocationColumn = parser->codepointLocationColumn; - parser->lexerState = LEXING_COMMENT_AFTER_SLASH; - } - else if (c == 'N' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) - { - JSON_Parser_StartToken(parser, T_NAN); - parser->lexerBits = NAN_LITERAL_EXPECTED_CHARS_START_INDEX; - parser->lexerState = LEXING_LITERAL; - } - else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) - { - JSON_Parser_StartToken(parser, T_INFINITY); - parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX; - parser->lexerState = LEXING_LITERAL; - } - else - { - JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnknownToken); - return JSON_Failure; - } - goto advance; - - case LEXING_LITERAL: - /* While lexing a literal we store an index into expectedLiteralChars - in lexerBits. */ - if (expectedLiteralChars[parser->lexerBits]) - { - /* The codepoint should match the next character in the literal. */ - if (c != expectedLiteralChars[parser->lexerBits]) - { - JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); - return JSON_Failure; - } - parser->lexerBits++; - - /* If the literal is the top-level value of an embedded document, - process it as soon as we consume its last expected codepoint. - Normally we defer processing until the following codepoint - has been examined, so that we can treat sequences like "nullx" - as a single, unknown token rather than a null literal followed - by an unknown token. */ - if (!parser->depth && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT) && - !expectedLiteralChars[parser->lexerBits]) - tokenFinished = 1; - } - else - { - /* The literal should be finished, so the codepoint should not be - a plausible JSON literal character, but rather EOF, whitespace, - or the first character of the next token. 
*/ - if ((c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9') || - (c == '_')) - { - JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); - return JSON_Failure; - } - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - } - goto advance; - - case LEXING_STRING: - /* Allow JSON_Parser_FlushLexer() to fail. */ - if (c == EOF_CODEPOINT) { } - else if (c == '"') - tokenFinished = 1; - else if (c == '\\') - parser->lexerState = LEXING_STRING_ESCAPE; - else if (c < 0x20 && !GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) - { - /* ASCII control characters (U+0000 - U+001F) are not allowed to - appear unescaped in string values unless specifically allowed. */ - JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_UnescapedControlCharacter); - return JSON_Failure; - } - else - { - codepointToRecord = c; - goto recordStringCodepointAndAdvance; - } - goto advance; - - case LEXING_STRING_ESCAPE: - if (c == EOF_CODEPOINT) - { - /* Allow JSON_Parser_FlushLexer() to fail. */ - } - else - { - if (c == 'u') - parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_1; - else - { - if (c == '"' || c == '\\' || c == '/') - codepointToRecord = c; - else if (c == 'b') - codepointToRecord = BACKSPACE_CODEPOINT; - else if (c == 't') - codepointToRecord = TAB_CODEPOINT; - else if (c == 'n') - codepointToRecord = LINE_FEED_CODEPOINT; - else if (c == 'f') - codepointToRecord = FORM_FEED_CODEPOINT; - else if (c == 'r') - codepointToRecord = CARRIAGE_RETURN_CODEPOINT; - else - { - /* The current codepoint location is the first character after - the backslash that started the escape sequence. The error - location should be the beginning of the escape sequence, 1 - character earlier. 
*/ - JSON_Parser_SetErrorAtStringEscapeSequenceStart(parser, JSON_Error_InvalidEscapeSequence, 1); - return JSON_Failure; - } - parser->lexerState = LEXING_STRING; - goto recordStringCodepointAndAdvance; - } - } - goto advance; - - case LEXING_STRING_HEX_ESCAPE_BYTE_1: - case LEXING_STRING_HEX_ESCAPE_BYTE_2: - case LEXING_STRING_HEX_ESCAPE_BYTE_3: - case LEXING_STRING_HEX_ESCAPE_BYTE_4: - case LEXING_STRING_HEX_ESCAPE_BYTE_5: - case LEXING_STRING_HEX_ESCAPE_BYTE_6: - case LEXING_STRING_HEX_ESCAPE_BYTE_7: - case LEXING_STRING_HEX_ESCAPE_BYTE_8: - /* Allow JSON_Parser_FlushLexer() to fail. */ - if (c != EOF_CODEPOINT) - { - /* While lexing a string hex escape sequence we store the bytes - of the escaped codepoint in the low 2 bytes of lexerBits. If - the escape sequence represents a leading surrogate, we shift - the leading surrogate into the high 2 bytes and lex a second - hex escape sequence (which should be a trailing surrogate). */ - int byteNumber = (parser->lexerState - LEXING_STRING_HEX_ESCAPE_BYTE_1) & 0x3; - uint32_t nibble; - if (c >= '0' && c <= '9') - nibble = c - '0'; - else if (c >= 'A' && c <= 'F') - nibble = c - 'A' + 10; - else if (c >= 'a' && c <= 'f') - nibble = c - 'a' + 10; - else - { - /* The current codepoint location is one of the 4 hex digit - character slots in the hex escape sequence. The error - location should be the beginning of the hex escape - sequence, between 2 and 5 bytes earlier. */ - int codepointsAgo = 2 /* for "\u" */ + byteNumber; - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_InvalidEscapeSequence, codepointsAgo); - return JSON_Failure; - } - /* Store the hex digit's bits in the appropriate byte of lexerBits. */ - nibble <<= (3 - byteNumber) * 4 /* shift left by 12, 8, 4, 0 */ ; - parser->lexerBits |= nibble; - if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_4) - { - /* The escape sequence is complete. 
We need to check whether - it represents a leading surrogate (which implies that it - will be immediately followed by a hex-escaped trailing - surrogate), a trailing surrogate (which is invalid), or a - valid codepoint (which should simply be appended to the - string token value). */ - if (IS_LEADING_SURROGATE(parser->lexerBits)) - { - /* Shift the leading surrogate into the high 2 bytes of - lexerBits so that the trailing surrogate can be stored - in the low 2 bytes. */ - parser->lexerBits <<= 16; - parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH; - } - else if (IS_TRAILING_SURROGATE(parser->lexerBits)) - { - /* The current codepoint location is the last hex digit - of the hex escape sequence. The error location should - be the beginning of the hex escape sequence, 5 - characters earlier. */ - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_UnpairedSurrogateEscapeSequence, 5); - return JSON_Failure; - } - else - { - /* The escape sequence represents a BMP codepoint. */ - codepointToRecord = parser->lexerBits; - parser->lexerBits = 0; - parser->lexerState = LEXING_STRING; - goto recordStringCodepointAndAdvance; - } - } - else if (parser->lexerState == LEXING_STRING_HEX_ESCAPE_BYTE_8) - { - /* The second hex escape sequence is complete. We need to - check whether it represents a trailing surrogate as - expected. If so, the surrogate pair represents a single - non-BMP codepoint. */ - if (!IS_TRAILING_SURROGATE(parser->lexerBits & 0xFFFF)) - { - /* The current codepoint location is the last hex digit of - the second hex escape sequence. The error location - should be the beginning of the leading surrogate - hex escape sequence, 11 characters earlier. */ - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_UnpairedSurrogateEscapeSequence, 11); - return JSON_Failure; - } - /* The escape sequence represents a non-BMP codepoint. 
*/ - codepointToRecord = CODEPOINT_FROM_SURROGATES(parser->lexerBits); - parser->lexerBits = 0; - parser->lexerState = LEXING_STRING; - goto recordStringCodepointAndAdvance; - } - else - parser->lexerState++; - } - goto advance; - - case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_BACKSLASH: - if (c != EOF_CODEPOINT) - { - if (c != '\\') - { - /* The current codepoint location is the first character after - the leading surrogate hex escape sequence. The error - location should be the beginning of the leading surrogate - hex escape sequence, 6 characters earlier. */ - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_UnpairedSurrogateEscapeSequence, 6); - return JSON_Failure; - } - parser->lexerState = LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U; - } - goto advance; - - case LEXING_STRING_TRAILING_SURROGATE_HEX_ESCAPE_U: - if (c != EOF_CODEPOINT) - { - if (c != 'u') - { - /* Distinguish between a totally bogus escape sequence - and a valid one that just isn't the hex escape kind - that we require for a trailing surrogate. The current - codepoint location is the first character after the - backslash that should have introduced the trailing - surrogate hex escape sequence. */ - if (c == '"' || c == '\\' || c == '/' || c == 'b' || - c == 't' || c == 'n' || c == 'f' || c == 'r') - { - /* The error location should be at that beginning of the - leading surrogate's hex escape sequence, 7 characters - earlier. */ - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_UnpairedSurrogateEscapeSequence, 7); - } - else - { - /* The error location should be at that backslash, 1 - character earlier. */ - JSON_Parser_SetErrorAtStringEscapeSequenceStart( - parser, JSON_Error_InvalidEscapeSequence, 1); - } - return JSON_Failure; - } - parser->lexerState = LEXING_STRING_HEX_ESCAPE_BYTE_5; - } - goto advance; - - case LEXING_NUMBER_AFTER_MINUS: - if (c == EOF_CODEPOINT) - { - /* Allow JSON_Parser_FlushLexer() to fail. 
*/ - } - else if (c == 'I' && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) - { - parser->token = T_NEGATIVE_INFINITY; /* changing horses mid-stream, so to speak */ - parser->lexerBits = INFINITY_LITERAL_EXPECTED_CHARS_START_INDEX; - parser->lexerState = LEXING_LITERAL; - } - else - { - if (c == '0') - { - codepointToRecord = '0'; - parser->lexerState = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO; - goto recordNumberCodepointAndAdvance; - } - else if (c >= '1' && c <= '9') - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_DECIMAL_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else - { - /* We trigger an unknown token error rather than an invalid number - error so that "Foo" and "-Foo" trigger the same error. */ - JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); - return JSON_Failure; - } - } - goto advance; - - case LEXING_NUMBER_AFTER_LEADING_ZERO: - case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO: - if (c == '.') - { - codepointToRecord = '.'; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint); - parser->lexerState = LEXING_NUMBER_AFTER_DOT; - goto recordNumberCodepointAndAdvance; - } - else if (c == 'e' || c == 'E') - { - codepointToRecord = c; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); - parser->lexerState = LEXING_NUMBER_AFTER_E; - goto recordNumberCodepointAndAdvance; - } - else if (c >= '0' && c <= '9') - { - /* JSON does not allow the integer part of a number to have any - digits after a leading zero. 
*/ - if (!JSON_Parser_HandleInvalidNumber(parser, c, 0, 0)) - return JSON_Failure; - } - else if ((c == 'x' || c == 'X') && - parser->lexerState == LEXING_NUMBER_AFTER_LEADING_ZERO && - GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) - { - codepointToRecord = c; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_IsHex); - parser->lexerState = LEXING_NUMBER_AFTER_X; - goto recordNumberCodepointAndAdvance; - } - else - { - /* The number is finished. */ - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - } - goto advance; - - case LEXING_NUMBER_AFTER_X: - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_HEX_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_IsHex)) - return JSON_Failure; - goto advance; - - case LEXING_NUMBER_HEX_DIGITS: - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) - { - codepointToRecord = c; - goto recordNumberCodepointAndAdvance; - } - /* The number is finished. */ - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - - case LEXING_NUMBER_DECIMAL_DIGITS: - if (c >= '0' && c <= '9') - { - codepointToRecord = c; - goto recordNumberCodepointAndAdvance; - } - else if (c == '.') - { - codepointToRecord = '.'; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsDecimalPoint); - parser->lexerState = LEXING_NUMBER_AFTER_DOT; - goto recordNumberCodepointAndAdvance; - } - else if (c == 'e' || c == 'E') - { - codepointToRecord = c; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); - parser->lexerState = LEXING_NUMBER_AFTER_E; - goto recordNumberCodepointAndAdvance; - } - /* The number is finished. 
*/ - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - - case LEXING_NUMBER_AFTER_DOT: - if (c >= '0' && c <= '9') - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_FRACTIONAL_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsDecimalPoint)) - return JSON_Failure; - goto advance; - - case LEXING_NUMBER_FRACTIONAL_DIGITS: - if (c >= '0' && c <= '9') - { - codepointToRecord = c; - goto recordNumberCodepointAndAdvance; - } - else if (c == 'e' || c == 'E') - { - codepointToRecord = c; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsExponent); - parser->lexerState = LEXING_NUMBER_AFTER_E; - goto recordNumberCodepointAndAdvance; - } - /* The number is finished. */ - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - - case LEXING_NUMBER_AFTER_E: - if (c == '+') - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN; - goto recordNumberCodepointAndAdvance; - } - else if (c == '-') - { - codepointToRecord = c; - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNegativeExponent); - parser->lexerState = LEXING_NUMBER_AFTER_EXPONENT_SIGN; - goto recordNumberCodepointAndAdvance; - } - else if (c >= '0' && c <= '9') - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else if (!JSON_Parser_HandleInvalidNumber(parser, c, 1, JSON_ContainsExponent)) - return JSON_Failure; - goto advance; - - case LEXING_NUMBER_AFTER_EXPONENT_SIGN: - if (c >= '0' && c <= '9') - { - codepointToRecord = c; - parser->lexerState = LEXING_NUMBER_EXPONENT_DIGITS; - goto recordNumberCodepointAndAdvance; - } - else if (!JSON_Parser_HandleInvalidNumber(parser, c, 2, JSON_ContainsExponent | JSON_ContainsNegativeExponent)) - return JSON_Failure; - goto advance; - - case LEXING_NUMBER_EXPONENT_DIGITS: - if (c >= '0' 
&& c <= '9') - { - codepointToRecord = c; - goto recordNumberCodepointAndAdvance; - } - /* The number is finished. */ - if (!JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - goto reprocess; - - case LEXING_COMMENT_AFTER_SLASH: - if (c == '/') - parser->lexerState = LEXING_SINGLE_LINE_COMMENT; - else if (c == '*') - parser->lexerState = LEXING_MULTI_LINE_COMMENT; - else - { - JSON_Parser_SetErrorAtToken(parser, JSON_Error_UnknownToken); - return JSON_Failure; - } - goto advance; - - case LEXING_SINGLE_LINE_COMMENT: - if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT || c == EOF_CODEPOINT) - parser->lexerState = LEXING_WHITESPACE; - goto advance; - - case LEXING_MULTI_LINE_COMMENT: - if (c == '*') - parser->lexerState = LEXING_MULTI_LINE_COMMENT_AFTER_STAR; - goto advance; - - case LEXING_MULTI_LINE_COMMENT_AFTER_STAR: - if (c == '/') - parser->lexerState = LEXING_WHITESPACE; - else if (c != '*') - parser->lexerState = LEXING_MULTI_LINE_COMMENT; - goto advance; - } - -recordStringCodepointAndAdvance: - - tokenEncoding = parser->stringEncoding; - maxTokenLength = parser->maxStringLength; - if (!codepointToRecord) - { - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNullCharacter | JSON_ContainsControlCharacter); - } - else if (codepointToRecord < FIRST_NON_CONTROL_CODEPOINT) - { - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsControlCharacter); - } - else if (codepointToRecord >= FIRST_NON_BMP_CODEPOINT) - { - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter | JSON_ContainsNonBMPCharacter); - } - else if (codepointToRecord >= FIRST_NON_ASCII_CODEPOINT) - { - SET_FLAGS_ON(TokenAttributes, parser->tokenAttributes, JSON_ContainsNonASCIICharacter); - } - goto recordCodepointAndAdvance; - -recordNumberCodepointAndAdvance: - - tokenEncoding = parser->numberEncoding; - maxTokenLength = parser->maxNumberLength; - goto recordCodepointAndAdvance; - -recordCodepointAndAdvance: 
- - /* We always ensure that there are LONGEST_ENCODING_SEQUENCE bytes - available in the buffer for the next codepoint, so we don't have to - check whether there is room when we decode a new codepoint, and if - there isn't another codepoint, we have space already allocated for - the encoded null terminator.*/ - parser->tokenBytesUsed += EncodeCodepoint(codepointToRecord, tokenEncoding, parser->pTokenBytes + parser->tokenBytesUsed); - if (parser->tokenBytesUsed > maxTokenLength) - { - JSON_Parser_SetErrorAtToken(parser, parser->token == T_NUMBER ? JSON_Error_TooLongNumber : JSON_Error_TooLongString); - return JSON_Failure; - } - if (parser->tokenBytesUsed > parser->tokenBytesLength - LONGEST_ENCODING_SEQUENCE) - { - byte* pBiggerBuffer = DoubleBuffer(&parser->memorySuite, parser->defaultTokenBytes, parser->pTokenBytes, parser->tokenBytesLength); - if (!pBiggerBuffer) - { - JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_OutOfMemory); - return JSON_Failure; - } - parser->pTokenBytes = pBiggerBuffer; - parser->tokenBytesLength *= 2; - } - goto advance; - -advance: - - /* The current codepoint has been accepted, so advance the codepoint - location counters accordingly. Note that the one time we don't - do this is when the codepoint is EOF, which doesn't actually - appear in the input stream. */ - if (c == CARRIAGE_RETURN_CODEPOINT) - { - SET_FLAGS_ON(ParserState, parser->state, PARSER_AFTER_CARRIAGE_RETURN); - } - if (c != EOF_CODEPOINT) - { - parser->codepointLocationByte += encodedLength; - if (c == CARRIAGE_RETURN_CODEPOINT || c == LINE_FEED_CODEPOINT) - { - /* The next character will begin a new line. */ - parser->codepointLocationLine++; - parser->codepointLocationColumn = 0; - } - else - { - /* The next character will be on the same line. 
*/ - parser->codepointLocationColumn++; - } - } - - if (tokenFinished && !JSON_Parser_ProcessToken(parser)) - return JSON_Failure; - - return JSON_Success; -} - -static JSON_Status JSON_Parser_FlushLexer(JSON_Parser parser) -{ - /* Push the EOF codepoint to the lexer so that it can finish the pending - token, if any. The EOF codepoint is never emitted by the decoder - itself, since it is outside the Unicode range and therefore cannot - be encoded in any of the possible input encodings. */ - if (!JSON_Parser_ProcessCodepoint(parser, EOF_CODEPOINT, 0)) - return JSON_Failure; - - /* The lexer should be idle when parsing finishes. */ - if (parser->lexerState != LEXING_WHITESPACE) - { - JSON_Parser_SetErrorAtToken(parser, JSON_Error_IncompleteToken); - return JSON_Failure; - } - return JSON_Success; -} - -/* Parser's decoder functions. */ - -static JSON_Status JSON_Parser_CallEncodingDetectedHandler(JSON_Parser parser) -{ - if (parser->encodingDetectedHandler && parser->encodingDetectedHandler(parser) != JSON_Parser_Continue) - { - JSON_Parser_SetErrorAtCodepoint(parser, JSON_Error_AbortedByHandler); - return JSON_Failure; - } - return JSON_Success; -} - -/* Forward declaration. */ -static JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length); - -static JSON_Status JSON_Parser_ProcessUnknownByte(JSON_Parser parser, byte b) -{ - /* When the input encoding is unknown, the first 4 bytes of input are - recorded in decoder.bits. 
*/ - byte bytes[LONGEST_ENCODING_SEQUENCE]; - - switch (parser->decoderData.state) - { - case DECODER_RESET: - parser->decoderData.state = DECODED_1_OF_4; - parser->decoderData.bits = (uint32_t)b << 24; - break; - - case DECODED_1_OF_4: - parser->decoderData.state = DECODED_2_OF_4; - parser->decoderData.bits |= (uint32_t)b << 16; - break; - - case DECODED_2_OF_4: - parser->decoderData.state = DECODED_3_OF_4; - parser->decoderData.bits |= (uint32_t)b << 8; - break; - - case DECODED_3_OF_4: - bytes[0] = (byte)(parser->decoderData.bits >> 24); - bytes[1] = (byte)(parser->decoderData.bits >> 16); - bytes[2] = (byte)(parser->decoderData.bits >> 8); - bytes[3] = (byte)(b); - - /* We try to match the following patterns in order, where .. is any - byte value and nz is any non-zero byte value: - EF BB BF .. => UTF-8 with BOM - FF FE 00 00 => UTF-32LE with BOM - FF FE nz 00 => UTF-16LE with BOM - 00 00 FE FF -> UTF-32BE with BOM - FE FF .. .. => UTF-16BE with BOM - nz nz .. .. => UTF-8 - nz 00 nz .. => UTF-16LE - nz 00 00 00 => UTF-32LE - 00 nz .. .. => UTF-16BE - 00 00 00 nz => UTF-32BE - .. .. .. .. => unknown encoding */ - if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) - { - /* EF BB BF .. */ - parser->inputEncoding = JSON_UTF8; - } - else if (bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[3] == 0x00) - { - /* FF FE 00 00 or - FF FE nz 00 */ - parser->inputEncoding = (bytes[2] == 0x00) ? JSON_UTF32LE : JSON_UTF16LE; - } - else if (bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF) - { - /* 00 00 FE FF */ - parser->inputEncoding = JSON_UTF32BE; - } - else if (bytes[0] == 0xFE && bytes[1] == 0xFF) - { - /* FE FF .. .. */ - parser->inputEncoding = JSON_UTF16BE; - } - else if (bytes[0] != 0x00) - { - /* nz .. .. .. */ - if (bytes[1] != 0x00) - { - /* nz nz .. .. */ - parser->inputEncoding = JSON_UTF8; - } - else if (bytes[2] != 0x00) - { - /* nz 00 nz .. 
*/ - parser->inputEncoding = JSON_UTF16LE; - } - else if (bytes[3] == 0x00) - { - /* nz 00 00 00 */ - parser->inputEncoding = JSON_UTF32LE; - } - else - { - /* nz 00 00 nz => error */ - } - } - else if (bytes[1] != 0x00) - { - /* 00 nz .. .. */ - parser->inputEncoding = JSON_UTF16BE; - } - else if (bytes[2] == 0x00 && bytes[3] != 0x00) - { - /* 00 00 00 nz */ - parser->inputEncoding = JSON_UTF32BE; - } - else - { - /* 00 00 nz .. or - 00 00 00 00 => error */ - } - - if (parser->inputEncoding == JSON_UnknownEncoding) - return JSON_Parser_HandleInvalidEncodingSequence(parser, 4); - - if (!JSON_Parser_CallEncodingDetectedHandler(parser)) - return JSON_Failure; - - /* Reset the decoder before reprocessing the bytes. */ - Decoder_Reset(&parser->decoderData); - return JSON_Parser_ProcessInputBytes(parser, bytes, 4); - } - - /* We don't have 4 bytes yet. */ - return JSON_Success; -} - -JSON_Status JSON_Parser_ProcessInputBytes(JSON_Parser parser, const byte* pBytes, size_t length) -{ - /* Note that if length is 0, pBytes is allowed to be NULL. 
*/ - size_t i = 0; - while (parser->inputEncoding == JSON_UnknownEncoding && i < length) - { - if (!JSON_Parser_ProcessUnknownByte(parser, pBytes[i])) - return JSON_Failure; - i++; - } - while (i < length) - { - DecoderOutput output = Decoder_ProcessByte( - &parser->decoderData, parser->inputEncoding, pBytes[i]); - DecoderResultCode result = DECODER_RESULT_CODE(output); - switch (result) - { - case SEQUENCE_PENDING: - i++; - break; - - case SEQUENCE_COMPLETE: - if (!JSON_Parser_ProcessCodepoint( - parser, DECODER_CODEPOINT(output), - DECODER_SEQUENCE_LENGTH(output))) - return JSON_Failure; - i++; - break; - - case SEQUENCE_INVALID_INCLUSIVE: - i++; - /* fallthrough */ - case SEQUENCE_INVALID_EXCLUSIVE: - if (!JSON_Parser_HandleInvalidEncodingSequence( - parser, DECODER_SEQUENCE_LENGTH(output))) - return JSON_Failure; - break; - } - } - return JSON_Success; -} - -static JSON_Status JSON_Parser_FlushDecoder(JSON_Parser parser) -{ - /* If the input was 1, 2, or 3 bytes long, and the input encoding was not - explicitly specified by the client, we can sometimes make a reasonable - guess. If the input was 1 or 3 bytes long, the only encoding that could - possibly be valid JSON is UF-8. If the input was 2 bytes long, we try - to match the following patterns in order, where .. is any byte value - and nz is any non-zero byte value: - FF FE => UTF-16LE with BOM - FE FF => UTF-16BE with BOM - nz nz => UTF-8 - nz 00 => UTF-16LE - 00 nz => UTF-16BE - .. .. 
=> unknown encoding - */ - if (parser->inputEncoding == JSON_UnknownEncoding && - parser->decoderData.state != DECODER_RESET) - { - byte bytes[3]; - size_t length = 0; - bytes[0] = (byte)(parser->decoderData.bits >> 24); - bytes[1] = (byte)(parser->decoderData.bits >> 16); - bytes[2] = (byte)(parser->decoderData.bits >> 8); - - switch (parser->decoderData.state) - { - case DECODED_1_OF_4: - parser->inputEncoding = JSON_UTF8; - length = 1; - break; - - case DECODED_2_OF_4: - /* FF FE */ - if (bytes[0] == 0xFF && bytes[1] == 0xFE) - parser->inputEncoding = JSON_UTF16LE; - /* FE FF */ - else if (bytes[0] == 0xFE && bytes[1] == 0xFF) - parser->inputEncoding = JSON_UTF16BE; - else if (bytes[0] != 0x00) - { - /* nz nz or - nz 00 */ - parser->inputEncoding = bytes[1] ? JSON_UTF8 : JSON_UTF16LE; - } - /* 00 nz */ - else if (bytes[1] != 0x00) - parser->inputEncoding = JSON_UTF16BE; - /* 00 00 */ - else - return JSON_Parser_HandleInvalidEncodingSequence(parser, 2); - length = 2; - break; - - case DECODED_3_OF_4: - parser->inputEncoding = JSON_UTF8; - length = 3; - break; - } - - if (!JSON_Parser_CallEncodingDetectedHandler(parser)) - return JSON_Failure; - - /* Reset the decoder before reprocessing the bytes. */ - parser->decoderData.state = DECODER_RESET; - parser->decoderData.bits = 0; - if (!JSON_Parser_ProcessInputBytes(parser, bytes, length)) - return JSON_Failure; - } - - /* The decoder should be idle when parsing finishes. */ - if (Decoder_SequencePending(&parser->decoderData)) - return JSON_Parser_HandleInvalidEncodingSequence( - parser, DECODER_STATE_BYTES(parser->decoderData.state)); - return JSON_Success; -} - -/* Parser API functions. */ - -JSON_Parser JSON_CALL JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite) -{ - JSON_Parser parser; - JSON_MemorySuite memorySuite; - - if (pMemorySuite) - { - memorySuite = *pMemorySuite; - - /* The full memory suite must be specified. 
*/ - if (!memorySuite.realloc || !memorySuite.free) - return NULL; - } - else - memorySuite = defaultMemorySuite; - - parser = (JSON_Parser)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Parser_Data)); - - if (!parser) - return NULL; - - parser->memorySuite = memorySuite; - JSON_Parser_ResetData(parser, 0/* isInitialized */); - return parser; -} - -JSON_Status JSON_CALL JSON_Parser_Free(JSON_Parser parser) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API)) - return JSON_Failure; - - SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API); - - if (parser->pTokenBytes != parser->defaultTokenBytes) - parser->memorySuite.free(parser->memorySuite.userData, parser->pTokenBytes); - - while (parser->pMemberNames) - JSON_Parser_PopMemberNameList(parser); - - Grammarian_FreeAllocations(&parser->grammarianData, &parser->memorySuite); - parser->memorySuite.free(parser->memorySuite.userData, parser); - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Parser_Reset(JSON_Parser parser) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_IN_PROTECTED_API)) - return JSON_Failure; - SET_FLAGS_ON(ParserState, parser->state, PARSER_IN_PROTECTED_API); - JSON_Parser_ResetData(parser, 1/* isInitialized */); - /* Note that JSON_Parser_ResetData() unset PARSER_IN_PROTECTED_API for us. */ - return JSON_Success; -} - -void* JSON_CALL JSON_Parser_GetUserData(JSON_Parser parser) -{ - return parser ? parser->userData : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetUserData(JSON_Parser parser, void* userData) -{ - if (!parser) - return JSON_Failure; - parser->userData = userData; - return JSON_Success; -} - -JSON_Encoding JSON_CALL JSON_Parser_GetInputEncoding(JSON_Parser parser) -{ - return parser ? 
(JSON_Encoding)parser->inputEncoding : JSON_UnknownEncoding; -} - -JSON_Status JSON_CALL JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding) -{ - if ( !parser - || encoding < JSON_UnknownEncoding - || encoding > JSON_UTF32BE - || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - parser->inputEncoding = (Encoding)encoding; - return JSON_Success; -} - -JSON_Encoding JSON_CALL JSON_Parser_GetStringEncoding(JSON_Parser parser) -{ - return parser ? (JSON_Encoding)parser->stringEncoding : JSON_UTF8; -} - -JSON_Status JSON_CALL JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding) -{ - if ( - !parser - || encoding <= JSON_UnknownEncoding - || encoding > JSON_UTF32BE - || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - parser->stringEncoding = (Encoding)encoding; - return JSON_Success; -} - -size_t JSON_CALL JSON_Parser_GetMaxStringLength(JSON_Parser parser) -{ - return parser ? parser->maxStringLength : SIZE_MAX; -} - -JSON_Status JSON_CALL JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength) -{ - if ( !parser - || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - parser->maxStringLength = maxLength; - return JSON_Success; -} - -JSON_Encoding JSON_CALL JSON_Parser_GetNumberEncoding(JSON_Parser parser) -{ - return parser ? (JSON_Encoding)parser->numberEncoding : JSON_UTF8; -} - -JSON_Status JSON_CALL JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding) -{ - if (!parser || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - parser->numberEncoding = (Encoding)encoding; - return JSON_Success; -} - -size_t JSON_CALL JSON_Parser_GetMaxNumberLength(JSON_Parser parser) -{ - return parser ? 
parser->maxNumberLength : SIZE_MAX; -} - -JSON_Status JSON_CALL JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength) -{ - if ( !parser - || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - parser->maxNumberLength = maxLength; - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetAllowBOM(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_BOM)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_BOM, allowBOM); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetAllowComments(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_COMMENTS)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_COMMENTS, allowComments); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_SPECIAL_NUMBERS, allowSpecialNumbers); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetAllowHexNumbers(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_HEX_NUMBERS)) ? 
JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_HEX_NUMBERS, allowHexNumbers); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_ALLOW_CONTROL_CHARS)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_ALLOW_CONTROL_CHARS, allowUnescapedControlCharacters); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_REPLACE_INVALID)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetReplaceInvalidEncodingSequences( - JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - return JSON_Failure; - SET_FLAGS(ParserFlags, parser->flags, PARSER_REPLACE_INVALID, replaceInvalidEncodingSequences); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetTrackObjectMembers(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_TRACK_OBJECT_MEMBERS)) ? 
JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - { - return JSON_Failure; - } - SET_FLAGS(ParserFlags, parser->flags, PARSER_TRACK_OBJECT_MEMBERS, trackObjectMembers); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser) -{ - return (parser && GET_FLAGS(parser->flags, PARSER_EMBEDDED_DOCUMENT)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Parser_SetStopAfterEmbeddedDocument( - JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument) -{ - if (!parser || GET_FLAGS(parser->state, PARSER_STARTED)) - { - return JSON_Failure; - } - SET_FLAGS(ParserFlags, parser->flags, PARSER_EMBEDDED_DOCUMENT, stopAfterEmbeddedDocument); - return JSON_Success; -} - -JSON_Error JSON_CALL JSON_Parser_GetError(JSON_Parser parser) -{ - return parser ? (JSON_Error)parser->error : JSON_Error_None; -} - -JSON_Status JSON_CALL JSON_Parser_GetErrorLocation( - JSON_Parser parser, JSON_Location* pLocation) -{ - if (!pLocation || !parser || parser->error == JSON_Error_None) - return JSON_Failure; - - if (parser->errorOffset == ERROR_LOCATION_IS_TOKEN_START) - { - pLocation->byte = parser->tokenLocationByte; - pLocation->line = parser->tokenLocationLine; - pLocation->column = parser->tokenLocationColumn; - } - else - { - pLocation->byte = parser->codepointLocationByte - (SHORTEST_ENCODING_SEQUENCE(parser->inputEncoding) * parser->errorOffset); - pLocation->line = parser->codepointLocationLine; - pLocation->column = parser->codepointLocationColumn - parser->errorOffset; - } - pLocation->depth = parser->depth; - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Parser_GetTokenLocation( - JSON_Parser parser, JSON_Location* pLocation) -{ - if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER)) - return JSON_Failure; - - pLocation->byte = 
parser->tokenLocationByte; - pLocation->line = parser->tokenLocationLine; - pLocation->column = parser->tokenLocationColumn; - pLocation->depth = parser->depth; - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Parser_GetAfterTokenLocation( - JSON_Parser parser, JSON_Location* pLocation) -{ - if (!parser || !pLocation || !GET_FLAGS(parser->state, PARSER_IN_TOKEN_HANDLER)) - return JSON_Failure; - - pLocation->byte = parser->codepointLocationByte; - pLocation->line = parser->codepointLocationLine; - pLocation->column = parser->codepointLocationColumn; - pLocation->depth = parser->depth; - return JSON_Success; -} - -JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser) -{ - return parser ? parser->encodingDetectedHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetEncodingDetectedHandler( - JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->encodingDetectedHandler = handler; - return JSON_Success; -} - -JSON_Parser_NullHandler JSON_CALL JSON_Parser_GetNullHandler(JSON_Parser parser) -{ - return parser ? parser->nullHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetNullHandler( - JSON_Parser parser, JSON_Parser_NullHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->nullHandler = handler; - return JSON_Success; -} - -JSON_Parser_BooleanHandler JSON_CALL JSON_Parser_GetBooleanHandler(JSON_Parser parser) -{ - return parser ? parser->booleanHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetBooleanHandler( - JSON_Parser parser, JSON_Parser_BooleanHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->booleanHandler = handler; - return JSON_Success; -} - -JSON_Parser_StringHandler JSON_CALL JSON_Parser_GetStringHandler(JSON_Parser parser) -{ - return parser ? 
parser->stringHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetStringHandler( - JSON_Parser parser, JSON_Parser_StringHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->stringHandler = handler; - return JSON_Success; -} - -JSON_Parser_NumberHandler JSON_CALL JSON_Parser_GetNumberHandler(JSON_Parser parser) -{ - return parser ? parser->numberHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetNumberHandler( - JSON_Parser parser, JSON_Parser_NumberHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->numberHandler = handler; - return JSON_Success; -} - -JSON_Parser_SpecialNumberHandler JSON_CALL JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser) -{ - return parser ? parser->specialNumberHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetSpecialNumberHandler( - JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler) -{ - if (!parser) - return JSON_Failure; - parser->specialNumberHandler = handler; - return JSON_Success; -} - -JSON_Parser_StartObjectHandler JSON_CALL JSON_Parser_GetStartObjectHandler(JSON_Parser parser) -{ - return parser ? parser->startObjectHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetStartObjectHandler( - JSON_Parser parser, JSON_Parser_StartObjectHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->startObjectHandler = handler; - return JSON_Success; -} - -JSON_Parser_EndObjectHandler JSON_CALL JSON_Parser_GetEndObjectHandler(JSON_Parser parser) -{ - return parser ? parser->endObjectHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetEndObjectHandler( - JSON_Parser parser, JSON_Parser_EndObjectHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->endObjectHandler = handler; - return JSON_Success; -} - -JSON_Parser_ObjectMemberHandler JSON_CALL JSON_Parser_GetObjectMemberHandler(JSON_Parser parser) -{ - return parser ? 
parser->objectMemberHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetObjectMemberHandler( - JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->objectMemberHandler = handler; - return JSON_Success; -} - -JSON_Parser_StartArrayHandler JSON_CALL JSON_Parser_GetStartArrayHandler(JSON_Parser parser) -{ - return parser ? parser->startArrayHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetStartArrayHandler( - JSON_Parser parser, JSON_Parser_StartArrayHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->startArrayHandler = handler; - return JSON_Success; -} - -JSON_Parser_EndArrayHandler JSON_CALL JSON_Parser_GetEndArrayHandler(JSON_Parser parser) -{ - return parser ? parser->endArrayHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetEndArrayHandler( - JSON_Parser parser, JSON_Parser_EndArrayHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->endArrayHandler = handler; - return JSON_Success; -} - -JSON_Parser_ArrayItemHandler JSON_CALL JSON_Parser_GetArrayItemHandler(JSON_Parser parser) -{ - return parser ? parser->arrayItemHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Parser_SetArrayItemHandler( - JSON_Parser parser, JSON_Parser_ArrayItemHandler handler) -{ - if (!parser) - return JSON_Failure; - - parser->arrayItemHandler = handler; - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal) -{ - JSON_Status status = JSON_Failure; - if (parser && (pBytes || !length) && !GET_FLAGS(parser->state, PARSER_FINISHED | PARSER_IN_PROTECTED_API)) - { - int finishedParsing = 0; - SET_FLAGS_ON(ParserState, parser->state, PARSER_STARTED | PARSER_IN_PROTECTED_API); - if (JSON_Parser_ProcessInputBytes(parser, (const byte*)pBytes, length)) - { - /* New input was parsed successfully. 
*/ - if (isFinal) - { - /* Make sure there is nothing pending in the decoder, lexer, - or parser. */ - if (JSON_Parser_FlushDecoder(parser) && - JSON_Parser_FlushLexer(parser) && - JSON_Parser_FlushParser(parser)) - status = JSON_Success; - - finishedParsing = 1; - } - else - status = JSON_Success; - } - else - { - /* New input failed to parse. */ - finishedParsing = 1; - } - if (finishedParsing) - { - SET_FLAGS_ON(ParserState, parser->state, PARSER_FINISHED); - } - SET_FLAGS_OFF(ParserState, parser->state, PARSER_IN_PROTECTED_API); - } - return status; -} - -#endif /* JSON_NO_PARSER */ - -/******************** JSON Writer ********************/ - -#ifndef JSON_NO_WRITER - -/* Combinable writer state flags. */ -#define WRITER_RESET 0x0 -#define WRITER_STARTED 0x1 -#define WRITER_IN_PROTECTED_API 0x2 -typedef byte WriterState; - -/* Combinable writer settings flags. */ -#define WRITER_DEFAULT_FLAGS 0x0 -#define WRITER_USE_CRLF 0x1 -#define WRITER_REPLACE_INVALID 0x2 -#define WRITER_ESCAPE_NON_ASCII 0x4 -typedef byte WriterFlags; - -/* A writer instance. */ -struct JSON_Writer_Data -{ - JSON_MemorySuite memorySuite; - void* userData; - WriterState state; - WriterFlags flags; - Encoding outputEncoding; - Error error; - GrammarianData grammarianData; - JSON_Writer_OutputHandler outputHandler; -}; - -/* Writer internal functions. */ - -static void JSON_Writer_ResetData(JSON_Writer writer, int isInitialized) -{ - writer->userData = NULL; - writer->flags = WRITER_DEFAULT_FLAGS; - writer->outputEncoding = JSON_UTF8; - writer->error = JSON_Error_None; - Grammarian_Reset(&writer->grammarianData, isInitialized); - writer->outputHandler = NULL; - writer->state = WRITER_RESET; /* do this last! 
*/ -} - -static void JSON_Writer_SetError(JSON_Writer writer, Error error) -{ - writer->error = error; -} - -static JSON_Status JSON_Writer_ProcessToken(JSON_Writer writer, Symbol token) -{ - GrammarianOutput output = Grammarian_ProcessToken(&writer->grammarianData, token, &writer->memorySuite); - switch (GRAMMARIAN_RESULT_CODE(output)) - { - case REJECTED_TOKEN: - JSON_Writer_SetError(writer, JSON_Error_UnexpectedToken); - return JSON_Failure; - - case SYMBOL_STACK_FULL: - JSON_Writer_SetError(writer, JSON_Error_OutOfMemory); - return JSON_Failure; - } - return JSON_Success; -} - -static JSON_Status JSON_Writer_OutputBytes(JSON_Writer writer, const byte* pBytes, size_t length) -{ - if (writer->outputHandler && length) - { - if (writer->outputHandler(writer, (const char*)pBytes, length) != JSON_Writer_Continue) - { - JSON_Writer_SetError(writer, JSON_Error_AbortedByHandler); - return JSON_Failure; - } - } - return JSON_Success; -} - -static Codepoint JSON_Writer_GetCodepointEscapeCharacter(JSON_Writer writer, Codepoint c) -{ - switch (c) - { - case BACKSPACE_CODEPOINT: - return 'b'; - - case TAB_CODEPOINT: - return 't'; - - case LINE_FEED_CODEPOINT: - return 'n'; - - case FORM_FEED_CODEPOINT: - return 'f'; - - case CARRIAGE_RETURN_CODEPOINT: - return 'r'; - - case '"': - return '"'; - /* Don't escape forward slashes */ - /*case '/': - return '/';*/ - - case '\\': - return '\\'; - - case DELETE_CODEPOINT: - case LINE_SEPARATOR_CODEPOINT: - case PARAGRAPH_SEPARATOR_CODEPOINT: - return 'u'; - - default: - if (c < FIRST_NON_CONTROL_CODEPOINT || IS_NONCHARACTER(c) || - (GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII) && c > FIRST_NON_ASCII_CODEPOINT)) - return 'u'; - break; - } - return 0; -} - -typedef struct tag_WriteBufferData -{ - size_t used; - byte bytes[256]; -} WriteBufferData; -typedef WriteBufferData* WriteBuffer; - -static void WriteBuffer_Reset(WriteBuffer buffer) -{ - buffer->used = 0; -} - -static JSON_Status WriteBuffer_Flush(WriteBuffer buffer, 
JSON_Writer writer) -{ - JSON_Status status = JSON_Writer_OutputBytes(writer, buffer->bytes, buffer->used); - buffer->used = 0; - return status; -} - -static JSON_Status WriteBuffer_WriteBytes(WriteBuffer buffer, JSON_Writer writer, const byte* pBytes, size_t length) -{ - if (buffer->used + length > sizeof(buffer->bytes) && - !WriteBuffer_Flush(buffer, writer)) - return JSON_Failure; - - memcpy(&buffer->bytes[buffer->used], pBytes, length); - buffer->used += length; - return JSON_Success; -} - -static JSON_Status WriteBuffer_WriteCodepoint(WriteBuffer buffer, JSON_Writer writer, Codepoint c) -{ - if (buffer->used + LONGEST_ENCODING_SEQUENCE > sizeof(buffer->bytes) && - !WriteBuffer_Flush(buffer, writer)) - return JSON_Failure; - - buffer->used += EncodeCodepoint(c, writer->outputEncoding, &buffer->bytes[buffer->used]); - return JSON_Success; -} - -static JSON_Status WriteBuffer_WriteHexEscapeSequence(WriteBuffer buffer, JSON_Writer writer, Codepoint c) -{ - if (c >= FIRST_NON_BMP_CODEPOINT) - { - /* Non-BMP codepoints must be hex-escaped by escaping the UTF-16 - surrogate pair for the codepoint. We put the leading surrogate - in the low 16 bits of c so that it gets written first, then - the second pass through the loop will write out the trailing - surrogate. 
x*/ - c = SURROGATES_FROM_CODEPOINT(c); - c = (c << 16) | (c >> 16); - } - do - { - static const byte hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - byte escapeSequence[6]; - int i; - escapeSequence[0] = '\\'; - escapeSequence[1] = 'u'; - escapeSequence[2] = hexDigits[(c >> 12) & 0xF]; - escapeSequence[3] = hexDigits[(c >> 8) & 0xF]; - escapeSequence[4] = hexDigits[(c >> 4) & 0xF]; - escapeSequence[5] = hexDigits[c & 0xF]; - for (i = 0; i < sizeof(escapeSequence); i++) - { - if (!WriteBuffer_WriteCodepoint(buffer, writer, escapeSequence[i])) - return JSON_Failure; - } - c >>= 16; - } while (c); - return JSON_Success; -} - -static JSON_Status JSON_Writer_OutputString(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding) -{ - static const byte quoteUTF[] = { 0, 0, 0, '"', 0, 0, 0 }; - static const byte* const quoteEncodings[5] = { quoteUTF + 3, quoteUTF + 3, quoteUTF + 2, quoteUTF + 3, quoteUTF }; - - const byte* pQuoteEncoded = quoteEncodings[writer->outputEncoding - 1]; - size_t minSequenceLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); - DecoderData decoderData; - WriteBufferData bufferData; - size_t i = 0; - - WriteBuffer_Reset(&bufferData); - - /* Start quote. */ - if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength)) - return JSON_Failure; - - /* String contents. */ - Decoder_Reset(&decoderData); - while (i < length) - { - DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]); - DecoderResultCode result = DECODER_RESULT_CODE(output); - Codepoint c; - Codepoint escapeCharacter; - switch (result) - { - case SEQUENCE_PENDING: - i++; - break; - - case SEQUENCE_COMPLETE: - c = DECODER_CODEPOINT(output); - escapeCharacter = JSON_Writer_GetCodepointEscapeCharacter(writer, c); - switch (escapeCharacter) - { - case 0: - /* Output the codepoint as a normal encoding sequence. 
*/ - if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c)) - return JSON_Failure; - break; - - case 'u': - /* Output the codepoint as 1 or 2 hex escape sequences. */ - if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, c)) - return JSON_Failure; - break; - - default: - /* Output the codepoint as a simple escape sequence. */ - if (!WriteBuffer_WriteCodepoint(&bufferData, writer, '\\') || - !WriteBuffer_WriteCodepoint(&bufferData, writer, escapeCharacter)) - return JSON_Failure; - break; - } - i++; - break; - - case SEQUENCE_INVALID_INCLUSIVE: - i++; - /* fallthrough */ - case SEQUENCE_INVALID_EXCLUSIVE: - if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) - { - if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT)) - return JSON_Failure; - } - else - { - /* Output whatever valid bytes we've accumulated before failing. */ - if (WriteBuffer_Flush(&bufferData, writer)) - JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); - return JSON_Failure; - } - break; - } - } - if (Decoder_SequencePending(&decoderData)) - { - if (GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) - { - if (!WriteBuffer_WriteHexEscapeSequence(&bufferData, writer, REPLACEMENT_CHARACTER_CODEPOINT)) - return JSON_Failure; - } - else - { - /* Output whatever valid bytes we've accumulated before failing. */ - if (WriteBuffer_Flush(&bufferData, writer)) - JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); - return JSON_Failure; - } - } - - /* End quote. 
*/ - if (!WriteBuffer_WriteBytes(&bufferData, writer, pQuoteEncoded, minSequenceLength) || - !WriteBuffer_Flush(&bufferData, writer)) - return JSON_Failure; - return JSON_Success; -} - -static LexerState LexNumberCharacter(LexerState state, Codepoint c) -{ - switch (state) - { - case LEXING_WHITESPACE: - if (c == '-') - state = LEXING_NUMBER_AFTER_MINUS; - else if (c == '0') - state = LEXING_NUMBER_AFTER_LEADING_ZERO; - else if (c >= '1' && c <= '9') - state = LEXING_NUMBER_DECIMAL_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_MINUS: - if (c == '0') - state = LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO; - else if (c >= '1' && c <= '9') - state = LEXING_NUMBER_DECIMAL_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_LEADING_ZERO: - case LEXING_NUMBER_AFTER_LEADING_NEGATIVE_ZERO: - if (c == '.') - state = LEXING_NUMBER_AFTER_DOT; - else if (c == 'e' || c == 'E') - state = LEXING_NUMBER_AFTER_E; - else if ((c == 'x' || c == 'X') && state == LEXING_NUMBER_AFTER_LEADING_ZERO) - state = LEXING_NUMBER_AFTER_X; - else if (c == EOF_CODEPOINT) - state = LEXING_WHITESPACE; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_X: - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) - state = LEXING_NUMBER_HEX_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_HEX_DIGITS: - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) - { - /* Still LEXING_NUMBER_HEX_DIGITS. */ - } - else if (c == EOF_CODEPOINT) - state = LEXING_WHITESPACE; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_DECIMAL_DIGITS: - if (c >= '0' && c <= '9') - { - /* Still LEXING_NUMBER_DECIMAL_DIGITS. 
*/ - } - else if (c == '.') - state = LEXING_NUMBER_AFTER_DOT; - else if (c == 'e' || c == 'E') - state = LEXING_NUMBER_AFTER_E; - else if (c == EOF_CODEPOINT) - state = LEXING_WHITESPACE; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_DOT: - if (c >= '0' && c <= '9') - state = LEXING_NUMBER_FRACTIONAL_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_FRACTIONAL_DIGITS: - if (c >= '0' && c <= '9') - { - /* Still LEXING_NUMBER_FRACTIONAL_DIGITS. */ - } - else if (c == 'e' || c == 'E') - state = LEXING_NUMBER_AFTER_E; - else if (c == EOF_CODEPOINT) - state = LEXING_WHITESPACE; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_E: - if (c == '+' || c == '-') - state = LEXING_NUMBER_AFTER_EXPONENT_SIGN; - else if (c >= '0' && c <= '9') - state = LEXING_NUMBER_EXPONENT_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_AFTER_EXPONENT_SIGN: - if (c >= '0' && c <= '9') - state = LEXING_NUMBER_EXPONENT_DIGITS; - else - state = LEXER_ERROR; - break; - - case LEXING_NUMBER_EXPONENT_DIGITS: - if (c >= '0' && c <= '9') - { - /* Still LEXING_NUMBER_EXPONENT_DIGITS. 
*/ - } - else if (c == EOF_CODEPOINT) - state = LEXING_WHITESPACE; - else - state = LEXER_ERROR; - break; - } - return state; -} - -static JSON_Status JSON_Writer_OutputNumber(JSON_Writer writer, const byte* pBytes, size_t length, Encoding encoding) -{ - DecoderData decoderData; - WriteBufferData bufferData; - LexerState lexerState = LEXING_WHITESPACE; - size_t i; - Decoder_Reset(&decoderData); - WriteBuffer_Reset(&bufferData); - for (i = 0; i < length; i++) - { - DecoderOutput output = Decoder_ProcessByte(&decoderData, encoding, pBytes[i]); - DecoderResultCode result = DECODER_RESULT_CODE(output); - Codepoint c; - switch (result) - { - case SEQUENCE_PENDING: - break; - - case SEQUENCE_COMPLETE: - c = DECODER_CODEPOINT(output); - lexerState = LexNumberCharacter(lexerState, c); - if (lexerState == LEXER_ERROR) - { - /* Output whatever valid bytes we've accumulated before failing. */ - if (WriteBuffer_Flush(&bufferData, writer)) - JSON_Writer_SetError(writer, JSON_Error_InvalidNumber); - return JSON_Failure; - } - if (!WriteBuffer_WriteCodepoint(&bufferData, writer, c)) - return JSON_Failure; - break; - - case SEQUENCE_INVALID_INCLUSIVE: - case SEQUENCE_INVALID_EXCLUSIVE: - /* Output whatever valid bytes we've accumulated before failing. 
*/ - if (WriteBuffer_Flush(&bufferData, writer)) - JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); - return JSON_Failure; - } - } - if (!WriteBuffer_Flush(&bufferData, writer)) - return JSON_Failure; - if (Decoder_SequencePending(&decoderData)) - { - JSON_Writer_SetError(writer, JSON_Error_InvalidEncodingSequence); - return JSON_Failure; - } - if (LexNumberCharacter(lexerState, EOF_CODEPOINT) == LEXER_ERROR) - { - JSON_Writer_SetError(writer, JSON_Error_InvalidNumber); - return JSON_Failure; - } - return JSON_Success; -} - -#define SPACES_PER_CHUNK 8 -static JSON_Status JSON_Writer_OutputSpaces(JSON_Writer writer, size_t numberOfSpaces) -{ - static const byte spacesUTF8[SPACES_PER_CHUNK] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }; - static const byte spacesUTF16[SPACES_PER_CHUNK * 2 + 1] = { 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0, ' ', 0 }; - static const byte spacesUTF32[SPACES_PER_CHUNK * 4 + 3] = { 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0, ' ', 0, 0, 0 }; - static const byte* const spacesEncodings[5] = { spacesUTF8, spacesUTF16 + 1, spacesUTF16, spacesUTF32 + 3, spacesUTF32 }; - - size_t encodedLength = (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); - const byte* encoded = spacesEncodings[writer->outputEncoding - 1]; - while (numberOfSpaces > SPACES_PER_CHUNK) - { - if (!JSON_Writer_OutputBytes(writer, encoded, SPACES_PER_CHUNK * encodedLength)) - return JSON_Failure; - numberOfSpaces -= SPACES_PER_CHUNK; - } - - if (!JSON_Writer_OutputBytes(writer, encoded, numberOfSpaces * encodedLength)) - return JSON_Failure; - return JSON_Success; -} - -static JSON_Status JSON_Writer_WriteSimpleToken(JSON_Writer writer, Symbol token, const byte* const* encodings, size_t length) -{ - JSON_Status status = JSON_Failure; - if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) - { - size_t encodedLength = length * 
(size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); - SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); - if (JSON_Writer_ProcessToken(writer, token) && - JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength)) - status = JSON_Success; - SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); - } - return status; -} - -/* Writer API functions. */ - -JSON_Writer JSON_CALL JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite) -{ - JSON_Writer writer; - JSON_MemorySuite memorySuite; - if (pMemorySuite) - { - memorySuite = *pMemorySuite; - /* The full memory suite must be specified. */ - if (!memorySuite.realloc || !memorySuite.free) - return NULL; - } - else - memorySuite = defaultMemorySuite; - - writer = (JSON_Writer)memorySuite.realloc(memorySuite.userData, NULL, sizeof(struct JSON_Writer_Data)); - - if (!writer) - return NULL; - - writer->memorySuite = memorySuite; - JSON_Writer_ResetData(writer, 0/* isInitialized */); - return writer; -} - -JSON_Status JSON_CALL JSON_Writer_Free(JSON_Writer writer) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API)) - return JSON_Failure; - - SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API); - Grammarian_FreeAllocations(&writer->grammarianData, &writer->memorySuite); - writer->memorySuite.free(writer->memorySuite.userData, writer); - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Writer_Reset(JSON_Writer writer) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API)) - return JSON_Failure; - - SET_FLAGS_ON(WriterState, writer->state, WRITER_IN_PROTECTED_API); - JSON_Writer_ResetData(writer, 1/* isInitialized */); - /* Note that JSON_Writer_ResetData() unset WRITER_IN_PROTECTED_API for us. */ - return JSON_Success; -} - -void* JSON_CALL JSON_Writer_GetUserData(JSON_Writer writer) -{ - return writer ? 
writer->userData : NULL; -} - -JSON_Status JSON_CALL JSON_Writer_SetUserData(JSON_Writer writer, void* userData) -{ - if (!writer) - return JSON_Failure; - - writer->userData = userData; - return JSON_Success; -} - -JSON_Encoding JSON_CALL JSON_Writer_GetOutputEncoding(JSON_Writer writer) -{ - return writer ? (JSON_Encoding)writer->outputEncoding : JSON_UTF8; -} - -JSON_Status JSON_CALL JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_STARTED) || encoding <= JSON_UnknownEncoding || encoding > JSON_UTF32BE) - return JSON_Failure; - - writer->outputEncoding = (Encoding)encoding; - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Writer_GetUseCRLF(JSON_Writer writer) -{ - return (writer && GET_FLAGS(writer->flags, WRITER_USE_CRLF)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) - return JSON_Failure; - - SET_FLAGS(WriterFlags, writer->flags, WRITER_USE_CRLF, useCRLF); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer) -{ - return (writer && GET_FLAGS(writer->flags, WRITER_REPLACE_INVALID)) ? JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) - return JSON_Failure; - - SET_FLAGS(WriterFlags, writer->flags, WRITER_REPLACE_INVALID, replaceInvalidEncodingSequences); - return JSON_Success; -} - -JSON_Boolean JSON_CALL JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer) -{ - return (writer && GET_FLAGS(writer->flags, WRITER_ESCAPE_NON_ASCII)) ? 
JSON_True : JSON_False; -} - -JSON_Status JSON_CALL JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters) -{ - if (!writer || GET_FLAGS(writer->state, WRITER_STARTED)) - return JSON_Failure; - - SET_FLAGS(WriterFlags, writer->flags, WRITER_ESCAPE_NON_ASCII, escapeAllNonASCIICharacters); - return JSON_Success; -} - -JSON_Error JSON_CALL JSON_Writer_GetError(JSON_Writer writer) -{ - return writer ? (JSON_Error)writer->error : JSON_Error_None; -} - -JSON_Writer_OutputHandler JSON_CALL JSON_Writer_GetOutputHandler(JSON_Writer writer) -{ - return writer ? writer->outputHandler : NULL; -} - -JSON_Status JSON_CALL JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler) -{ - if (!writer) - return JSON_Failure; - - writer->outputHandler = handler; - return JSON_Success; -} - -JSON_Status JSON_CALL JSON_Writer_WriteNull(JSON_Writer writer) -{ - static const byte nullUTF8[] = { 'n', 'u', 'l', 'l' }; - static const byte nullUTF16[] = { 0, 'n', 0, 'u', 0, 'l', 0, 'l', 0 }; - static const byte nullUTF32[] = { 0, 0, 0, 'n', 0, 0, 0, 'u', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0 }; - static const byte* const nullEncodings[5] = { nullUTF8, nullUTF16 + 1, nullUTF16, nullUTF32 + 3, nullUTF32 }; - - return JSON_Writer_WriteSimpleToken(writer, T_NULL, nullEncodings, sizeof(nullUTF8)); -} - -JSON_Status JSON_CALL JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value) -{ - static const byte trueUTF8[] = { 't', 'r', 'u', 'e' }; - static const byte trueUTF16[] = { 0, 't', 0, 'r', 0, 'u', 0, 'e', 0 }; - static const byte trueUTF32[] = { 0, 0, 0, 't', 0, 0, 0, 'r', 0, 0, 0, 'u', 0, 0, 0, 'e', 0, 0, 0 }; - static const byte* const trueEncodings[5] = { trueUTF8, trueUTF16 + 1, trueUTF16, trueUTF32 + 3, trueUTF32 }; - - static const byte falseUTF8[] = { 'f', 'a', 'l', 's', 'e' }; - static const byte falseUTF16[] = { 0, 'f', 0, 'a', 0, 'l', 0, 's', 0, 'e', 0 }; - static const byte falseUTF32[] = { 0, 0, 0, 'f', 
0, 0, 0, 'a', 0, 0, 0, 'l', 0, 0, 0, 's', 0, 0, 0, 'e', 0, 0, 0 }; - static const byte* const falseEncodings[5] = { falseUTF8, falseUTF16 + 1, falseUTF16, falseUTF32 + 3, falseUTF32 }; - - Symbol token; - const byte* const* encodings; - size_t length; - if (value) - { - token = T_TRUE; - encodings = trueEncodings; - length = sizeof(trueUTF8); - } - else - { - token = T_FALSE; - encodings = falseEncodings; - length = sizeof(falseUTF8); - } - return JSON_Writer_WriteSimpleToken(writer, token, encodings, length); -} - -JSON_Status JSON_CALL JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding) -{ - JSON_Status status = JSON_Failure; - if (writer && (pValue || !length) && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE && - !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) - { - SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); - if (JSON_Writer_ProcessToken(writer, T_STRING)) - status = JSON_Writer_OutputString(writer, (const byte*)pValue, length, (Encoding)encoding); - - SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); - } - return status; -} - -JSON_Status JSON_CALL JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding) -{ - JSON_Status status = JSON_Failure; - if (writer && pValue && length && encoding > JSON_UnknownEncoding && encoding <= JSON_UTF32BE && - !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) - { - SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); - if (JSON_Writer_ProcessToken(writer, T_NUMBER)) - status = JSON_Writer_OutputNumber(writer, (const byte*)pValue, length, (Encoding)encoding); - - SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); - } - return status; -} - -JSON_Status JSON_CALL JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value) 
-{ - static const byte nanUTF8[] = { 'N', 'a', 'N' }; - static const byte nanUTF16[] = { 0, 'N', 0, 'a', 0, 'N', 0 }; - static const byte nanUTF32[] = { 0, 0, 0, 'N', 0, 0, 0, 'a', 0, 0, 0, 'N', 0, 0, 0 }; - static const byte* const nanEncodings[5] = { nanUTF8, nanUTF16 + 1, nanUTF16, nanUTF32 + 3, nanUTF32 }; - - static const byte ninfUTF8[] = { '-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y' }; - static const byte ninfUTF16[] = { 0, '-', 0, 'I', 0, 'n', 0, 'f', 0, 'i', 0, 'n', 0, 'i', 0, 't', 0, 'y', 0 }; - static const byte ninfUTF32[] = { 0, 0, 0, '-', 0, 0, 0, 'I', 0, 0, 0, 'n', 0, 0, 0, 'f', 0, 0, 0, 'i', 0, 0, 0, 'n', 0, 0, 0, 'i', 0, 0, 0, 't', 0, 0, 0, 'y', 0, 0, 0 }; - static const byte* const infinityEncodings[5] = { ninfUTF8 + 1, ninfUTF16 + 3, ninfUTF16 + 2, ninfUTF32 + 7, ninfUTF32 + 4 }; - static const byte* const negativeInfinityEncodings[5] = { ninfUTF8, ninfUTF16 + 1, ninfUTF16, ninfUTF32 + 3, ninfUTF32 }; - - Symbol token; - const byte* const* encodings; - size_t length; - if (value == JSON_Infinity) - { - token = T_INFINITY; - encodings = infinityEncodings; - length = sizeof(ninfUTF8) - 1/* - */; - } - else if (value == JSON_NegativeInfinity) - { - token = T_NEGATIVE_INFINITY; - encodings = negativeInfinityEncodings; - length = sizeof(ninfUTF8); - } - else - { - token = T_NAN; - encodings = nanEncodings; - length = sizeof(nanUTF8); - } - return JSON_Writer_WriteSimpleToken(writer, token, encodings, length); -} - -JSON_Status JSON_CALL JSON_Writer_WriteStartObject(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, '{', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return JSON_Writer_WriteSimpleToken(writer, T_LEFT_CURLY, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteEndObject(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, '}', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return 
JSON_Writer_WriteSimpleToken(writer, T_RIGHT_CURLY, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteStartArray(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, '[', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return JSON_Writer_WriteSimpleToken(writer, T_LEFT_SQUARE, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteEndArray(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, ']', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return JSON_Writer_WriteSimpleToken(writer, T_RIGHT_SQUARE, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteColon(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, ':', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return JSON_Writer_WriteSimpleToken(writer, T_COLON, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteComma(JSON_Writer writer) -{ - static const byte utf[] = { 0, 0, 0, ',', 0, 0, 0 }; - static const byte* const encodings[5] = { utf + 3, utf + 3, utf + 2, utf + 3, utf }; - - return JSON_Writer_WriteSimpleToken(writer, T_COMMA, encodings, 1); -} - -JSON_Status JSON_CALL JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces) -{ - JSON_Status status = JSON_Failure; - if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) - { - SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); - status = JSON_Writer_OutputSpaces(writer, numberOfSpaces); - SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); - } - return status; -} - -JSON_Status JSON_CALL JSON_Writer_WriteNewLine(JSON_Writer writer) -{ - static const byte lfUTF[] = { 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 }; - static const byte* const lfEncodings[5] = { lfUTF + 3, lfUTF + 3, lfUTF + 2, lfUTF + 3, lfUTF }; - - static const byte 
crlfUTF8[] = { CARRIAGE_RETURN_CODEPOINT, LINE_FEED_CODEPOINT }; - static const byte crlfUTF16[] = { 0, CARRIAGE_RETURN_CODEPOINT, 0, LINE_FEED_CODEPOINT, 0 }; - static const byte crlfUTF32[] = { 0, 0, 0, CARRIAGE_RETURN_CODEPOINT, 0, 0, 0, LINE_FEED_CODEPOINT, 0, 0, 0 }; - static const byte* const crlfEncodings[5] = { crlfUTF8, crlfUTF16 + 1, crlfUTF16, crlfUTF32 + 3, crlfUTF32 }; - - JSON_Status status = JSON_Failure; - if (writer && !GET_FLAGS(writer->state, WRITER_IN_PROTECTED_API) && writer->error == JSON_Error_None) - { - const byte* const* encodings; - size_t length; - size_t encodedLength; - SET_FLAGS_ON(WriterState, writer->state, WRITER_STARTED | WRITER_IN_PROTECTED_API); - if (GET_FLAGS(writer->flags, WRITER_USE_CRLF)) - { - encodings = crlfEncodings; - length = 2; - } - else - { - encodings = lfEncodings; - length = 1; - } - encodedLength = length * (size_t)SHORTEST_ENCODING_SEQUENCE(writer->outputEncoding); - if (JSON_Writer_OutputBytes(writer, encodings[writer->outputEncoding - 1], encodedLength)) - status = JSON_Success; - SET_FLAGS_OFF(WriterState, writer->state, WRITER_IN_PROTECTED_API); - } - return status; -} - -#endif /* JSON_NO_WRITER */ - -/******************** Miscellaneous API ********************/ - -const JSON_Version* JSON_CALL JSON_LibraryVersion(void) -{ - static JSON_Version version = { JSON_MAJOR_VERSION, JSON_MINOR_VERSION, JSON_MICRO_VERSION }; - return &version; -} - -const char* JSON_CALL JSON_ErrorString(JSON_Error error) -{ - /* This array must match the order and number of the JSON_Error enum. 
*/ - static const char* errorStrings[] = - { - /* JSON_Error_None */ "no error", - /* JSON_Error_OutOfMemory */ "could not allocate enough memory", - /* JSON_Error_AbortedByHandler */ "the operation was aborted by a handler", - /* JSON_Error_BOMNotAllowed */ "the input begins with a byte-order mark (BOM), which is not allowed by RFC 4627", - /* JSON_Error_InvalidEncodingSequence */ "the input contains a byte or sequence of bytes that is not valid for the input encoding", - /* JSON_Error_UnknownToken */ "the input contains an unknown token", - /* JSON_Error_UnexpectedToken */ "the input contains an unexpected token", - /* JSON_Error_IncompleteToken */ "the input ends in the middle of a token", - /* JSON_Error_MoreTokensExpected */ "the input ends when more tokens are expected", - /* JSON_Error_UnescapedControlCharacter */ "the input contains a string containing an unescaped control character (U+0000 - U+001F)", - /* JSON_Error_InvalidEscapeSequence */ "the input contains a string containing an invalid escape sequence", - /* JSON_Error_UnpairedSurrogateEscapeSequence */ "the input contains a string containing an unmatched UTF-16 surrogate codepoint", - /* JSON_Error_TooLongString */ "the input contains a string that is too long", - /* JSON_Error_InvalidNumber */ "the input contains an invalid number", - /* JSON_Error_TooLongNumber */ "the input contains a number that is too long", - /* JSON_Error_DuplicateObjectMember */ "the input contains an object with duplicate members", - /* JSON_Error_StoppedAfterEmbeddedDocument */ "the end of the embedded document was reached" - }; - return ((unsigned int)error < (sizeof(errorStrings) / sizeof(errorStrings[0]))) - ? 
errorStrings[error] - : ""; -} - -static const uint32_t endianEncodings = (((uint32_t)JSON_UTF32BE) << 24) | (((uint32_t)JSON_UTF16BE) << 16) | (((uint32_t)JSON_UTF16LE) << 8) | ((uint32_t)JSON_UTF32LE); - -JSON_Encoding JSON_CALL JSON_NativeUTF16Encoding(void) -{ - return (JSON_Encoding)(((byte*)&endianEncodings)[1]); -} - -JSON_Encoding JSON_CALL JSON_NativeUTF32Encoding(void) -{ - return (JSON_Encoding)(((byte*)&endianEncodings)[0]); -} diff --git a/libretro-common/include/formats/jsonsax.h b/libretro-common/include/formats/jsonsax.h deleted file mode 100644 index de1bdc1eeb..0000000000 --- a/libretro-common/include/formats/jsonsax.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (C) 2010-2020 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this file (jsonsax.h). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef __LIBRETRO_SDK_FORMAT_JSONSAX_H__ -#define __LIBRETRO_SDK_FORMAT_JSONSAX_H__ - -#include - -#include - -RETRO_BEGIN_DECLS - -enum -{ - JSONSAX_OK = 0, - JSONSAX_INTERRUPTED, - JSONSAX_MISSING_KEY, - JSONSAX_UNTERMINATED_KEY, - JSONSAX_MISSING_VALUE, - JSONSAX_UNTERMINATED_OBJECT, - JSONSAX_UNTERMINATED_ARRAY, - JSONSAX_UNTERMINATED_STRING, - JSONSAX_INVALID_VALUE -}; - -#ifdef JSONSAX_ERRORS -extern const char* jsonsax_errors[]; -#endif - -typedef struct -{ - int ( *start_document )( void* userdata ); - int ( *end_document )( void* userdata ); - int ( *start_object )( void* userdata ); - int ( *end_object )( void* userdata ); - int ( *start_array )( void* userdata ); - int ( *end_array )( void* userdata ); - int ( *key )( void* userdata, const char* name, size_t length ); - int ( *array_index )( void* userdata, unsigned int index ); - int ( *string )( void* userdata, const char* string, size_t length ); - int ( *number )( void* userdata, const char* number, size_t length ); - int ( *boolean )( void* userdata, int istrue ); - int ( *null )( void* userdata ); -} -jsonsax_handlers_t; - -int jsonsax_parse( const char* json, const jsonsax_handlers_t* handlers, void* userdata ); - -RETRO_END_DECLS - -#endif /* __LIBRETRO_SDK_FORMAT_JSONSAX_H__ */ diff --git a/libretro-common/include/formats/jsonsax_full.h b/libretro-common/include/formats/jsonsax_full.h deleted file mode 100644 index b89798e3c6..0000000000 --- a/libretro-common/include/formats/jsonsax_full.h +++ /dev/null @@ -1,1040 +0,0 @@ -/* - Copyright (c) 2012 John-Anthony Owens - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the "Software"), - to 
deal in the Software without restriction, including without limitation - the rights to use, copy, modify, merge, publish, distribute, sublicense, - and/or sell copies of the Software, and to permit persons to whom the - Software is furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. -*/ - -#ifndef JSONSAX_H_INCLUDED -#define JSONSAX_H_INCLUDED - -/* The library version */ -#define JSON_MAJOR_VERSION 1 -#define JSON_MINOR_VERSION 4 -#define JSON_MICRO_VERSION 5 - -/* JSON_NO_PARSER and JSON_NO_WRITER, if defined, remove the corresponding - * APIs and functionality from the library. - */ -#if defined(JSON_NO_PARSER) && defined(JSON_NO_WRITER) -#error JSON_NO_PARSER and JSON_NO_WRITER cannot both be defined! -#endif - -#include /* for size_t and NULL */ - -/* The library API is C and should not be subjected to C++ name mangling. */ -#ifdef __cplusplus -extern "C" { -#endif - -/* JSON_EXPORT controls the library's public API import/export linkage - * specifiers. By default, the library will be compiled to support dynamic - * linkage. In order to build the library for static linkage, the JSON_STATIC - * macro must be defined when the library itself is built AND when the client - * includes jsonsax.h. 
- */ -#if defined(JSON_STATIC) -#define JSON_EXPORT /* nothing */ -#else -#if defined(_MSC_VER) -#if defined(JSON_BUILDING) -#define JSON_EXPORT __declspec(dllexport) -#else -#define JSON_EXPORT __declspec(dllimport) -#endif -#else -#if defined(JSON_BUILDING) -#define JSON_EXPORT __attribute__ ((visibility("default"))) -#else -#define JSON_EXPORT /* nothing */ -#endif -#endif -#endif - -/* JSON_CALL controls the library's public API calling-convention. Clients' - * handler functions should be declared with JSON_CALL in order to ensure - * that the calling convention matches. - */ -#ifndef JSON_CALL -#if defined(_MSC_VER) -#define JSON_CALL __cdecl -#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER) -#define JSON_CALL __attribute__((cdecl)) -#else -#define JSON_CALL /* nothing */ -#endif -#endif - -#define JSON_API(t) JSON_EXPORT t JSON_CALL - -/* Boolean values used by the library. */ -typedef enum tag_JSON_Boolean -{ - JSON_False = 0, - JSON_True = 1 -} JSON_Boolean; - -/* Values returned by library APIs to indicate success or failure. */ -typedef enum tag_JSON_Status -{ - JSON_Failure = 0, - JSON_Success = 1 -} JSON_Status; - -/* Error codes. */ -typedef enum tag_JSON_Error -{ - JSON_Error_None = 0, - JSON_Error_OutOfMemory = 1, - JSON_Error_AbortedByHandler = 2, - JSON_Error_BOMNotAllowed = 3, - JSON_Error_InvalidEncodingSequence = 4, - JSON_Error_UnknownToken = 5, - JSON_Error_UnexpectedToken = 6, - JSON_Error_IncompleteToken = 7, - JSON_Error_ExpectedMoreTokens = 8, - JSON_Error_UnescapedControlCharacter = 9, - JSON_Error_InvalidEscapeSequence = 10, - JSON_Error_UnpairedSurrogateEscapeSequence = 11, - JSON_Error_TooLongString = 12, - JSON_Error_InvalidNumber = 13, - JSON_Error_TooLongNumber = 14, - JSON_Error_DuplicateObjectMember = 15, - JSON_Error_StoppedAfterEmbeddedDocument = 16 -} JSON_Error; - -/* Text encodings. 
*/ -typedef enum tag_JSON_Encoding -{ - JSON_UnknownEncoding = 0, - JSON_UTF8 = 1, - JSON_UTF16LE = 2, - JSON_UTF16BE = 3, - JSON_UTF32LE = 4, - JSON_UTF32BE = 5 -} JSON_Encoding; - -/* Attributes of a string value. */ -typedef enum tag_JSON_StringAttribute -{ - JSON_SimpleString = 0, - JSON_ContainsNullCharacter = 1 << 0, /* U+0000 */ - JSON_ContainsControlCharacter = 1 << 1, /* U+0000 - U+001F */ - JSON_ContainsNonASCIICharacter = 1 << 2, /* U+0080 - U+10FFFF */ - JSON_ContainsNonBMPCharacter = 1 << 3, /* U+10000 - U+10FFFF */ - JSON_ContainsReplacedCharacter = 1 << 4 /* an invalid encoding sequence was replaced by U+FFFD */ -} JSON_StringAttribute; -typedef unsigned int JSON_StringAttributes; - -/* Attributes of a number value. */ -typedef enum tag_JSON_NumberAttribute -{ - JSON_SimpleNumber = 0, - JSON_IsNegative = 1 << 0, - JSON_IsHex = 1 << 1, - JSON_ContainsDecimalPoint = 1 << 2, - JSON_ContainsExponent = 1 << 3, - JSON_ContainsNegativeExponent = 1 << 4 -} JSON_NumberAttribute; -typedef unsigned int JSON_NumberAttributes; - -/* Types of "special" number. */ -typedef enum tag_JSON_SpecialNumber -{ - JSON_NaN = 0, - JSON_Infinity = 1, - JSON_NegativeInfinity = 2 -} JSON_SpecialNumber; - -/* Information identifying a location in a parser instance's input stream. */ -typedef struct tag_JSON_Location -{ - /* The zero-based index of the byte in the input stream. Note that this - * is the only value that unambiguously identifies the location, since - * line and column refer to characters (which may be encoded in the input - * as multi-byte sequences) rather than bytes. - */ - size_t byte; - - /* The zero-based line number of the character in the input stream. 
Note - * that the parser treats each of the following character sequences as a - * single line break for purposes of computing line numbers: - * - * U+000A (LINE FEED) - * U+000D (CARRIAGE RETURN) - * U+000D U+000A (CARRIAGE RETURN, LINE FEED) - * - */ - size_t line; - - /* The zero-based column number of the character in the input stream. */ - size_t column; - - /* The zero-based depth in the JSON document structure at the location. */ - size_t depth; -} JSON_Location; - -/* Custom memory management handlers. - * - * The semantics of these handlers correspond exactly to those of standard - * realloc(), and free(). The handlers also receive the value of the memory - * suite's user data parameter, which clients can use to implement memory - * pools or impose custom allocation limits, if desired. - */ -typedef void* (JSON_CALL * JSON_ReallocHandler)(void* userData, void* ptr, size_t size); -typedef void (JSON_CALL * JSON_FreeHandler)(void* userData, void* ptr); - -/* A suite of custom memory management functions. */ -typedef struct tag_JSON_MemorySuite -{ - void* userData; - JSON_ReallocHandler realloc; - JSON_FreeHandler free; -} JSON_MemorySuite; - -/******************** JSON Parser ********************/ - -#ifndef JSON_NO_PARSER - -/* Parser instance. */ -struct JSON_Parser_Data; /* opaque data */ -typedef struct JSON_Parser_Data* JSON_Parser; - -/* Create a parser instance. - * - * If pMemorySuite is null, the library will use the C runtime realloc() and - * free() as the parser's memory management suite. Otherwise, all the - * handlers in the memory suite must be non-null or the call will fail and - * return null. - */ -JSON_API(JSON_Parser) JSON_Parser_Create(const JSON_MemorySuite* pMemorySuite); - -/* Free a parser instance. - * - * Every successful call to JSON_Parser_Create() must eventually be paired - * with a call to JSON_Parser_Free() in order to avoid leaking memory. 
- * - * This function returns failure if the parser parameter is null or if the - * function was called reentrantly from inside a handler. - */ -JSON_API(JSON_Status) JSON_Parser_Free(JSON_Parser parser); - -/* Reset a parser instance so that it can be used to parse a new input stream. - * - * This function returns failure if the parser parameter is null or if the - * function was called reentrantly from inside a handler. - * - * After a parser is reset, its state is indistinguishable from its state - * when it was returned by JSON_Parser_Create(). The parser's custom memory - * suite, if any, is preserved; all other settings, state, and handlers are - * restored to their default values. - */ -JSON_API(JSON_Status) JSON_Parser_Reset(JSON_Parser parser); - -/* Get and set the user data value associated with a parser instance. - * - * This setting allows clients to associate additional data with a - * parser instance. The parser itself does not use the value. - * - * The default value of this setting is null. - * - * This setting can be changed at any time, even inside handlers. - */ -JSON_API(void*) JSON_Parser_GetUserData(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetUserData(JSON_Parser parser, void* userData); - -/* Get and set the input encoding for a parser instance. - * - * If the client does not explicitly set the input encoding before calling - * JSON_Parser_Parse() on the parser instance, the parser will use the first - * 4 bytes of input to detect the input encoding automatically. Once the - * parser has detected the encoding, calls to JSON_Parser_GetInputEncoding() - * will return the detected value. - * - * The default value of this setting is JSON_UnknownEncoding. - * - * This setting cannot be changed once the parser has started parsing. 
- */ -JSON_API(JSON_Encoding) JSON_Parser_GetInputEncoding(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetInputEncoding(JSON_Parser parser, JSON_Encoding encoding); - -/* Get and set the string encoding for a parser instance. - * - * This setting controls the encoding of the string values that are - * passed to the string and object member handlers. - * - * The default value of this setting is JSON_UTF8. - * - * This setting cannot be set to JSON_UnknownEncoding. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Encoding) JSON_Parser_GetStringEncoding(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetStringEncoding(JSON_Parser parser, JSON_Encoding encoding); - -/* Get and set the maximum length of strings that a parser instance allows. - * - * This setting controls the maximum length, in bytes (NOT characters), of - * the encoded strings that are passed to the string and object member - * handlers. If the parser encounters a string that, when encoded in the - * string encoding, is longer than the maximum string length, it triggers - * the JSON_Error_TooLongString error. - * - * The default value of this setting is SIZE_MAX. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(size_t) JSON_Parser_GetMaxStringLength(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetMaxStringLength(JSON_Parser parser, size_t maxLength); - -/* Get and set the number encoding for a parser instance. - * - * This setting controls the encoding of the number values that are - * passed to the number handler. - * - * The default value of this setting is JSON_UTF8. - * - * This setting cannot be set to JSON_UnknownEncoding. - * - * This setting cannot be changed once the parser has started parsing.
- */ -JSON_API(JSON_Encoding) JSON_Parser_GetNumberEncoding(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetNumberEncoding(JSON_Parser parser, JSON_Encoding encoding); - -/* Get and set the maximum length of numbers that a parser instance allows. - * - * This setting controls the maximum length, in bytes (NOT characters), of - * the encoded numbers that are passed to the number handler. If the parser - * encounters a number that, when encoded in the number encoding, is longer - * than the maximum number length, it triggers the JSON_Error_TooLongNumber error. - * - * The default value of this setting is SIZE_MAX. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(size_t) JSON_Parser_GetMaxNumberLength(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetMaxNumberLength(JSON_Parser parser, size_t maxLength); - -/* Get and set whether a parser instance allows the input to begin with a - * byte-order-mark (BOM). - * - * RFC 4627 does not allow JSON text to begin with a BOM, but some clients - * may find it convenient to be lenient in this regard; for example, if the - * JSON text is being read from a file that has a BOM. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetAllowBOM(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetAllowBOM(JSON_Parser parser, JSON_Boolean allowBOM); - -/* Get and set whether a parser instance allows Javascript-style comments to - * appear in the JSON text. - * - * RFC 4627 does not allow JSON text to contain comments, but some clients - * may find it useful to allow them. - * - * Both types of comment described by ECMA-262 (multi-line and single-line) - * are supported. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing.
- */ -JSON_API(JSON_Boolean) JSON_Parser_GetAllowComments(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetAllowComments(JSON_Parser parser, JSON_Boolean allowComments); - -/* Get and set whether a parser instance allows the "special" number literals - * NaN, Infinity, and -Infinity. - * - * RFC 4627 does not provide any way to represent NaN, Infinity, or -Infinity, - * but some clients may find it convenient to recognize these as literals, - * since they are emitted by many common JSON generators. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetAllowSpecialNumbers(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetAllowSpecialNumbers(JSON_Parser parser, JSON_Boolean allowSpecialNumbers); - -/* Get and set whether a parser instance allows hexadecimal notation to be - * used for specifying number values. - * - * RFC 4627 does not allow hexadecimal numbers, but some clients may find it - * convenient to allow them, in order to represent binary bit patterns more - * easily. - * - * The parser recognizes hexadecimal numbers that conform to the syntax of - * HexIntegerLiteral, as described in section 7.8.3 of ECMA-262. That is, a - * valid hexadecimal number must comprise the prefix '0x' or '0X', followed - * by a sequence of one or more of the following characters: '0' - '9', - * 'a' - 'f', and 'A' - 'F'. - * - * Hexadecimal numbers cannot be prefixed by a minus sign. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetAllowHexNumbers(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetAllowHexNumbers(JSON_Parser parser, JSON_Boolean allowHexNumbers); - -/* Get and set whether a parser instance allows unescaped control characters - * (U+0000 - U+001F) to appear inside string values. 
- * - * RFC 4627 does not allow JSON text to contain unescaped control characters, - * but some clients may find it useful to allow them. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetAllowUnescapedControlCharacters(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetAllowUnescapedControlCharacters(JSON_Parser parser, JSON_Boolean allowUnescapedControlCharacters); - -/* Get and set whether a parser instance replaces invalid encoding sequences - * it encounters inside string tokens with the Unicode replacement character - * (U+FFFD) rather than triggering an error. - * - * By default, the parser is strict when decoding the input stream, and will - * fail if it encounters an encoding sequence that is not valid for the input - * encoding. Note especially that this includes (but is not limited to) the - * following: - * - * - Overlong encoding sequences in UTF-8. - * - Surrogate codepoints encoded in UTF-8 or UTF-32. - * - Unpaired or improperly-paired surrogates in UTF-16. - * - Codepoints outside the Unicode range encoded in UTF-8 or UTF-32. - * - * The replacement follows the rules and recommendations described in section - * 3.9 of version 5.2.0 of [the Unicode Standard](http://www.unicode.org/versions/Unicode5.2.0/). - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetReplaceInvalidEncodingSequences(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetReplaceInvalidEncodingSequences(JSON_Parser parser, JSON_Boolean replaceInvalidEncodingSequences); - -/* Get and set whether a parser instance tracks object member names for all - * open objects and detects duplicate members if any occur in the input. 
- * - * RFC 4627 stipulates that JSON parsers SHOULD check for duplicates, but - * may opt not to in light of reasonable implementation considerations. - * Checking for duplicate members necessarily incurs non-trivial memory - * overhead, and is therefore not enabled by default. Most clients use - * their parse handlers to build some sort of in-memory DOM representation - * of the JSON text and therefore already have the means to check for - * duplicate member names without incurring additional memory overhead; it - * is recommended that these clients implement duplicate member checking - * in their object member handler (refer to JSON_Parser_SetObjectMemberHandler() for - * details) and leave this setting disabled. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing. - */ -JSON_API(JSON_Boolean) JSON_Parser_GetTrackObjectMembers(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetTrackObjectMembers(JSON_Parser parser, JSON_Boolean trackObjectMembers); - -/* Get and set whether a parser instance stops parsing as soon as the end of - * the top-level JSON document is parsed. - * - * This setting allows the client to parse JSON content that is embedded - * inside a larger data stream. If this setting is enabled, the parser will, - * upon successfully parsing the end of the embedded JSON document, set its - * error to JSON_Error_StoppedAfterEmbeddedDocument, set its error location - * to the location in the input stream immediately following the end of the - * document, and return JSON_Failure from JSON_Parser_Parse(). - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the parser has started parsing.
- */ -JSON_API(JSON_Boolean) JSON_Parser_GetStopAfterEmbeddedDocument(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetStopAfterEmbeddedDocument(JSON_Parser parser, JSON_Boolean stopAfterEmbeddedDocument); - -/* Get the type of error, if any, encountered by a parser instance. - * - * If the parser encountered an error while parsing input, this function - * returns the type of the error. Otherwise, this function returns - * JSON_Error_None. - */ -JSON_API(JSON_Error) JSON_Parser_GetError(JSON_Parser parser); - -/* Get the location in the input stream at which a parser instance - * encountered an error. - * - * If the parser encountered an error while parsing input, this function - * sets the members of the structure pointed to by pLocation to the location - * in the input stream at which the error occurred and returns success. - * Otherwise, it leaves the members unchanged and returns failure. - */ -JSON_API(JSON_Status) JSON_Parser_GetErrorLocation(JSON_Parser parser, JSON_Location* pLocation); - -/* Get the location in the input stream of the beginning of the token - * that is currently being handled by one of a parser instance's parse - * handlers. - * - * If the parser is inside a parse handler, this function sets the members - * of the structure pointed to by pLocation to the location and returns - * success. Otherwise, it leaves the members unchanged and returns failure. - */ -JSON_API(JSON_Status) JSON_Parser_GetTokenLocation(JSON_Parser parser, JSON_Location* pLocation); - -/* Get the location in the input stream that immediately follows the end of - * the token that is currently being handled by one of a parser instance's - * parse handlers. - * - * If the parser is inside a parse handler, this function sets the members - * of the structure pointed to by pLocation to the location and returns - * success. Otherwise, it leaves the members unchanged and returns failure. 
- */ -JSON_API(JSON_Status) JSON_Parser_GetAfterTokenLocation(JSON_Parser parser, JSON_Location* pLocation); - -/* Parse handlers are callbacks that the client provides in order to - * be notified about the structure of the JSON document as it is being - * parsed. The following notes apply equally to all parse handlers: - * - * 1. Parse handlers are optional. In fact, a parser with no parse - * handlers at all can be used to simply validate that the input - * is valid JSON. - * - * 2. Parse handlers can be set, unset, or changed at any time, even - * from inside a parse handler. - * - * 3. If a parse handler returns JSON_Parser_Abort, the parser will - * abort the parse, set its error to JSON_Error_AbortedByHandler, - * set its error location to the start of the token that triggered - * the handler, and return JSON_Failure from the outer call to - * JSON_Parser_Parse(). - * - * 4. A parse handler can get the location in the input stream of the - * token that triggered the handler by calling - * JSON_Parser_GetTokenLocation(). - */ - -/* Values returned by parse handlers to indicate whether parsing should - * continue or be aborted. - * - * Note that JSON_Parser_TreatAsDuplicateObjectMember should only be returned by - * object member handlers. Refer to JSON_Parser_SetObjectMemberHandler() - * for details. - */ -typedef enum tag_JSON_Parser_HandlerResult -{ - JSON_Parser_Continue = 0, - JSON_Parser_Abort = 1, - JSON_Parser_TreatAsDuplicateObjectMember = 2 -} JSON_Parser_HandlerResult; - -/* Get and set the handler that is called when a parser instance detects the - * input encoding. - * - * If the parser instance's input encoding was set to JSON_UnknownEncoding - * when parsing began, this handler will be called as soon as the actual - * input encoding has been detected. - * - * Note that JSON_Parser_GetTokenLocation() will return failure if called - * from inside this handler, since there is no token associated with this - * event.
- */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EncodingDetectedHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_EncodingDetectedHandler) JSON_Parser_GetEncodingDetectedHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetEncodingDetectedHandler(JSON_Parser parser, JSON_Parser_EncodingDetectedHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * a JSON null literal value. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_NullHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_NullHandler) JSON_Parser_GetNullHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetNullHandler(JSON_Parser parser, JSON_Parser_NullHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * a JSON boolean value (true or false). - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_BooleanHandler)(JSON_Parser parser, JSON_Boolean value); -JSON_API(JSON_Parser_BooleanHandler) JSON_Parser_GetBooleanHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetBooleanHandler(JSON_Parser parser, JSON_Parser_BooleanHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * a JSON string value. - * - * The pValue parameter points to a buffer containing the string value, - * encoded according to the parser instance's string encoding setting. The - * buffer is null-terminated (the null terminator character is also encoded). - * Note, however, that JSON strings may contain embedded null characters, - * which are specifiable using the escape sequence \u0000. The client is - * free to modify the contents of the buffer during the handler. - * - * The length parameter specifies the number of bytes (NOT characters) in - * the encoded string, not including the encoded null terminator. - * - * The attributes parameter provides information about the characters - * that comprise the string. 
If the option to replace invalid encoding - * sequences is enabled and the string contains any Unicode replacement - * characters (U+FFFD) that were the result of replacing invalid encoding - * sequences in the input, the attributes will include the value - * JSON_ContainsReplacedCharacter. Note that the absence of this attribute - * does not imply that the string does not contain any U+FFFD characters, - * since such characters may have been present in the original input, and - * not inserted by a replacement operation. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StringHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_StringAttributes attributes); -JSON_API(JSON_Parser_StringHandler) JSON_Parser_GetStringHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetStringHandler(JSON_Parser parser, JSON_Parser_StringHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * a JSON number value. - * - * JSON numbers do not have a defined binary representation or precision, - * and different clients may wish to interpret them differently, for - * example, as IEEE 754 doubles, 64-bit integers, or arbitrary-precision - * bignums. For this reason, the parser does not attempt to interpret - * number values, but leaves this to the client. - * - * The pValue parameter points to a buffer containing the number value, - * encoded according to the parser instance's number encoding setting. The - * buffer is null-terminated (the null terminator character is also encoded). - * The buffer is guaranteed to contain only characters allowed in JSON number - * values, that is: '0' - '9', '+', '-', '.', 'e', and 'E'; if the option - * to allow hex numbers is enabled, the text may also contain the characters - * 'x', 'X', 'a' - 'f', and 'A' - 'F'. The client is free to modify the - * contents of the buffer during the handler. 
This is especially useful - * to clients that wish to convert the number to a double using the C - * standard library's strtod() function, which is locale-sensitive; in this - * case, the client should modify the buffer to replace the '.' character - * with localeconv()->decimal_point[0] before passing the buffer to strtod(). - * - * The length parameter specifies the number of bytes (NOT characters) in - * the encoded number, not including the encoded null terminator. - * - * The attributes parameter provides information about the number. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_NumberHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_NumberAttributes attributes); -JSON_API(JSON_Parser_NumberHandler) JSON_Parser_GetNumberHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetNumberHandler(JSON_Parser parser, JSON_Parser_NumberHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * one of the "special" number literals NaN, Infinity, and -Infinity. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_SpecialNumberHandler)(JSON_Parser parser, JSON_SpecialNumber value); -JSON_API(JSON_Parser_SpecialNumberHandler) JSON_Parser_GetSpecialNumberHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetSpecialNumberHandler(JSON_Parser parser, JSON_Parser_SpecialNumberHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * the left curly brace that starts an object. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StartObjectHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_StartObjectHandler) JSON_Parser_GetStartObjectHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetStartObjectHandler(JSON_Parser parser, JSON_Parser_StartObjectHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * the right curly brace that ends an object.
- */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EndObjectHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_EndObjectHandler) JSON_Parser_GetEndObjectHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetEndObjectHandler(JSON_Parser parser, JSON_Parser_EndObjectHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * an object member name. - * - * The pValue parameter points to a buffer containing the member name, - * encoded according to the parser instance's string encoding setting. The - * buffer is null-terminated (the null terminator character is also encoded). - * Note, however, that JSON strings may contain embedded null characters, - * which are specifiable using the escape sequence \u0000. The client is - * free to modify the contents of the buffer during the handler. - * - * The length parameter specifies the number of bytes (NOT characters) in - * the encoded string, not including the encoded null terminator. - * - * The attributes parameter provides information about the characters - * that comprise the string. If the option to replace invalid encoding - * sequences is enabled and the string contains any Unicode replacement - * characters (U+FFFD) that were the result of replacing invalid encoding - * sequences in the input, the attributes will include the value - * JSON_ContainsReplacedCharacter. Note that the absence of this attribute - * does not imply that the string does not contain any U+FFFD characters, - * since such characters may have been present in the original input, and - * not inserted by a replacement operation. - * - * The handler can return JSON_Parser_TreatAsDuplicateObjectMember to - * indicate that the current object already contains a member with the - * specified name. This allows clients to implement duplicate member - * checking without incurring the additional memory overhead associated - * with enabling the TrackObjectMembers setting. 
- */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_ObjectMemberHandler)(JSON_Parser parser, char* pValue, size_t length, JSON_StringAttributes attributes); -JSON_API(JSON_Parser_ObjectMemberHandler) JSON_Parser_GetObjectMemberHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetObjectMemberHandler(JSON_Parser parser, JSON_Parser_ObjectMemberHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * the left square brace that starts an array. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_StartArrayHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_StartArrayHandler) JSON_Parser_GetStartArrayHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetStartArrayHandler(JSON_Parser parser, JSON_Parser_StartArrayHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * the right square brace that ends an array. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_EndArrayHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_EndArrayHandler) JSON_Parser_GetEndArrayHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetEndArrayHandler(JSON_Parser parser, JSON_Parser_EndArrayHandler handler); - -/* Get and set the handler that is called when a parser instance encounters - * an array item. - * - * This event is always immediately followed by a null, boolean, string, - * number, special number, start object, or start array event. - */ -typedef JSON_Parser_HandlerResult (JSON_CALL * JSON_Parser_ArrayItemHandler)(JSON_Parser parser); -JSON_API(JSON_Parser_ArrayItemHandler) JSON_Parser_GetArrayItemHandler(JSON_Parser parser); -JSON_API(JSON_Status) JSON_Parser_SetArrayItemHandler(JSON_Parser parser, JSON_Parser_ArrayItemHandler handler); - -/* Push zero or more bytes of input to a parser instance. - * - * The pBytes parameter points to a buffer containing the bytes to be - * parsed, if any. 
pBytes may be NULL if and only if the length parameter - * is 0. - * - * The length parameter specifies the number of bytes (NOT characters) - * pointed to by pBytes. - * - * The isFinal parameter specifies whether the parser should treat the - * input to the call as the last chunk of input in the JSON document. - * If this parameter is JSON_False, the parser will assume that more - * input may be forthcoming. - * - * The parser adheres to [RFC 4627](http://www.ietf.org/rfc/rfc4627.txt), - * except that any JSON value (null, true, false, string, number, object, - * or array) is accepted as a valid top-level entity in the parsed text. - * - * This function returns failure if the parser parameter is null, if the - * function was called reentrantly from inside a handler, or if the - * parser instance has already finished parsing. - */ -JSON_API(JSON_Status) JSON_Parser_Parse(JSON_Parser parser, const char* pBytes, size_t length, JSON_Boolean isFinal); - -#endif /* JSON_NO_PARSER */ - -/******************** JSON Writer ********************/ - -#ifndef JSON_NO_WRITER - -/* Writer instance. */ -struct JSON_Writer_Data; /* opaque data */ -typedef struct JSON_Writer_Data* JSON_Writer; - -/* Create a writer instance. - * - * If pMemorySuite is null, the library will use the C runtime realloc() and - * free() as the writer's memory management suite. Otherwise, all the - * handlers in the memory suite must be non-null or the call will fail and - * return null. - */ -JSON_API(JSON_Writer) JSON_Writer_Create(const JSON_MemorySuite* pMemorySuite); - -/* Free a writer instance. - * - * Every successful call to JSON_Writer_Create() must eventually be paired - * with a call to JSON_Writer_Free() in order to avoid leaking memory. - * - * This function returns failure if the writer parameter is null or if the - * function was called reentrantly from inside a handler. 
- */ -JSON_API(JSON_Status) JSON_Writer_Free(JSON_Writer writer); - -/* Reset a writer instance so that it can be used to write a new output - * stream. - * - * This function returns failure if the writer parameter is null or if the - * function was called reentrantly from inside a handler. - * - * After a writer is reset, its state is indistinguishable from its state - * when it was returned by JSON_Writer_Create(). The writer's custom memory - * suite, if any, is preserved; all other settings, state, and handlers are - * restored to their default values. - */ -JSON_API(JSON_Status) JSON_Writer_Reset(JSON_Writer writer); - -/* Get and set the user data value associated with a writer instance. - * - * This setting allows clients to associate additional data with a - * writer instance. The writer itself does not use the value. - * - * The default value of this setting is NULL. - * - * This setting can be changed at any time, even inside handlers. - */ -JSON_API(void*) JSON_Writer_GetUserData(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetUserData(JSON_Writer writer, void* userData); - -/* Get and set the output encoding for a writer instance. - * - * The default value of this setting is JSON_UTF8. - * - * This setting cannot be set to JSON_UnknownEncoding. - * - * This setting cannot be changed once the writer has started writing. - */ -JSON_API(JSON_Encoding) JSON_Writer_GetOutputEncoding(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetOutputEncoding(JSON_Writer writer, JSON_Encoding encoding); - -/* Get and set whether a writer instance uses CARRIAGE RETURN, LINE FEED - * (CRLF) as the new line sequence generated by JSON_Writer_WriteNewLine(). - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the writer has started writing. 
- */ -JSON_API(JSON_Boolean) JSON_Writer_GetUseCRLF(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetUseCRLF(JSON_Writer writer, JSON_Boolean useCRLF); - -/* Get and set whether a writer instance replaces invalid encoding sequences - * it encounters in string tokens with the Unicode replacement character - * (U+FFFD) rather than triggering an error. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the writer has started writing. - */ -JSON_API(JSON_Boolean) JSON_Writer_GetReplaceInvalidEncodingSequences(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetReplaceInvalidEncodingSequences(JSON_Writer writer, JSON_Boolean replaceInvalidEncodingSequences); - -/* Get and set whether a writer instance escapes all non-ASCII characters - * that it outputs. This can be useful for debugging, or when the output - * will be consumed by a parser that does not support UTF-encoded input. - * It is not recommended as a general practice, since it bloats the size - * of non-ASCII strings considerably, compared to UTF encoding. - * - * The default value of this setting is JSON_False. - * - * This setting cannot be changed once the writer has started writing. - */ -JSON_API(JSON_Boolean) JSON_Writer_GetEscapeAllNonASCIICharacters(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetEscapeAllNonASCIICharacters(JSON_Writer writer, JSON_Boolean escapeAllNonASCIICharacters); - -/* Get the type of error, if any, encountered by a writer instance. - * - * If the writer encountered an error while writing output, this function - * returns the type of the error. Otherwise, this function returns - * JSON_Error_None. - */ -JSON_API(JSON_Error) JSON_Writer_GetError(JSON_Writer writer); - -/* The JSON_Writer_WriteXXX() family of functions cause JSON text to be - * sent to a writer instance's output handler. The following notes apply - * equally to all these functions: - * - * 1.
The output handler is optional, and can be set, unset, or changed - * at any time, even from inside the output handler. - * - * 2. A single call to JSON_Writer_WriteXXX() may trigger multiple calls - * to the output handler. - * - * 3. All output generated by a call to JSON_Writer_WriteXXX() is sent - * to the output handler before the call returns; that is, the writer - * does not aggregate output from multiple writes before sending it to - * the output handler. - * - * 4. A call to JSON_Writer_WriteXXX() will fail if the writer has - * already encountered an error. - * - * 5. A call to JSON_Writer_WriteXXX() will fail if the call was made - * reentrantly from inside a handler. - * - * 6. A call to JSON_Writer_WriteXXX() will fail if it would cause the - * writer to output grammatically-incorrect JSON text. - * - * 7. If an output handler returns JSON_Writer_Abort, the writer will - * abort the write, set its error to JSON_Error_AbortedByHandler, - * set its error location to the location in the output stream prior - * to the call to the handler, and return JSON_Failure from the outer - * call to JSON_Writer_WriteXXX(). - */ - -/* Values returned by write handlers to indicate whether writing should - * continue or be aborted. - */ -typedef enum tag_JSON_Writer_HandlerResult -{ - JSON_Writer_Continue = 0, - JSON_Writer_Abort = 1 -} JSON_Writer_HandlerResult; - -/* Get and set the handler that is called when a writer instance has output - * ready to be written. - * - * The pBytes parameter points to a buffer containing the bytes to be written, - * encoded according to the writer instance's output encoding setting. The - * buffer is NOT null-terminated. - * - * The length parameter specifies the number of bytes (NOT characters) in - * the encoded output. 
- */ -typedef JSON_Writer_HandlerResult (JSON_CALL * JSON_Writer_OutputHandler)(JSON_Writer writer, const char* pBytes, size_t length); -JSON_API(JSON_Writer_OutputHandler) JSON_Writer_GetOutputHandler(JSON_Writer writer); -JSON_API(JSON_Status) JSON_Writer_SetOutputHandler(JSON_Writer writer, JSON_Writer_OutputHandler handler); - -/* Write the JSON null literal to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteNull(JSON_Writer writer); - -/* Write a JSON boolean value to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteBoolean(JSON_Writer writer, JSON_Boolean value); - -/* Write a JSON string value to the output. - * - * The pValue parameter points to a buffer containing the string to be - * written. The buffer does NOT need to be null-terminated. This - * parameter can be null if and only if the length parameter is zero. - * - * The length parameter specifies the number of bytes (NOT characters) - * in the buffer. If the buffer is null-terminated, the length should - * NOT include the null terminator. - * - * The encoding parameter specifies the encoding of the text pointed - * to by pValue. This parameter cannot be JSON_UnknownEncoding. - * - * If the string contains invalid encoding sequences and the option to - * replace invalid encoding sequences with the Unicode replacement - * character (U+FFFD) is not enabled for the writer instance, the writer - * sets its error to JSON_Error_InvalidEncodingSequence and returns - * failure. - * - * The writer escapes the following codepoints: - * - * - BACKSPACE (U+0008) => \b - * - TAB (U+0009) => \t - * - LINE FEED (U+000A) => \n - * - FORM FEED (U+000C) => \f - * - CARRIAGE RETURN (U+000D) => \r - * - QUOTATION MARK (U+0022) => \" - * - SOLIDUS (U+002F) => \/ - * - REVERSE SOLIDUS (U+005C) => \\ - * - * The writer also escapes the following codepoints using hex-style escape - * sequences: - * - * - All control characters (U+0000 - U+001F) except those covered by the - * list above. 
- * - DELETE (U+007F) - * - LINE SEPARATOR (U+2028) - * - PARAGRAPH SEPARATOR (U+2029) - * - All 34 Unicode "noncharacter" codepoints whose values end in FE or FF. - * - All 32 Unicode "noncharacter" codepoints in the range U+FDD0 - U+FDEF. - * - REPLACEMENT CHARACTER (U+FFFD), if it did not appear in the original - * string provided by the client; in other words, if the writer introduced - * it in the output as a replacement for an invalid encoding sequence in - * the original string. - * - * If the setting to escape all non-ASCII characters is enabled, ALL - * codepoints at or above U+0080 are escaped using hex-style escape sequences. - */ -JSON_API(JSON_Status) JSON_Writer_WriteString(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding); - -/* Write a JSON number value to the output. - * - * The pValue parameter points to a buffer containing the number to be - * written. The buffer does NOT need to be null-terminated. - * - * The length parameter specifies the number of bytes (NOT characters) - * in the buffer. If the buffer is null-terminated, the length should - * NOT include the null terminator. - * - * The encoding parameter specifies the encoding of the text pointed - * to by pValue. This parameter cannot be JSON_UnknownEncoding. - * - * If the number contains an invalid encoding sequence, the writer sets - * its error to JSON_Error_InvalidEncodingSequence and returns failure, - * regardless of whether the option to replace invalid encoding sequences - * with the Unicode replacement character (U+FFFD) is enabled (that - * setting only affects writing of string values). - * - * The number must be a valid JSON number as described by RFC 4627, or a - * hexadecimal number conforming to the syntax of HexIntegerLiteral, as - * described in section 7.8.3 of ECMA-262. Otherwise, the writer sets its - * error to JSON_Error_InvalidNumber and returns failure.
- */ -JSON_API(JSON_Status) JSON_Writer_WriteNumber(JSON_Writer writer, const char* pValue, size_t length, JSON_Encoding encoding); - -/* Write a JSON "special" number literal to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteSpecialNumber(JSON_Writer writer, JSON_SpecialNumber value); - -/* Write a left curly-brace character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteStartObject(JSON_Writer writer); - -/* Write a right curly-brace character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteEndObject(JSON_Writer writer); - -/* Write a left square-bracket character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteStartArray(JSON_Writer writer); - -/* Write a right square-bracket character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteEndArray(JSON_Writer writer); - -/* Write a colon character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteColon(JSON_Writer writer); - -/* Write a comma character to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteComma(JSON_Writer writer); - -/* Write space characters to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteSpace(JSON_Writer writer, size_t numberOfSpaces); - -/* Write a newline sequence to the output. */ -JSON_API(JSON_Status) JSON_Writer_WriteNewLine(JSON_Writer writer); - -#endif /* JSON_NO_WRITER */ - -/******************** Miscellaneous API ********************/ - -/* Information about the library version. */ -typedef struct tag_JSON_Version -{ - unsigned int major; - unsigned int minor; - unsigned int micro; -} JSON_Version; - -/* Get a pointer to the library version information. */ -JSON_API(const JSON_Version*) JSON_LibraryVersion(void); - -/* Get a constant, null-terminated, ASCII string describing an error code. */ -JSON_API(const char*) JSON_ErrorString(JSON_Error error); - -/* Get the UTF-16 encoding whose endianness matches the target platform. - * - * This function always returns either JSON_UTF16LE or JSON_UTF16BE.
- */ -JSON_API(JSON_Encoding) JSON_NativeUTF16Encoding(void); - -/* Get the UTF-32 encoding whose endianness matches the target platform. - * - * This function always returns either JSON_UTF32LE or JSON_UTF32BE. - */ -JSON_API(JSON_Encoding) JSON_NativeUTF32Encoding(void); - -#ifdef __cplusplus -} -#endif - -#endif /* JSONSAX_H_INCLUDED */