mirror of https://github.com/xqemu/xqemu.git
QObject patches for 2018-09-24
-----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJbqQxWAAoJEDhwtADrkYZTvMkQAJlraQ5ydNTLEcaQ2GPjzbNF n34hUjT8K0JQmtiLTdiZvcEmqN9VezPJtOgoGyOFljpdJOBgmThXkWp2STyEF4lo sYpA/ml8Mf39TCKjGQGmelxOuSHXmSuEWbCkcZLS/xbf/phMPHulVywcU8UP2ehz F7k5FXSx8MxA7a86lhhegXkK6O0+zvlnvR2tjufJdL0U/V1qXyKqdnOo5ZG7A/H9 +8PvhiVxHr+Id0+1iFqWYzL703zHDWQvfCxzI5arMD9X8jRulBli+eW1LJOTM8SK Pcel9xcSVsp53TIhD0+jG6OS88osMQP/JO3ND8qKFBbJ8f/WXKyskIUFgK9oVxX3 083tcCqCwYFe3THYzY8d5hyhP8OA3ddnSLyA0LV80APi5Z9z+eERSYwCdEad96nS SEl6kLT8VNoVxPi6lPoxsTKJDjCVWesgXkRH0KkzC9JsX0oweW+3z8rNEw9JIeEM VtMnqqG7aFPmlc0kcmNCGSWKNLHymN5ZxylHfQcyauzIPKO4eS3XCwtF4NB5npBJ I1s14NJIHeeSADGaQLTHRLkL1iY3q8ZtAfK+SwnGFtEgIIRHST96KAXDbxyJow8P Ommd2N/J57M68rJUtqBH0bxu58A7AeKN+DrxpeTpgzlDY+/LxLJS46pHVzu+zqim NpXyHG6C+DKcwd/+jFmk =BKOg -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-09-24' into staging QObject patches for 2018-09-24 # gpg: Signature made Mon 24 Sep 2018 17:09:58 BST # gpg: using RSA key 3870B400EB918653 # gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" # gpg: aka "Markus Armbruster <armbru@pond.sub.org>" # Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867 4E5F 3870 B400 EB91 8653 * remotes/armbru/tags/pull-qobject-2018-09-24: json: Eliminate lexer state IN_WHITESPACE, pseudo-token JSON_SKIP json: Eliminate lexer state IN_ERROR json: Nicer recovery from lexical errors json: Make lexer's "character consumed" logic less confusing json: Clean up how lexer consumes "end of input" json: Fix lexer for lookahead character beyond '\x7F' Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
f69d20fa8b
|
@ -100,7 +100,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum json_lexer_state {
|
enum json_lexer_state {
|
||||||
IN_ERROR = 0, /* must really be 0, see json_lexer[] */
|
IN_RECOVERY = 1,
|
||||||
IN_DQ_STRING_ESCAPE,
|
IN_DQ_STRING_ESCAPE,
|
||||||
IN_DQ_STRING,
|
IN_DQ_STRING,
|
||||||
IN_SQ_STRING_ESCAPE,
|
IN_SQ_STRING_ESCAPE,
|
||||||
|
@ -115,25 +115,44 @@ enum json_lexer_state {
|
||||||
IN_SIGN,
|
IN_SIGN,
|
||||||
IN_KEYWORD,
|
IN_KEYWORD,
|
||||||
IN_INTERP,
|
IN_INTERP,
|
||||||
IN_WHITESPACE,
|
|
||||||
IN_START,
|
IN_START,
|
||||||
IN_START_INTERP, /* must be IN_START + 1 */
|
IN_START_INTERP, /* must be IN_START + 1 */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
QEMU_BUILD_BUG_ON(JSON_ERROR != 0);
|
||||||
|
QEMU_BUILD_BUG_ON(IN_RECOVERY != JSON_ERROR + 1);
|
||||||
QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP);
|
QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP);
|
||||||
|
QEMU_BUILD_BUG_ON(JSON_MAX >= 0x80);
|
||||||
QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);
|
QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);
|
||||||
|
|
||||||
#define TERMINAL(state) [0 ... 0x7F] = (state)
|
#define LOOKAHEAD 0x80
|
||||||
|
#define TERMINAL(state) [0 ... 0xFF] = ((state) | LOOKAHEAD)
|
||||||
/* Return whether TERMINAL is a terminal state and the transition to it
|
|
||||||
from OLD_STATE required lookahead. This happens whenever the table
|
|
||||||
below uses the TERMINAL macro. */
|
|
||||||
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
|
|
||||||
(terminal != IN_ERROR && json_lexer[(old_state)][0] == (terminal))
|
|
||||||
|
|
||||||
static const uint8_t json_lexer[][256] = {
|
static const uint8_t json_lexer[][256] = {
|
||||||
/* Relies on default initialization to IN_ERROR! */
|
/* Relies on default initialization to JSON_ERROR (which must be 0)! */
|
||||||
|
|
||||||
|
/* error recovery */
|
||||||
|
[IN_RECOVERY] = {
|
||||||
|
/*
|
||||||
|
* Skip characters until a structural character, an ASCII
|
||||||
|
* control character other than '\t', or impossible UTF-8
|
||||||
|
* bytes '\xFE', '\xFF'. Structural characters and line
|
||||||
|
* endings are promising resynchronization points. Clients
|
||||||
|
* may use the others to force the JSON parser into known-good
|
||||||
|
* state; see docs/interop/qmp-spec.txt.
|
||||||
|
*/
|
||||||
|
[0 ... 0x1F] = IN_START | LOOKAHEAD,
|
||||||
|
[0x20 ... 0xFD] = IN_RECOVERY,
|
||||||
|
[0xFE ... 0xFF] = IN_START | LOOKAHEAD,
|
||||||
|
['\t'] = IN_RECOVERY,
|
||||||
|
['['] = IN_START | LOOKAHEAD,
|
||||||
|
[']'] = IN_START | LOOKAHEAD,
|
||||||
|
['{'] = IN_START | LOOKAHEAD,
|
||||||
|
['}'] = IN_START | LOOKAHEAD,
|
||||||
|
[':'] = IN_START | LOOKAHEAD,
|
||||||
|
[','] = IN_START | LOOKAHEAD,
|
||||||
|
},
|
||||||
|
|
||||||
/* double quote string */
|
/* double quote string */
|
||||||
[IN_DQ_STRING_ESCAPE] = {
|
[IN_DQ_STRING_ESCAPE] = {
|
||||||
[0x20 ... 0xFD] = IN_DQ_STRING,
|
[0x20 ... 0xFD] = IN_DQ_STRING,
|
||||||
|
@ -157,7 +176,7 @@ static const uint8_t json_lexer[][256] = {
|
||||||
/* Zero */
|
/* Zero */
|
||||||
[IN_ZERO] = {
|
[IN_ZERO] = {
|
||||||
TERMINAL(JSON_INTEGER),
|
TERMINAL(JSON_INTEGER),
|
||||||
['0' ... '9'] = IN_ERROR,
|
['0' ... '9'] = JSON_ERROR,
|
||||||
['.'] = IN_MANTISSA,
|
['.'] = IN_MANTISSA,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -208,15 +227,6 @@ static const uint8_t json_lexer[][256] = {
|
||||||
['a' ... 'z'] = IN_KEYWORD,
|
['a' ... 'z'] = IN_KEYWORD,
|
||||||
},
|
},
|
||||||
|
|
||||||
/* whitespace */
|
|
||||||
[IN_WHITESPACE] = {
|
|
||||||
TERMINAL(JSON_SKIP),
|
|
||||||
[' '] = IN_WHITESPACE,
|
|
||||||
['\t'] = IN_WHITESPACE,
|
|
||||||
['\r'] = IN_WHITESPACE,
|
|
||||||
['\n'] = IN_WHITESPACE,
|
|
||||||
},
|
|
||||||
|
|
||||||
/* interpolation */
|
/* interpolation */
|
||||||
[IN_INTERP] = {
|
[IN_INTERP] = {
|
||||||
TERMINAL(JSON_INTERP),
|
TERMINAL(JSON_INTERP),
|
||||||
|
@ -243,14 +253,25 @@ static const uint8_t json_lexer[][256] = {
|
||||||
[','] = JSON_COMMA,
|
[','] = JSON_COMMA,
|
||||||
[':'] = JSON_COLON,
|
[':'] = JSON_COLON,
|
||||||
['a' ... 'z'] = IN_KEYWORD,
|
['a' ... 'z'] = IN_KEYWORD,
|
||||||
[' '] = IN_WHITESPACE,
|
[' '] = IN_START,
|
||||||
['\t'] = IN_WHITESPACE,
|
['\t'] = IN_START,
|
||||||
['\r'] = IN_WHITESPACE,
|
['\r'] = IN_START,
|
||||||
['\n'] = IN_WHITESPACE,
|
['\n'] = IN_START,
|
||||||
},
|
},
|
||||||
[IN_START_INTERP]['%'] = IN_INTERP,
|
[IN_START_INTERP]['%'] = IN_INTERP,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Look up the transition for input byte @ch out of the lexer's current
 * state in the json_lexer[] table.
 *
 * @lexer: lexer whose ->state selects the table row
 * @ch: input byte; converted to uint8_t so that a negative char value
 *      cannot produce a negative column index
 * @flush: when true, the character is never reported as consumed
 * @char_consumed: set to true only if @flush is false and the table
 *                 entry does not carry the LOOKAHEAD flag
 *
 * Returns the table entry with the LOOKAHEAD flag masked off.
 */
static inline uint8_t next_state(JSONLexer *lexer, char ch, bool flush,
                                 bool *char_consumed)
{
    uint8_t next;

    /*
     * Valid rows are 0 .. ARRAY_SIZE(json_lexer) - 1, so the comparison
     * must be strict: a state equal to ARRAY_SIZE() would read one row
     * past the end of the table.
     */
    assert(lexer->state < ARRAY_SIZE(json_lexer));
    next = json_lexer[lexer->state][(uint8_t)ch];
    *char_consumed = !flush && !(next & LOOKAHEAD);
    return next & ~LOOKAHEAD;
}
|
||||||
|
|
||||||
void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
|
void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
|
||||||
{
|
{
|
||||||
lexer->start_state = lexer->state = enable_interpolation
|
lexer->start_state = lexer->state = enable_interpolation
|
||||||
|
@ -261,7 +282,8 @@ void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
|
||||||
|
|
||||||
static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
|
static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
|
||||||
{
|
{
|
||||||
int char_consumed, new_state;
|
int new_state;
|
||||||
|
bool char_consumed = false;
|
||||||
|
|
||||||
lexer->x++;
|
lexer->x++;
|
||||||
if (ch == '\n') {
|
if (ch == '\n') {
|
||||||
|
@ -269,11 +291,10 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
|
||||||
lexer->y++;
|
lexer->y++;
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
while (flush ? lexer->state != lexer->start_state : !char_consumed) {
|
||||||
assert(lexer->state <= ARRAY_SIZE(json_lexer));
|
new_state = next_state(lexer, ch, flush, &char_consumed);
|
||||||
new_state = json_lexer[lexer->state][(uint8_t)ch];
|
if (char_consumed) {
|
||||||
char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
|
assert(!flush);
|
||||||
if (char_consumed && !flush) {
|
|
||||||
g_string_append_c(lexer->token, ch);
|
g_string_append_c(lexer->token, ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,33 +313,23 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
|
||||||
json_message_process_token(lexer, lexer->token, new_state,
|
json_message_process_token(lexer, lexer->token, new_state,
|
||||||
lexer->x, lexer->y);
|
lexer->x, lexer->y);
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case JSON_SKIP:
|
case IN_START:
|
||||||
g_string_truncate(lexer->token, 0);
|
g_string_truncate(lexer->token, 0);
|
||||||
new_state = lexer->start_state;
|
new_state = lexer->start_state;
|
||||||
break;
|
break;
|
||||||
case IN_ERROR:
|
case JSON_ERROR:
|
||||||
/* XXX: To avoid having previous bad input leaving the parser in an
|
|
||||||
* unresponsive state where we consume unpredictable amounts of
|
|
||||||
* subsequent "good" input, percolate this error state up to the
|
|
||||||
* parser by emitting a JSON_ERROR token, then reset lexer state.
|
|
||||||
*
|
|
||||||
* Also note that this handling is required for reliable channel
|
|
||||||
* negotiation between QMP and the guest agent, since chr(0xFF)
|
|
||||||
* is placed at the beginning of certain events to ensure proper
|
|
||||||
* delivery when the channel is in an unknown state. chr(0xFF) is
|
|
||||||
* never a valid ASCII/UTF-8 sequence, so this should reliably
|
|
||||||
* induce an error/flush state.
|
|
||||||
*/
|
|
||||||
json_message_process_token(lexer, lexer->token, JSON_ERROR,
|
json_message_process_token(lexer, lexer->token, JSON_ERROR,
|
||||||
lexer->x, lexer->y);
|
lexer->x, lexer->y);
|
||||||
|
new_state = IN_RECOVERY;
|
||||||
|
/* fall through */
|
||||||
|
case IN_RECOVERY:
|
||||||
g_string_truncate(lexer->token, 0);
|
g_string_truncate(lexer->token, 0);
|
||||||
lexer->state = lexer->start_state;
|
break;
|
||||||
return;
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
lexer->state = new_state;
|
lexer->state = new_state;
|
||||||
} while (!char_consumed && !flush);
|
}
|
||||||
|
|
||||||
/* Do not let a single token grow to an arbitrarily large size,
|
/* Do not let a single token grow to an arbitrarily large size,
|
||||||
* this is a security consideration.
|
* this is a security consideration.
|
||||||
|
@ -342,9 +353,8 @@ void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
|
||||||
|
|
||||||
void json_lexer_flush(JSONLexer *lexer)
|
void json_lexer_flush(JSONLexer *lexer)
|
||||||
{
|
{
|
||||||
if (lexer->state != lexer->start_state) {
|
|
||||||
json_lexer_feed_char(lexer, 0, true);
|
json_lexer_feed_char(lexer, 0, true);
|
||||||
}
|
assert(lexer->state == lexer->start_state);
|
||||||
json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT,
|
json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT,
|
||||||
lexer->x, lexer->y);
|
lexer->x, lexer->y);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,10 +16,11 @@
|
||||||
|
|
||||||
#include "qapi/qmp/json-parser.h"
|
#include "qapi/qmp/json-parser.h"
|
||||||
|
|
||||||
|
|
||||||
typedef enum json_token_type {
|
typedef enum json_token_type {
|
||||||
JSON_MIN = 100,
|
JSON_ERROR = 0, /* must be zero, see json_lexer[] */
|
||||||
JSON_LCURLY = JSON_MIN,
|
/* Gap for lexer states */
|
||||||
|
JSON_LCURLY = 100,
|
||||||
|
JSON_MIN = JSON_LCURLY,
|
||||||
JSON_RCURLY,
|
JSON_RCURLY,
|
||||||
JSON_LSQUARE,
|
JSON_LSQUARE,
|
||||||
JSON_RSQUARE,
|
JSON_RSQUARE,
|
||||||
|
@ -30,9 +31,8 @@ typedef enum json_token_type {
|
||||||
JSON_KEYWORD,
|
JSON_KEYWORD,
|
||||||
JSON_STRING,
|
JSON_STRING,
|
||||||
JSON_INTERP,
|
JSON_INTERP,
|
||||||
JSON_SKIP,
|
|
||||||
JSON_ERROR,
|
|
||||||
JSON_END_OF_INPUT,
|
JSON_END_OF_INPUT,
|
||||||
|
JSON_MAX = JSON_END_OF_INPUT
|
||||||
} JSONTokenType;
|
} JSONTokenType;
|
||||||
|
|
||||||
typedef struct JSONToken JSONToken;
|
typedef struct JSONToken JSONToken;
|
||||||
|
|
|
@ -76,10 +76,7 @@ static void test_malformed(QTestState *qts)
|
||||||
assert_recovered(qts);
|
assert_recovered(qts);
|
||||||
|
|
||||||
/* lexical error: interpolation */
|
/* lexical error: interpolation */
|
||||||
qtest_qmp_send_raw(qts, "%%p\n");
|
qtest_qmp_send_raw(qts, "%%p");
|
||||||
/* two errors, one for "%", one for "p" */
|
|
||||||
resp = qtest_qmp_receive(qts);
|
|
||||||
qmp_assert_error_class(resp, "GenericError");
|
|
||||||
resp = qtest_qmp_receive(qts);
|
resp = qtest_qmp_receive(qts);
|
||||||
qmp_assert_error_class(resp, "GenericError");
|
qmp_assert_error_class(resp, "GenericError");
|
||||||
assert_recovered(qts);
|
assert_recovered(qts);
|
||||||
|
|
Loading…
Reference in New Issue