QObject patches for 2018-09-24

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbqQxWAAoJEDhwtADrkYZTvMkQAJlraQ5ydNTLEcaQ2GPjzbNF
 n34hUjT8K0JQmtiLTdiZvcEmqN9VezPJtOgoGyOFljpdJOBgmThXkWp2STyEF4lo
 sYpA/ml8Mf39TCKjGQGmelxOuSHXmSuEWbCkcZLS/xbf/phMPHulVywcU8UP2ehz
 F7k5FXSx8MxA7a86lhhegXkK6O0+zvlnvR2tjufJdL0U/V1qXyKqdnOo5ZG7A/H9
 +8PvhiVxHr+Id0+1iFqWYzL703zHDWQvfCxzI5arMD9X8jRulBli+eW1LJOTM8SK
 Pcel9xcSVsp53TIhD0+jG6OS88osMQP/JO3ND8qKFBbJ8f/WXKyskIUFgK9oVxX3
 083tcCqCwYFe3THYzY8d5hyhP8OA3ddnSLyA0LV80APi5Z9z+eERSYwCdEad96nS
 SEl6kLT8VNoVxPi6lPoxsTKJDjCVWesgXkRH0KkzC9JsX0oweW+3z8rNEw9JIeEM
 VtMnqqG7aFPmlc0kcmNCGSWKNLHymN5ZxylHfQcyauzIPKO4eS3XCwtF4NB5npBJ
 I1s14NJIHeeSADGaQLTHRLkL1iY3q8ZtAfK+SwnGFtEgIIRHST96KAXDbxyJow8P
 Ommd2N/J57M68rJUtqBH0bxu58A7AeKN+DrxpeTpgzlDY+/LxLJS46pHVzu+zqim
 NpXyHG6C+DKcwd/+jFmk
 =BKOg
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-09-24' into staging

QObject patches for 2018-09-24

# gpg: Signature made Mon 24 Sep 2018 17:09:58 BST
# gpg:                using RSA key 3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* remotes/armbru/tags/pull-qobject-2018-09-24:
  json: Eliminate lexer state IN_WHITESPACE, pseudo-token JSON_SKIP
  json: Eliminate lexer state IN_ERROR
  json: Nicer recovery from lexical errors
  json: Make lexer's "character consumed" logic less confusing
  json: Clean up how lexer consumes "end of input"
  json: Fix lexer for lookahead character beyond '\x7F'

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-09-25 12:29:14 +01:00
commit f69d20fa8b
3 changed files with 65 additions and 58 deletions

View File

@ -100,7 +100,7 @@
*/ */
enum json_lexer_state { enum json_lexer_state {
IN_ERROR = 0, /* must really be 0, see json_lexer[] */ IN_RECOVERY = 1,
IN_DQ_STRING_ESCAPE, IN_DQ_STRING_ESCAPE,
IN_DQ_STRING, IN_DQ_STRING,
IN_SQ_STRING_ESCAPE, IN_SQ_STRING_ESCAPE,
@ -115,25 +115,44 @@ enum json_lexer_state {
IN_SIGN, IN_SIGN,
IN_KEYWORD, IN_KEYWORD,
IN_INTERP, IN_INTERP,
IN_WHITESPACE,
IN_START, IN_START,
IN_START_INTERP, /* must be IN_START + 1 */ IN_START_INTERP, /* must be IN_START + 1 */
}; };
QEMU_BUILD_BUG_ON(JSON_ERROR != 0);
QEMU_BUILD_BUG_ON(IN_RECOVERY != JSON_ERROR + 1);
QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP);
QEMU_BUILD_BUG_ON(JSON_MAX >= 0x80);
QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);
#define TERMINAL(state) [0 ... 0x7F] = (state) #define LOOKAHEAD 0x80
#define TERMINAL(state) [0 ... 0xFF] = ((state) | LOOKAHEAD)
/* Return whether TERMINAL is a terminal state and the transition to it
from OLD_STATE required lookahead. This happens whenever the table
below uses the TERMINAL macro. */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
(terminal != IN_ERROR && json_lexer[(old_state)][0] == (terminal))
static const uint8_t json_lexer[][256] = { static const uint8_t json_lexer[][256] = {
/* Relies on default initialization to IN_ERROR! */ /* Relies on default initialization to IN_ERROR! */
/* error recovery */
[IN_RECOVERY] = {
/*
* Skip characters until a structural character, an ASCII
* control character other than '\t', or impossible UTF-8
* bytes '\xFE', '\xFF'. Structural characters and line
* endings are promising resynchronization points. Clients
* may use the others to force the JSON parser into known-good
* state; see docs/interop/qmp-spec.txt.
*/
[0 ... 0x1F] = IN_START | LOOKAHEAD,
[0x20 ... 0xFD] = IN_RECOVERY,
[0xFE ... 0xFF] = IN_START | LOOKAHEAD,
['\t'] = IN_RECOVERY,
['['] = IN_START | LOOKAHEAD,
[']'] = IN_START | LOOKAHEAD,
['{'] = IN_START | LOOKAHEAD,
['}'] = IN_START | LOOKAHEAD,
[':'] = IN_START | LOOKAHEAD,
[','] = IN_START | LOOKAHEAD,
},
/* double quote string */ /* double quote string */
[IN_DQ_STRING_ESCAPE] = { [IN_DQ_STRING_ESCAPE] = {
[0x20 ... 0xFD] = IN_DQ_STRING, [0x20 ... 0xFD] = IN_DQ_STRING,
@ -157,7 +176,7 @@ static const uint8_t json_lexer[][256] = {
/* Zero */ /* Zero */
[IN_ZERO] = { [IN_ZERO] = {
TERMINAL(JSON_INTEGER), TERMINAL(JSON_INTEGER),
['0' ... '9'] = IN_ERROR, ['0' ... '9'] = JSON_ERROR,
['.'] = IN_MANTISSA, ['.'] = IN_MANTISSA,
}, },
@ -208,15 +227,6 @@ static const uint8_t json_lexer[][256] = {
['a' ... 'z'] = IN_KEYWORD, ['a' ... 'z'] = IN_KEYWORD,
}, },
/* whitespace */
[IN_WHITESPACE] = {
TERMINAL(JSON_SKIP),
[' '] = IN_WHITESPACE,
['\t'] = IN_WHITESPACE,
['\r'] = IN_WHITESPACE,
['\n'] = IN_WHITESPACE,
},
/* interpolation */ /* interpolation */
[IN_INTERP] = { [IN_INTERP] = {
TERMINAL(JSON_INTERP), TERMINAL(JSON_INTERP),
@ -243,14 +253,25 @@ static const uint8_t json_lexer[][256] = {
[','] = JSON_COMMA, [','] = JSON_COMMA,
[':'] = JSON_COLON, [':'] = JSON_COLON,
['a' ... 'z'] = IN_KEYWORD, ['a' ... 'z'] = IN_KEYWORD,
[' '] = IN_WHITESPACE, [' '] = IN_START,
['\t'] = IN_WHITESPACE, ['\t'] = IN_START,
['\r'] = IN_WHITESPACE, ['\r'] = IN_START,
['\n'] = IN_WHITESPACE, ['\n'] = IN_START,
}, },
[IN_START_INTERP]['%'] = IN_INTERP, [IN_START_INTERP]['%'] = IN_INTERP,
}; };
/*
 * Look up the transition for input byte @ch from the lexer's current state.
 *
 * Stores in *@char_consumed whether the transition consumes @ch: true only
 * when @flush is false and the table entry does not carry the LOOKAHEAD flag.
 * Returns the table entry with the LOOKAHEAD flag masked off.
 */
static inline uint8_t next_state(JSONLexer *lexer, char ch, bool flush,
                                 bool *char_consumed)
{
    uint8_t next;

    /*
     * Valid row indices are 0 .. ARRAY_SIZE(json_lexer) - 1, so the bound
     * must be strict; "<=" would let an index one past the end through.
     */
    assert(lexer->state < ARRAY_SIZE(json_lexer));
    /* Index by the unsigned byte value so bytes >= 0x80 stay in 0..255. */
    next = json_lexer[lexer->state][(uint8_t)ch];
    *char_consumed = !flush && !(next & LOOKAHEAD);
    return next & ~LOOKAHEAD;
}
void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
{ {
lexer->start_state = lexer->state = enable_interpolation lexer->start_state = lexer->state = enable_interpolation
@ -261,7 +282,8 @@ void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
{ {
int char_consumed, new_state; int new_state;
bool char_consumed = false;
lexer->x++; lexer->x++;
if (ch == '\n') { if (ch == '\n') {
@ -269,11 +291,10 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
lexer->y++; lexer->y++;
} }
do { while (flush ? lexer->state != lexer->start_state : !char_consumed) {
assert(lexer->state <= ARRAY_SIZE(json_lexer)); new_state = next_state(lexer, ch, flush, &char_consumed);
new_state = json_lexer[lexer->state][(uint8_t)ch]; if (char_consumed) {
char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); assert(!flush);
if (char_consumed && !flush) {
g_string_append_c(lexer->token, ch); g_string_append_c(lexer->token, ch);
} }
@ -292,33 +313,23 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
json_message_process_token(lexer, lexer->token, new_state, json_message_process_token(lexer, lexer->token, new_state,
lexer->x, lexer->y); lexer->x, lexer->y);
/* fall through */ /* fall through */
case JSON_SKIP: case IN_START:
g_string_truncate(lexer->token, 0); g_string_truncate(lexer->token, 0);
new_state = lexer->start_state; new_state = lexer->start_state;
break; break;
case IN_ERROR: case JSON_ERROR:
/* XXX: To avoid having previous bad input leaving the parser in an
* unresponsive state where we consume unpredictable amounts of
* subsequent "good" input, percolate this error state up to the
* parser by emitting a JSON_ERROR token, then reset lexer state.
*
* Also note that this handling is required for reliable channel
* negotiation between QMP and the guest agent, since chr(0xFF)
* is placed at the beginning of certain events to ensure proper
* delivery when the channel is in an unknown state. chr(0xFF) is
* never a valid ASCII/UTF-8 sequence, so this should reliably
* induce an error/flush state.
*/
json_message_process_token(lexer, lexer->token, JSON_ERROR, json_message_process_token(lexer, lexer->token, JSON_ERROR,
lexer->x, lexer->y); lexer->x, lexer->y);
new_state = IN_RECOVERY;
/* fall through */
case IN_RECOVERY:
g_string_truncate(lexer->token, 0); g_string_truncate(lexer->token, 0);
lexer->state = lexer->start_state; break;
return;
default: default:
break; break;
} }
lexer->state = new_state; lexer->state = new_state;
} while (!char_consumed && !flush); }
/* Do not let a single token grow to an arbitrarily large size, /* Do not let a single token grow to an arbitrarily large size,
* this is a security consideration. * this is a security consideration.
@ -342,9 +353,8 @@ void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
void json_lexer_flush(JSONLexer *lexer) void json_lexer_flush(JSONLexer *lexer)
{ {
if (lexer->state != lexer->start_state) {
json_lexer_feed_char(lexer, 0, true); json_lexer_feed_char(lexer, 0, true);
} assert(lexer->state == lexer->start_state);
json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT, json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT,
lexer->x, lexer->y); lexer->x, lexer->y);
} }

View File

@ -16,10 +16,11 @@
#include "qapi/qmp/json-parser.h" #include "qapi/qmp/json-parser.h"
typedef enum json_token_type { typedef enum json_token_type {
JSON_MIN = 100, JSON_ERROR = 0, /* must be zero, see json_lexer[] */
JSON_LCURLY = JSON_MIN, /* Gap for lexer states */
JSON_LCURLY = 100,
JSON_MIN = JSON_LCURLY,
JSON_RCURLY, JSON_RCURLY,
JSON_LSQUARE, JSON_LSQUARE,
JSON_RSQUARE, JSON_RSQUARE,
@ -30,9 +31,8 @@ typedef enum json_token_type {
JSON_KEYWORD, JSON_KEYWORD,
JSON_STRING, JSON_STRING,
JSON_INTERP, JSON_INTERP,
JSON_SKIP,
JSON_ERROR,
JSON_END_OF_INPUT, JSON_END_OF_INPUT,
JSON_MAX = JSON_END_OF_INPUT
} JSONTokenType; } JSONTokenType;
typedef struct JSONToken JSONToken; typedef struct JSONToken JSONToken;

View File

@ -76,10 +76,7 @@ static void test_malformed(QTestState *qts)
assert_recovered(qts); assert_recovered(qts);
/* lexical error: interpolation */ /* lexical error: interpolation */
qtest_qmp_send_raw(qts, "%%p\n"); qtest_qmp_send_raw(qts, "%%p");
/* two errors, one for "%", one for "p" */
resp = qtest_qmp_receive(qts);
qmp_assert_error_class(resp, "GenericError");
resp = qtest_qmp_receive(qts); resp = qtest_qmp_receive(qts);
qmp_assert_error_class(resp, "GenericError"); qmp_assert_error_class(resp, "GenericError");
assert_recovered(qts); assert_recovered(qts);