Debugger: Refactor lexer to use Vector type

This commit is contained in:
Vicki Pfau 2017-12-29 00:47:49 -05:00
parent a83e76a62a
commit d7900fdf5f
3 changed files with 89 additions and 96 deletions

View File

@ -8,6 +8,8 @@
#include <mgba-util/common.h> #include <mgba-util/common.h>
#include <mgba-util/vector.h>
CXX_GUARD_START CXX_GUARD_START
enum Operation { enum Operation {
@ -43,10 +45,7 @@ struct Token {
}; };
}; };
struct LexVector { DECLARE_VECTOR(LexVector, struct Token);
struct LexVector* next;
struct Token token;
};
struct ParseTree { struct ParseTree {
struct Token token; struct Token token;

View File

@ -545,11 +545,11 @@ struct CLIDebugVector* CLIDVParse(struct CLIDebugger* debugger, const char* stri
struct CLIDebugVector dvTemp = { .type = CLIDV_INT_TYPE, .segmentValue = -1 }; struct CLIDebugVector dvTemp = { .type = CLIDV_INT_TYPE, .segmentValue = -1 };
struct LexVector lv = { .next = 0 }; struct LexVector lv;
LexVectorInit(&lv, 0);
size_t adjusted = lexExpression(&lv, string, length); size_t adjusted = lexExpression(&lv, string, length);
if (adjusted > length) { if (adjusted > length) {
dvTemp.type = CLIDV_ERROR_TYPE; dvTemp.type = CLIDV_ERROR_TYPE;
lexFree(lv.next);
} }
struct ParseTree tree; struct ParseTree tree;
@ -565,6 +565,9 @@ struct CLIDebugVector* CLIDVParse(struct CLIDebugger* debugger, const char* stri
parseFree(tree.lhs); parseFree(tree.lhs);
parseFree(tree.rhs); parseFree(tree.rhs);
lexFree(&lv);
LexVectorDeinit(&lv);
struct CLIDebugVector* dv = malloc(sizeof(struct CLIDebugVector)); struct CLIDebugVector* dv = malloc(sizeof(struct CLIDebugVector));
if (dvTemp.type == CLIDV_ERROR_TYPE) { if (dvTemp.type == CLIDV_ERROR_TYPE) {
dv->type = CLIDV_ERROR_TYPE; dv->type = CLIDV_ERROR_TYPE;

View File

@ -8,6 +8,8 @@
#include <mgba/debugger/debugger.h> #include <mgba/debugger/debugger.h>
#include <mgba-util/string.h> #include <mgba-util/string.h>
DEFINE_VECTOR(LexVector, struct Token);
enum LexState { enum LexState {
LEX_ERROR = -1, LEX_ERROR = -1,
LEX_ROOT = 0, LEX_ROOT = 0,
@ -21,53 +23,45 @@ enum LexState {
LEX_EXPECT_OPERATOR LEX_EXPECT_OPERATOR
}; };
static struct LexVector* _lexOperator(struct LexVector* lv, char operator) { static void _lexOperator(struct LexVector* lv, char operator) {
struct LexVector* lvNext = malloc(sizeof(struct LexVector)); struct Token* lvNext = LexVectorAppend(lv);
lvNext->token.type = TOKEN_OPERATOR_TYPE; lvNext->type = TOKEN_OPERATOR_TYPE;
switch (operator) { switch (operator) {
case '+': case '+':
lvNext->token.operatorValue = OP_ADD; lvNext->operatorValue = OP_ADD;
break; break;
case '-': case '-':
lvNext->token.operatorValue = OP_SUBTRACT; lvNext->operatorValue = OP_SUBTRACT;
break; break;
case '*': case '*':
lvNext->token.operatorValue = OP_MULTIPLY; lvNext->operatorValue = OP_MULTIPLY;
break; break;
case '/': case '/':
lvNext->token.operatorValue = OP_DIVIDE; lvNext->operatorValue = OP_DIVIDE;
break; break;
case '&': case '&':
lvNext->token.operatorValue = OP_AND; lvNext->operatorValue = OP_AND;
break; break;
case '|': case '|':
lvNext->token.operatorValue = OP_OR; lvNext->operatorValue = OP_OR;
break; break;
case '^': case '^':
lvNext->token.operatorValue = OP_XOR; lvNext->operatorValue = OP_XOR;
break; break;
case '<': case '<':
lvNext->token.operatorValue = OP_LESS; lvNext->operatorValue = OP_LESS;
break; break;
case '>': case '>':
lvNext->token.operatorValue = OP_GREATER; lvNext->operatorValue = OP_GREATER;
break; break;
default: default:
lvNext->token.type = TOKEN_ERROR_TYPE; lvNext->type = TOKEN_ERROR_TYPE;
break; break;
} }
lvNext->next = lv->next;
lv->next = lvNext;
lv = lvNext;
lvNext = malloc(sizeof(struct LexVector));
lvNext->next = lv->next;
lvNext->token.type = TOKEN_ERROR_TYPE;
lv->next = lvNext;
return lvNext;
} }
static struct LexVector* _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) { static void _lexValue(struct LexVector* lv, char token, uint32_t next, enum LexState* state) {
struct LexVector* lvNext; struct Token* lvNext;
switch (token) { switch (token) {
case '+': case '+':
@ -79,26 +73,24 @@ static struct LexVector* _lexValue(struct LexVector* lv, char token, uint32_t ne
case '^': case '^':
case '<': case '<':
case '>': case '>':
lv->token.type = TOKEN_UINT_TYPE; lvNext = LexVectorAppend(lv);
lv->token.uintValue = next; lvNext->type = TOKEN_UINT_TYPE;
lv = _lexOperator(lv, token); lvNext->uintValue = next;
_lexOperator(lv, token);
*state = LEX_ROOT; *state = LEX_ROOT;
break; break;
case ')': case ')':
lvNext = malloc(sizeof(struct LexVector)); lvNext = LexVectorAppend(lv);
lvNext->next = lv->next; lvNext->type = TOKEN_UINT_TYPE;
lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE; lvNext->uintValue = next;
lv->next = lvNext; lvNext = LexVectorAppend(lv);
lv->token.type = TOKEN_UINT_TYPE; lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
lv->token.uintValue = next;
lv = lvNext;
*state = LEX_EXPECT_OPERATOR; *state = LEX_EXPECT_OPERATOR;
break; break;
default: default:
*state = LEX_ERROR; *state = LEX_ERROR;
break; break;
} }
return lv;
} }
size_t lexExpression(struct LexVector* lv, const char* string, size_t length) { size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
@ -111,7 +103,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
enum LexState state = LEX_ROOT; enum LexState state = LEX_ROOT;
const char* tokenStart = 0; const char* tokenStart = 0;
struct LexVector* lvNext; struct Token* lvNext;
while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) { while (length > 0 && string[0] && string[0] != ' ' && state != LEX_ERROR) {
char token = string[0]; char token = string[0];
@ -144,12 +136,8 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
break; break;
case '(': case '(':
state = LEX_ROOT; state = LEX_ROOT;
lv->token.type = TOKEN_OPEN_PAREN_TYPE; lvNext = LexVectorAppend(lv);
lvNext = malloc(sizeof(struct LexVector)); lvNext->type = TOKEN_OPEN_PAREN_TYPE;
lvNext->next = lv->next;
lvNext->token.type = TOKEN_ERROR_TYPE;
lv->next = lvNext;
lv = lvNext;
break; break;
default: default:
if (tolower(token) >= 'a' && tolower(token <= 'z')) { if (tolower(token) >= 'a' && tolower(token <= 'z')) {
@ -171,14 +159,18 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
case '^': case '^':
case '<': case '<':
case '>': case '>':
lv->token.type = TOKEN_IDENTIFIER_TYPE; lvNext = LexVectorAppend(lv);
lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1); lvNext->type = TOKEN_IDENTIFIER_TYPE;
lv = _lexOperator(lv, token); lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
_lexOperator(lv, token);
state = LEX_ROOT; state = LEX_ROOT;
break; break;
case ')': case ')':
lv->token.type = TOKEN_IDENTIFIER_TYPE; lvNext = LexVectorAppend(lv);
lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1); lvNext->type = TOKEN_IDENTIFIER_TYPE;
lvNext->identifierValue = strndup(tokenStart, string - tokenStart - 1);
lvNext = LexVectorAppend(lv);
lvNext->type = TOKEN_CLOSE_PAREN_TYPE;
state = LEX_EXPECT_OPERATOR; state = LEX_EXPECT_OPERATOR;
break; break;
default: default:
@ -197,7 +189,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
next += token - '0'; next += token - '0';
break; break;
default: default:
lv = _lexValue(lv, token, next, &state); _lexValue(lv, token, next, &state);
break; break;
} }
break; break;
@ -218,7 +210,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
next += token - '0'; next += token - '0';
break; break;
default: default:
lv = _lexValue(lv, token, next, &state); _lexValue(lv, token, next, &state);
break; break;
} }
break; break;
@ -262,17 +254,13 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
next += token - 'a' + 10; next += token - 'a' + 10;
break; break;
case ':': case ':':
lv->token.type = TOKEN_SEGMENT_TYPE; lvNext = LexVectorAppend(lv);
lv->token.uintValue = next; lvNext->type = TOKEN_SEGMENT_TYPE;
lvNext = malloc(sizeof(struct LexVector)); lvNext->uintValue = next;
lvNext->next = lv->next;
lvNext->token.type = TOKEN_UINT_TYPE;
lv->next = lvNext;
lv = lvNext;
next = 0; next = 0;
break; break;
default: default:
lv = _lexValue(lv, token, next, &state); _lexValue(lv, token, next, &state);
break; break;
} }
break; break;
@ -302,7 +290,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
state = LEX_EXPECT_DECIMAL; state = LEX_EXPECT_DECIMAL;
break; break;
default: default:
lv = _lexValue(lv, token, next, &state); _lexValue(lv, token, next, &state);
break; break;
} }
break; break;
@ -317,7 +305,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
case '^': case '^':
case '<': case '<':
case '>': case '>':
lv = _lexOperator(lv, token); _lexOperator(lv, token);
state = LEX_ROOT; state = LEX_ROOT;
break; break;
default: default:
@ -335,24 +323,23 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) {
case LEX_EXPECT_DECIMAL: case LEX_EXPECT_DECIMAL:
case LEX_EXPECT_HEX: case LEX_EXPECT_HEX:
case LEX_EXPECT_PREFIX: case LEX_EXPECT_PREFIX:
lv->token.type = TOKEN_UINT_TYPE; lvNext = LexVectorAppend(lv);
lv->token.uintValue = next; lvNext->type = TOKEN_UINT_TYPE;
lvNext->uintValue = next;
break; break;
case LEX_EXPECT_IDENTIFIER: case LEX_EXPECT_IDENTIFIER:
lv->token.type = TOKEN_IDENTIFIER_TYPE; lvNext = LexVectorAppend(lv);
lv->token.identifierValue = strndup(tokenStart, string - tokenStart); lvNext->type = TOKEN_IDENTIFIER_TYPE;
lvNext->identifierValue = strndup(tokenStart, string - tokenStart);
break; break;
case LEX_EXPECT_OPERATOR: case LEX_EXPECT_OPERATOR:
lvNext = malloc(sizeof(struct LexVector));
lvNext->next = lv->next;
lvNext->token.type = TOKEN_CLOSE_PAREN_TYPE;
lv->next = lvNext;
break; break;
case LEX_EXPECT_BINARY_FIRST: case LEX_EXPECT_BINARY_FIRST:
case LEX_EXPECT_HEX_FIRST: case LEX_EXPECT_HEX_FIRST:
case LEX_ERROR: case LEX_ERROR:
default: default:
lv->token.type = TOKEN_ERROR_TYPE; lvNext = LexVectorAppend(lv);
lvNext->type = TOKEN_ERROR_TYPE;
break; break;
} }
return adjusted; return adjusted;
@ -382,65 +369,67 @@ static struct ParseTree* _parseTreeCreate() {
return tree; return tree;
} }
static struct LexVector* _parseExpression(struct ParseTree* tree, struct LexVector* lv, int precedence, int openParens) { static size_t _parseExpression(struct ParseTree* tree, struct LexVector* lv, size_t i, int precedence, int openParens) {
struct ParseTree* newTree = 0; struct ParseTree* newTree = 0;
while (lv) { while (i < LexVectorSize(lv)) {
struct Token* token = LexVectorGetPointer(lv, i);
int newPrecedence; int newPrecedence;
switch (lv->token.type) { switch (token->type) {
case TOKEN_IDENTIFIER_TYPE: case TOKEN_IDENTIFIER_TYPE:
case TOKEN_UINT_TYPE: case TOKEN_UINT_TYPE:
if (tree->token.type == TOKEN_ERROR_TYPE) { if (tree->token.type == TOKEN_ERROR_TYPE) {
tree->token = lv->token; tree->token = *token;
lv = lv->next; if (token->type == TOKEN_IDENTIFIER_TYPE) {
tree->token.identifierValue = strdup(token->identifierValue);
}
++i;
} else { } else {
tree->token.type = TOKEN_ERROR_TYPE; tree->token.type = TOKEN_ERROR_TYPE;
return 0; return i + 1;
} }
break; break;
case TOKEN_SEGMENT_TYPE: case TOKEN_SEGMENT_TYPE:
tree->lhs = _parseTreeCreate(); tree->lhs = _parseTreeCreate();
tree->lhs->token.type = TOKEN_UINT_TYPE; tree->lhs->token.type = TOKEN_UINT_TYPE;
tree->lhs->token.uintValue = lv->token.uintValue; tree->lhs->token.uintValue = token->uintValue;
tree->rhs = _parseTreeCreate(); tree->rhs = _parseTreeCreate();
tree->token.type = TOKEN_SEGMENT_TYPE; tree->token.type = TOKEN_SEGMENT_TYPE;
lv = _parseExpression(tree->rhs, lv->next, precedence, openParens); i = _parseExpression(tree->rhs, lv, i + 1, precedence, openParens);
if (tree->token.type == TOKEN_ERROR_TYPE) { if (tree->token.type == TOKEN_ERROR_TYPE) {
tree->token.type = TOKEN_ERROR_TYPE; tree->token.type = TOKEN_ERROR_TYPE;
} }
break; break;
case TOKEN_OPEN_PAREN_TYPE: case TOKEN_OPEN_PAREN_TYPE:
lv = _parseExpression(tree, lv->next, INT_MAX, openParens + 1); i = _parseExpression(tree, lv, i + 1, INT_MAX, openParens + 1);
break; break;
case TOKEN_CLOSE_PAREN_TYPE: case TOKEN_CLOSE_PAREN_TYPE:
if (openParens <= 0) { if (openParens <= 0) {
tree->token.type = TOKEN_ERROR_TYPE; tree->token.type = TOKEN_ERROR_TYPE;
return 0;
} }
return lv->next; return i + 1;
break;
case TOKEN_OPERATOR_TYPE: case TOKEN_OPERATOR_TYPE:
newPrecedence = _operatorPrecedence[lv->token.operatorValue]; newPrecedence = _operatorPrecedence[token->operatorValue];
if (newPrecedence < precedence) { if (newPrecedence < precedence) {
newTree = _parseTreeCreate(); newTree = _parseTreeCreate();
*newTree = *tree; *newTree = *tree;
tree->lhs = newTree; tree->lhs = newTree;
tree->rhs = _parseTreeCreate(); tree->rhs = _parseTreeCreate();
tree->token = lv->token; tree->token = *token;
lv = _parseExpression(tree->rhs, lv->next, newPrecedence, openParens); i = _parseExpression(tree->rhs, lv, i + 1, newPrecedence, openParens);
if (tree->token.type == TOKEN_ERROR_TYPE) { if (tree->token.type == TOKEN_ERROR_TYPE) {
tree->token.type = TOKEN_ERROR_TYPE; tree->token.type = TOKEN_ERROR_TYPE;
} }
} else { } else {
return lv; return i;
} }
break; break;
case TOKEN_ERROR_TYPE: case TOKEN_ERROR_TYPE:
tree->token.type = TOKEN_ERROR_TYPE; tree->token.type = TOKEN_ERROR_TYPE;
return 0; return i + 1;
} }
} }
return 0; return i;
} }
void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) { void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
@ -452,14 +441,16 @@ void parseLexedExpression(struct ParseTree* tree, struct LexVector* lv) {
tree->lhs = 0; tree->lhs = 0;
tree->rhs = 0; tree->rhs = 0;
_parseExpression(tree, lv, INT_MAX, 0); _parseExpression(tree, lv, 0, INT_MAX, 0);
} }
void lexFree(struct LexVector* lv) { void lexFree(struct LexVector* lv) {
while (lv) { size_t i;
struct LexVector* lvNext = lv->next; for (i = 0; i < LexVectorSize(lv); ++i) {
free(lv); struct Token* token = LexVectorGetPointer(lv, i);
lv = lvNext; if (token->type == TOKEN_IDENTIFIER_TYPE) {
free(token->identifierValue);
}
} }
} }