ExpressionParser: Clean up string lexing and support numeric literals without tick delimiter: e.g. 0.75

This commit is contained in:
Jordan Woyak 2019-01-05 13:43:39 -06:00
parent fa75ab404f
commit 785eb14432
1 changed file with 34 additions and 41 deletions

View File

@ -7,6 +7,7 @@
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <locale>
#include <map> #include <map>
#include <memory> #include <memory>
#include <regex> #include <regex>
@ -155,72 +156,62 @@ public:
Lexer(const std::string& expr_) : expr(expr_) { it = expr.begin(); } Lexer(const std::string& expr_) : expr(expr_) { it = expr.begin(); }
bool FetchDelimString(std::string& value, char delim) template <typename F>
std::string FetchCharsWhile(F&& func)
{ {
value = ""; std::string value;
while (it != expr.end()) while (it != expr.end() && func(*it))
{ {
char c = *it; value += *it;
++it; ++it;
if (c == delim)
return true;
value += c;
} }
return false; return value;
}
// Reads characters up to, but not including, the next occurrence of `delim`
// and returns them. The delimiter itself is consumed so that lexing resumes
// with the character after it.
//
// If the input runs out before `delim` is seen (an unterminated literal), the
// text read so far is returned and the iterator is left at the end of input.
// It must not be advanced in that case: incrementing a past-the-end iterator
// is undefined behavior.
std::string FetchDelimString(char delim)
{
  // FetchCharsWhile stops either at the end of input or on the first character
  // the predicate rejects; remember which of the two happened.
  bool found_delim = false;
  const std::string result = FetchCharsWhile([delim, &found_delim](char c) {
    found_delim = (c == delim);
    return !found_delim;
  });
  if (found_delim)
    ++it;  // Step over the closing delimiter.
  return result;
}
std::string FetchWordChars() std::string FetchWordChars()
{ {
std::string word; // Valid word characters:
std::regex rx("[a-z0-9_]", std::regex_constants::icase);
std::regex valid_name_char("[a-z0-9_]", std::regex_constants::icase); return FetchCharsWhile([&rx](char c) { return std::regex_match(std::string(1, c), rx); });
while (it != expr.end() && std::regex_match(std::string(1, *it), valid_name_char))
{
word += *it;
++it;
}
return word;
} }
Token GetUnaryFunction() { return Token(TOK_UNARY, FetchWordChars()); } Token GetUnaryFunction() { return Token(TOK_UNARY, FetchWordChars()); }
Token GetLiteral() Token GetDelimitedLiteral() { return Token(TOK_LITERAL, FetchDelimString('\'')); }
{
std::string value;
FetchDelimString(value, '\'');
return Token(TOK_LITERAL, value);
}
Token GetVariable() { return Token(TOK_VARIABLE, FetchWordChars()); } Token GetVariable() { return Token(TOK_VARIABLE, FetchWordChars()); }
Token GetFullyQualifiedControl() Token GetFullyQualifiedControl() { return Token(TOK_CONTROL, FetchDelimString('`')); }
{
std::string value;
FetchDelimString(value, '`');
return Token(TOK_CONTROL, value);
}
Token GetBarewordsControl(char c) Token GetBarewordsControl(char c)
{ {
std::string name; std::string name;
name += c; name += c;
name += FetchCharsWhile([](char c) { return std::isalpha(c, std::locale::classic()); });
while (it != expr.end())
{
c = *it;
if (!isalpha(c))
break;
name += c;
++it;
}
ControlQualifier qualifier; ControlQualifier qualifier;
qualifier.control_name = name; qualifier.control_name = name;
return Token(TOK_CONTROL, qualifier); return Token(TOK_CONTROL, qualifier);
} }
// Lexes a numeric literal written without tick delimiters, e.g. 0.75.
//
// `c` is the first character of the literal, already consumed by the caller
// (NextToken dispatches here when it sees a digit). Every following digit or
// '.' is appended to it. No validation is done at this stage, so any number of
// '.' characters is accepted into the token text.
Token GetRealLiteral(char c)
{
  const auto is_number_char = [](char ch) {
    return ch == '.' || std::isdigit(ch, std::locale::classic());
  };

  std::string text(1, c);
  text += FetchCharsWhile(is_number_char);
  return Token(TOK_LITERAL, text);
}
Token NextToken() Token NextToken()
{ {
if (it == expr.end()) if (it == expr.end())
@ -265,14 +256,16 @@ public:
case ',': case ',':
return Token(TOK_COMMA); return Token(TOK_COMMA);
case '\'': case '\'':
return GetLiteral(); return GetDelimitedLiteral();
case '$': case '$':
return GetVariable(); return GetVariable();
case '`': case '`':
return GetFullyQualifiedControl(); return GetFullyQualifiedControl();
default: default:
if (isalpha(c)) if (isalpha(c, std::locale::classic()))
return GetBarewordsControl(c); return GetBarewordsControl(c);
else if (isdigit(c, std::locale::classic()))
return GetRealLiteral(c);
else else
return Token(TOK_INVALID); return Token(TOK_INVALID);
} }