mirror of https://github.com/PCSX2/pcsx2.git
831 lines
20 KiB
C++
831 lines
20 KiB
C++
/**
|
|
* @file include/demangler/gparser.h
|
|
* @brief Parser of LL grammar.
|
|
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
|
*/
|
|
|
|
#ifndef DEMANGLER_GPARSER_H
|
|
#define DEMANGLER_GPARSER_H
|
|
|
|
#include <map>
|
|
#include <set>
|
|
#include <stack>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
//internal grammars
|
|
|
|
namespace demangler {
|
|
|
|
/**
|
|
* @brief Class which holds a demangled name.
|
|
*/
|
|
class cName {
|
|
public:
|
|
/**
|
|
* @brief Enum of types of the mangled types (Determines whether a type is built-in or named).
|
|
*/
|
|
enum ttype { //type of type
|
|
TT_UNKNOWN = 0, //can be used for unknown return type
|
|
TT_BUILTIN,
|
|
TT_NAME,
|
|
TT_NUM,
|
|
TT_PEXPR
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of types of the mangled names.
|
|
*/
|
|
enum ntype { //type of the mangled name
|
|
NT_FUNCTION = 0,
|
|
NT_TEMPLFUNCTION,
|
|
NT_OPERATOR,
|
|
NT_CONSTRUCTOR,
|
|
NT_DESTRUCTOR,
|
|
NT_DATA,
|
|
NT_VTABLE,
|
|
NT_R0,
|
|
NT_R1,
|
|
NT_R2,
|
|
NT_R3,
|
|
NT_R4,
|
|
NT__A,
|
|
NT__B,
|
|
NT__C,
|
|
NT__D,
|
|
NT__E,
|
|
NT__F,
|
|
NT__G,
|
|
NT__H,
|
|
NT__I,
|
|
NT__J,
|
|
NT__K,
|
|
NT__L,
|
|
NT__M,
|
|
NT__N,
|
|
NT__O,
|
|
NT__P,
|
|
NT__Q,
|
|
NT__R,
|
|
NT__S,
|
|
NT__T,
|
|
NT__U,
|
|
NT__V,
|
|
NT__W,
|
|
NT__X,
|
|
NT__Y,
|
|
NT__Z,
|
|
NT_CLASS
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of operator types.
|
|
*/
|
|
enum optype { //type of operator
|
|
OT_NULL = 0,
|
|
OT_NEW,
|
|
OT_NEWARR,
|
|
OT_DEL,
|
|
OT_DELARR,
|
|
OT_UPLUS,
|
|
OT_UMINUS,
|
|
OT_UAND,
|
|
OT_UAST,
|
|
OT_TILDA,
|
|
OT_PLUS,
|
|
OT_MINUS,
|
|
OT_AST,
|
|
OT_DIV,
|
|
OT_MOD,
|
|
OT_AND,
|
|
OT_OR,
|
|
OT_EXP,
|
|
OT_ASSIGN,
|
|
OT_PLUSASS,
|
|
OT_MINUSASS,
|
|
OT_ASTASS,
|
|
OT_DIVASS,
|
|
OT_MODASS,
|
|
OT_ANDASS,
|
|
OT_ORASS,
|
|
OT_EXPASS,
|
|
OT_LSHIFT,
|
|
OT_RSHIFT,
|
|
OT_LSHIFTASS,
|
|
OT_RSHIFTASS,
|
|
OT_EQ,
|
|
OT_NEQ,
|
|
OT_LT,
|
|
OT_GT,
|
|
OT_LE,
|
|
OT_GE,
|
|
OT_NOT,
|
|
OT_ANDAND,
|
|
OT_OROR,
|
|
OT_PLUSPLUS,
|
|
OT_MINUSMINUS,
|
|
OT_COMMA,
|
|
OT_PTAST,
|
|
OT_PT,
|
|
OT_BRACKETS,
|
|
OT_ARR,
|
|
OT_QUESTION,
|
|
OT_SIZEOFT,
|
|
OT_SIZEOFE,
|
|
OT_ALIGNOFT,
|
|
OT_ALIGNOFE,
|
|
OT_CAST
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of built-in types.
|
|
*/
|
|
enum st_type { //standard built-in types
|
|
T_VOID = 0,
|
|
T_WCHAR,
|
|
T_BOOL,
|
|
T_CHAR,
|
|
T_SCHAR,
|
|
T_UCHAR,
|
|
T_SHORT,
|
|
T_USHORT,
|
|
T_INT,
|
|
T_UINT,
|
|
T_LONG,
|
|
T_ULONG,
|
|
T_LONGLONG,
|
|
T_ULONGLONG,
|
|
T_INT128,
|
|
T_UINT128,
|
|
T_FLOAT,
|
|
T_DOUBLE,
|
|
T_LONGDOUBLE,
|
|
T_FLOAT128,
|
|
T_ELLIPSIS,
|
|
T_DD,
|
|
T_DE,
|
|
T_DF,
|
|
T_DH,
|
|
T_CHAR32,
|
|
T_CHAR16,
|
|
T_AUTO,
|
|
T_NULLPTR
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of member function accessibility.
|
|
*/
|
|
enum memfacc_t { //standard built-in types
|
|
MFM_NULL = 0,
|
|
MFM_PRIVATE,
|
|
MFM_PUBLIC,
|
|
MFM_PROTECTED
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of function call conventions.
|
|
*/
|
|
enum fcall_t { //standard built-in types
|
|
FCC_NULL = 0,
|
|
FCC_CDECL,
|
|
FCC_PASCAL,
|
|
FCC_FORTRAN,
|
|
FCC_THISCALL,
|
|
FCC_STDCALL,
|
|
FCC_FASTCALL,
|
|
FCC_INTERRUPT
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of named type classifications. Union, Struct, Class, Enum.
|
|
*/
|
|
enum mstype_t { //standard built-in types
|
|
MST_NULL = 0,
|
|
MST_UNION,
|
|
MST_STRUCT,
|
|
MST_CLASS,
|
|
MST_ENUM
|
|
};
|
|
|
|
|
|
/**
|
|
* @brief Structure of an unqualified name.
|
|
* @param un String which holds the name.
|
|
* @param tpl Pointer to the the template (vector of types) of this unqualified name. If nullptr, the unqualified name consists only of the string.
|
|
*/
|
|
struct name_t {
|
|
std::string un; //unqualified name
|
|
void *tpl = nullptr; //std::vector<type_t>
|
|
bool op = false; //is it operator name element?
|
|
};
|
|
|
|
/**
|
|
* @brief Structure of a type.
|
|
* @param type Type of the type.
|
|
* @param b Built-in type. This value is defined only if 'type' is TT_BUILTIN.
|
|
* @param n Qualified name of the type. This value is defined only if 'type' is TT_NAME.
|
|
* @param is_const Bool value which determines whether the type is const.
|
|
* @param is_restrict Bool value which determines whether the type is restrict.
|
|
* @param is_volatile Bool value which determines whether the type is volatile.
|
|
* @param is_pointer Integer value determining the pointer level of the type.
|
|
* @param is_reference Bool value which determines whether the type is a reference.
|
|
* @param is_rvalue Bool value which determines whether the type is an R-value.
|
|
* @param is_cpair Bool value which determines whether the type is a complex pair.
|
|
* @param is_imaginary Bool value which determines whether the type is imaginary.
|
|
*/
|
|
struct type_t {
|
|
ttype type = TT_UNKNOWN; //type of type... builtin or named type
|
|
st_type b = T_VOID; //builtin type
|
|
void *value = nullptr; //expression value
|
|
std::vector<name_t> n; //qualified name of named type
|
|
std::string modifiers;
|
|
mstype_t mst = MST_NULL;
|
|
int num = 0;
|
|
|
|
bool is_array = false;
|
|
std::vector<unsigned int> array_dimensions;
|
|
bool is_const = false;
|
|
bool is_restrict = false;
|
|
bool is_volatile = false;
|
|
unsigned int is_pointer = 0;
|
|
bool is_reference = false;
|
|
bool is_rvalue = false; //r-value reference
|
|
bool is_cpair = false; //complex pair
|
|
bool is_imaginary = false;
|
|
|
|
std::string getLlvmType();
|
|
|
|
private:
|
|
std::string llvmIr;
|
|
std::string getLlvmTypePrivate();
|
|
};
|
|
|
|
ntype name_type = NT_FUNCTION; //name type
|
|
optype operator_type = OT_NULL; //type of operator. it is OT_NULL if function is not an operator
|
|
type_t return_type;
|
|
type_t special_type; //return value for function or conversion type for operator OT_CAST
|
|
std::string modifiers;
|
|
|
|
memfacc_t member_function_access = MFM_NULL;
|
|
fcall_t function_call = FCC_NULL;
|
|
bool is_static = false;
|
|
bool is_virtual = false;
|
|
std::string storage_class;
|
|
std::vector<long int> rttibcd;
|
|
|
|
std::vector<type_t> parameters; //function parameters
|
|
std::vector<name_t> name; //qualified name composed of unqualified names
|
|
|
|
void *tf_tpl = nullptr;
|
|
|
|
bool last_shown_endtpl = false; //an auxiliary variable which helps to add space between multiple '>' at the end of templates
|
|
|
|
void deleteparams(std::vector<type_t> & vec);
|
|
cName(); //constructor
|
|
virtual ~cName(); //mass destruction
|
|
void type_t_clear(type_t &x);
|
|
void addname(const std::vector<name_t> & inname); //set the function name
|
|
void addpar(const std::vector<type_t> & inpar); //set the parameters of the name
|
|
void setnametype(ntype x); //set type of the mangled name
|
|
void setfcall(fcall_t x); //set type of the function call convention
|
|
void setmfacc(memfacc_t x); //set type of the member function access level
|
|
ntype getnametype(void); //get type of the mangled name
|
|
void setop(optype x); //set operator type
|
|
void setret(type_t x); //set return type
|
|
void setspec(type_t x); //set special type
|
|
void setstatic(); //set name's static flag
|
|
void setvirtual(); //set name's virtual flag
|
|
void addmodifier(char x); //add a modifier to the name modifier string
|
|
void addstcl(char x); //add a modifier to the storage class string
|
|
void setmodifiers(std::string x); //set modifiers
|
|
void settftpl(void* x); //set template function template
|
|
void addrttinum(long int x); //add a RTTI Base Class descriptor num
|
|
std::string optypetostr(optype x); //operator type to string
|
|
std::string printmodifiers(std::string x, bool space);
|
|
std::string printpremodifiers(std::string x, bool space);
|
|
std::string printpostmodifiers(std::string x, bool space);
|
|
std::string printname(std::vector<name_t> & vec, std::string compiler = "gcc");
|
|
std::string printparams(std::vector<type_t> & vec, bool ignorevoid = false, std::string compiler = "gcc");
|
|
std::string printpexpr(type_t & x);
|
|
|
|
/**
|
|
* @brief Print the calling convention to a string.
|
|
* @param callconv The calling convention to be printed.
|
|
* @return String containing calling convention.
|
|
*/
|
|
std::string printcallingconvention(fcall_t callconv);
|
|
|
|
std::string printall(std::string compiler = "gcc");
|
|
std::string printall_old(bool msvcpp = false);
|
|
}; //class cName
|
|
|
|
/**
|
|
* @brief Grammar class. It's member functions allow loading an external grammar and demangling a mangled name using the grammar.
|
|
*/
|
|
class cGram {
|
|
|
|
public:
|
|
/**
|
|
* @brief Global array of semantic action names. Used when building internal LL table from external grammar.
|
|
*/
|
|
static const char *semactname[];
|
|
|
|
/**
|
|
* @brief Enum of error codes.
|
|
*/
|
|
enum errcode {
|
|
ERROR_OK = 0,
|
|
ERROR_FILE,
|
|
ERROR_FSM,
|
|
ERROR_SYN,
|
|
ERROR_MEM,
|
|
ERROR_GRAM,
|
|
ERROR_LL,
|
|
ERROR_UNK
|
|
};
|
|
|
|
/**
|
|
* @brief An array of error messages.
|
|
*/
|
|
static const char *errmsg[];
|
|
|
|
/**
|
|
* @brief Type of a grammar element. It can be either a terminal or a non-terminal.
|
|
*/
|
|
enum gelemtype {
|
|
GE_TERM = 0,
|
|
GE_NONTERM
|
|
};
|
|
|
|
|
|
/**
|
|
* @brief Structure of a grammar element.
|
|
* @param type Type of the element (terminal or non-terminal).
|
|
* @param nt The name of the non-terminal. Only vylid if type is GE_NONTERM.
|
|
* @param t The byte value of the terminal. Only valid if type is GE_TERM.
|
|
*/
|
|
struct gelem_t {
|
|
gelem_t(gelemtype t, char* n, unsigned int i, char c) :
|
|
type(t),
|
|
nt(n),
|
|
ntst(i),
|
|
t(c)
|
|
{}
|
|
gelem_t() {}
|
|
gelemtype type = GE_TERM;
|
|
char* nt = nullptr;
|
|
unsigned int ntst = 0;
|
|
char t = 0;
|
|
};
|
|
|
|
/**
|
|
* @brief Enum of semantic actions.
|
|
*/
|
|
enum semact {
|
|
//do nothing
|
|
SA_NULL = 0,
|
|
|
|
//set type of name (function, operator, constructor, destructor, data)
|
|
SA_SETNAMEF,
|
|
SA_SETNAMETF,
|
|
SA_SETNAMEO,
|
|
SA_SETNAMEC,
|
|
SA_SETNAMED,
|
|
SA_SETNAMEX,
|
|
SA_SETNAMEVT,
|
|
|
|
//set operator type
|
|
SA_SETOPXX,
|
|
SA_SETOPNW,
|
|
SA_SETOPNA,
|
|
SA_SETOPDL,
|
|
SA_SETOPDA,
|
|
SA_SETOPPS,
|
|
SA_SETOPNG,
|
|
SA_SETOPAD,
|
|
SA_SETOPDE,
|
|
SA_SETOPCO,
|
|
SA_SETOPPL,
|
|
SA_SETOPMI,
|
|
SA_SETOPML,
|
|
SA_SETOPDV,
|
|
SA_SETOPRM,
|
|
SA_SETOPAN,
|
|
SA_SETOPOR,
|
|
SA_SETOPEO,
|
|
SA_SETOPASS,
|
|
SA_SETOPPLL,
|
|
SA_SETOPMII,
|
|
SA_SETOPMLL,
|
|
SA_SETOPDVV,
|
|
SA_SETOPRMM,
|
|
SA_SETOPANN,
|
|
SA_SETOPORR,
|
|
SA_SETOPEOO,
|
|
SA_SETOPLS,
|
|
SA_SETOPRS,
|
|
SA_SETOPLSS,
|
|
SA_SETOPRSS,
|
|
SA_SETOPEQ,
|
|
SA_SETOPNE,
|
|
SA_SETOPLT,
|
|
SA_SETOPGT,
|
|
SA_SETOPLE,
|
|
SA_SETOPGE,
|
|
SA_SETOPNT,
|
|
SA_SETOPAA,
|
|
SA_SETOPOO,
|
|
SA_SETOPPP,
|
|
SA_SETOPMM,
|
|
SA_SETOPCM,
|
|
SA_SETOPPM,
|
|
SA_SETOPPT,
|
|
SA_SETOPCL,
|
|
SA_SETOPIX,
|
|
SA_SETOPQU,
|
|
SA_SETOPST,
|
|
SA_SETOPSZ,
|
|
SA_SETOPAT,
|
|
SA_SETOPAZ,
|
|
SA_SETOPCV,
|
|
|
|
//builtin types
|
|
SA_SETTYPEV,
|
|
SA_SETTYPEW,
|
|
SA_SETTYPEB,
|
|
SA_SETTYPEC,
|
|
SA_SETTYPEA,
|
|
SA_SETTYPEH,
|
|
SA_SETTYPES,
|
|
SA_SETTYPET,
|
|
SA_SETTYPEI,
|
|
SA_SETTYPEJ,
|
|
SA_SETTYPEL,
|
|
SA_SETTYPEM,
|
|
SA_SETTYPEX,
|
|
SA_SETTYPEY,
|
|
SA_SETTYPEN,
|
|
SA_SETTYPEO,
|
|
SA_SETTYPEF,
|
|
SA_SETTYPED,
|
|
SA_SETTYPEE,
|
|
SA_SETTYPEG,
|
|
SA_SETTYPEZ,
|
|
|
|
//parameter modifiers
|
|
SA_SETCONST,
|
|
SA_SETRESTRICT,
|
|
SA_SETVOLATILE,
|
|
SA_SETPTR,
|
|
SA_SETREF,
|
|
SA_SETRVAL,
|
|
SA_SETCPAIR,
|
|
SA_SETIM,
|
|
|
|
//substitutions
|
|
SA_SUBSTD, //::std::
|
|
SA_SUBALC, //::std::allocator
|
|
SA_SUBSTR, //::std::basic_string
|
|
SA_SUBSTRS, //::std::basic_string<char,::std::char_traits<char>,::std::allocator<char>>
|
|
SA_SUBISTR, //::std::basic_istream<char, std::char_traits<char>>
|
|
SA_SUBOSTR, //::std::basic_ostream<char, std::char_traits<char>>
|
|
SA_SUBIOSTR, //::std::basic_iostream<char, std::char_traits<char>>
|
|
|
|
//other very important semantic actions
|
|
SA_LOADID, //load an unqualified name into the qualified vector of names
|
|
SA_LOADSUB, //load a substitution
|
|
SA_LOADTSUB, //load a template sub
|
|
SA_LOADARR, //load an array dimension
|
|
SA_STOREPAR, //store current parameter to vector of parameters
|
|
SA_STORETEMPARG, //store current parameter to current vector of template arguments
|
|
SA_STORETEMPLATE, //store the whole template into the last name element of last name vector
|
|
SA_BEGINTEMPL, //begin a template
|
|
SA_SKIPTEMPL, //skip a template
|
|
SA_PAR2F, //store current vector of parameters into the function
|
|
SA_PAR2RET, //store current parameter to the return value
|
|
SA_PAR2SPEC, //store current parameter to the special value
|
|
SA_UNQ2F, //future identifiers are added to the function name
|
|
SA_UNQ2P, //function identifiers are added to parameter name
|
|
|
|
//substitution expansion modifiers
|
|
SA_SSNEST, //nested sub
|
|
SA_STUNQ, //unqualified std:: sub
|
|
SA_SSNO, //other sub derived from <name>
|
|
|
|
SA_TYPE2EXPR, //builtin type is converted to primary expression
|
|
SA_EXPRVAL, //load expression value
|
|
SA_BEGINPEXPR, //begin a primary expression
|
|
SA_STOREPEXPR, //end a primary expression
|
|
SA_COPYTERM, //copy the terminal on the input into current_name in substitution analyzer
|
|
|
|
SA_ADDCHARTONAME, //add current character to current unqualified name
|
|
SA_STORENAME, //move current unqualified name into current name vector
|
|
SA_REVERSENAME,
|
|
SA_SETPRIVATE,
|
|
SA_SETPUBLIC,
|
|
SA_SETPROTECTED,
|
|
SA_SETFCDECL,
|
|
SA_SETFPASCAL,
|
|
SA_SETFFORTRAN,
|
|
SA_SETFTHISCALL,
|
|
SA_SETFSTDCALL,
|
|
SA_SETFFASTCALL,
|
|
SA_SETFINTERRUPT,
|
|
SA_SETUNION,
|
|
SA_SETSTRUCT,
|
|
SA_SETCLASS,
|
|
SA_SETENUM,
|
|
SA_SETSTATIC,
|
|
SA_SETVIRTUAL,
|
|
SA_STCLCONST,
|
|
SA_STCLVOL,
|
|
SA_STCLFAR,
|
|
SA_STCLHUGE,
|
|
SA_SAVENAMESUB,
|
|
SA_LOADNAMESUB,
|
|
SA_MSTEMPLPSUB,
|
|
SA_SETNAMER0,
|
|
SA_SETNAMER1,
|
|
SA_SETNAMER2,
|
|
SA_SETNAMER3,
|
|
SA_SETNAMER4,
|
|
SA_SETNAME_A,
|
|
SA_SETNAME_B,
|
|
SA_SETNAME_C,
|
|
SA_SETNAME_D,
|
|
SA_SETNAME_E,
|
|
SA_SETNAME_F,
|
|
SA_SETNAME_G,
|
|
SA_SETNAME_H,
|
|
SA_SETNAME_I,
|
|
SA_SETNAME_J,
|
|
SA_SETNAME_K,
|
|
SA_SETNAME_L,
|
|
SA_SETNAME_M,
|
|
SA_SETNAME_N,
|
|
SA_SETNAME_O,
|
|
SA_SETNAME_P,
|
|
SA_SETNAME_Q,
|
|
SA_SETNAME_R,
|
|
SA_SETNAME_S,
|
|
SA_SETNAME_T,
|
|
SA_SETNAME_U,
|
|
SA_SETNAME_V,
|
|
SA_SETNAME_W,
|
|
SA_SETNAME_X,
|
|
SA_SETNAME_Y,
|
|
SA_SETNAME_Z,
|
|
SA_TEMPL2TFTPL,
|
|
SA_BEGINBSUB,
|
|
SA_LOADBSUB,
|
|
SA_ADDMCONST,
|
|
SA_ADDMVOL,
|
|
SA_ADDMFAR,
|
|
SA_ADDMHUGE,
|
|
SA_LOADMSNUM,
|
|
SA_NUMTORTTIBCD,
|
|
SA_NUMTOTYPE,
|
|
SA_BORLANDNORMALIZEPARNAME,
|
|
SA_BORLANDID,
|
|
SA_LOADBORLANDSUB,
|
|
SA_BORLANDARR,
|
|
SA_END
|
|
};
|
|
|
|
/**
|
|
* @brief Structure of an element in an internal LL table.
|
|
* @param n Rule number. Numbered from 1. 0 is reserved for "no rule", which indicates a syntax error.
|
|
* @param s Semantic action to be done when this LL element is used.
|
|
*/
|
|
struct llelem_t {
|
|
llelem_t(unsigned int i, semact ss) :
|
|
n(i),
|
|
s(ss)
|
|
{}
|
|
llelem_t() {}
|
|
unsigned int n = 0;
|
|
semact s = SA_NULL;
|
|
};
|
|
|
|
/**
|
|
* @brief Struct used to describe a rule boundaries in an internal LL table.
|
|
* @param offset Offset from the start of ruleelements array.
|
|
* @param size Number of elements in the current rule.
|
|
*/
|
|
struct ruleaddr_t {
|
|
ruleaddr_t(unsigned int o, unsigned int s) :
|
|
offset(o),
|
|
size(s)
|
|
{}
|
|
unsigned int offset = 0;
|
|
unsigned int size = 0;
|
|
};
|
|
|
|
/**
|
|
* @brief Structure of a grammar rule.
|
|
* @param n Number of the rule. Numbered from 1. 0 is reserved for "no rule", which indicates a syntax error.
|
|
* @param left The left side of the rule, consisting of only one non-terminal.
|
|
* @param right The right side of the rule, which is a sequence of terminals or non-terminals. May be empty.
|
|
*/
|
|
struct rule_t {
|
|
unsigned int n = 0;
|
|
gelem_t left;
|
|
std::vector<gelem_t> right;
|
|
};
|
|
|
|
/**
|
|
* @brief Types of substitution expansion.
|
|
*/
|
|
enum subtype {
|
|
ST_NULL = 0,
|
|
ST_STUNQ,
|
|
ST_SSNEST,
|
|
ST_SSNO
|
|
};
|
|
|
|
/**
|
|
* @brief States of the FSM for parsing grammar rules from a file.
|
|
*/
|
|
enum fsmstate {
|
|
S_START = 0, //beginning of a line
|
|
S_NT_LEFT, //non-terminal on the left side
|
|
S_OP1, //:
|
|
S_OP2, //:
|
|
S_OP3, //=
|
|
S_RIGHT, //right side
|
|
S_NT_RIGHT, //non-terminal on the right side
|
|
S_T, //terminal on the right side
|
|
S_QT, //quoted terminal on the right side
|
|
S_QT_ESC, //escape sequence of a quoted terminal
|
|
S_IGNORE, //ignore the rest of the line
|
|
S_ERROR, //error ocurred
|
|
S_NULL //just a NULL terminator for the array of final states
|
|
};
|
|
|
|
/**
|
|
* @brief Class for comparing two grammar element structures. Used in std::set of grammar elements.
|
|
*/
|
|
class comparegelem_c {
|
|
public:
|
|
/**
|
|
* @brief Comparison function for two grammar element structures
|
|
* @param t1 First grammar element.
|
|
* @param t2 Second grammar element.
|
|
*/
|
|
bool operator() (const gelem_t t1, const gelem_t t2) const {
|
|
//if types don't match, terminal is less than non-terminal
|
|
if (t1.type!= t2.type) {
|
|
return (t1.type == GE_TERM)?true:false;
|
|
}
|
|
//for two terminals, compare their byte values
|
|
if (t1.type == GE_TERM) {
|
|
return t1.t < t2.t;
|
|
}
|
|
//for two non-terminals, compare the non-terminal name strings
|
|
else {
|
|
return t1.nt < t2.nt;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Struct for internal grammar.
|
|
*/
|
|
struct igram_t {
|
|
igram_t(unsigned int tsx, unsigned int rax, unsigned int rex, unsigned int lx, unsigned int ly,
|
|
gelem_t r, unsigned char* ts, ruleaddr_t* ra, gelem_t* re, llelem_t** lt) :
|
|
terminal_static_x(tsx),
|
|
ruleaddrs_x(rax),
|
|
ruleelements_x(rex),
|
|
llst_x(lx),
|
|
llst_y(ly),
|
|
root(r),
|
|
terminal_static(ts),
|
|
ruleaddrs(ra),
|
|
ruleelements(re),
|
|
llst(lt)
|
|
{}
|
|
igram_t() {}
|
|
//dimensions of the arrays
|
|
unsigned int terminal_static_x = 0;
|
|
unsigned int ruleaddrs_x = 0;
|
|
unsigned int ruleelements_x = 0;
|
|
unsigned int llst_x = 0; //first one
|
|
unsigned int llst_y = 0; //second one
|
|
//root element
|
|
gelem_t root;
|
|
//the arrays
|
|
unsigned char* terminal_static = nullptr; //array of used terminals
|
|
ruleaddr_t* ruleaddrs = nullptr; //structures defining offset and size of each rule in the ruleelements table
|
|
gelem_t* ruleelements = nullptr; //all elements of all rules
|
|
llelem_t** llst = nullptr; //the LL table
|
|
};
|
|
|
|
/**
|
|
* @brief The struct variable containing pointers to internal grammar data.
|
|
*/
|
|
cGram::igram_t internalGrammarStruct;
|
|
|
|
//FSM for parsing external rules
|
|
static const fsmstate fsm_final[];
|
|
static const gelem_t T_EOF;
|
|
|
|
|
|
bool internalGrammar = false;
|
|
std::string compiler;
|
|
|
|
/*
|
|
* Variables used for generation of new internal grammars
|
|
*/
|
|
std::string createIGrammar; //if this thing is on, new internal grammar will be generated from external grammar file
|
|
unsigned int newIG_terminal_static_x = 0;
|
|
std::size_t newIG_ruleaddrs_x = 0;
|
|
std::size_t newIG_ruleelements_x = 0;
|
|
std::size_t newIG_llst_x = 0;
|
|
std::size_t newIG_llst_y = 0;
|
|
std::string newIG_root;
|
|
std::string newIG_terminal_static;
|
|
std::string newIG_ruleaddrs;
|
|
std::string newIG_ruleelements;
|
|
std::string newIG_llst;
|
|
|
|
|
|
/*
|
|
* Variables for parsed external grammar
|
|
*/
|
|
std::vector<rule_t> rules;
|
|
std::map<std::string,bool> empty;
|
|
std::map<std::string,std::set<gelem_t,comparegelem_c>> first;
|
|
std::map<std::string,std::set<gelem_t,comparegelem_c>> follow;
|
|
std::map<unsigned int,std::set<gelem_t,comparegelem_c>> predict;
|
|
std::map<std::string,std::map<char,std::pair<unsigned int, semact>>> ll;
|
|
|
|
std::vector<unsigned char> terminals;
|
|
std::vector<std::string> nonterminals;
|
|
|
|
size_t lex_position = 0; //position in the source file
|
|
std::fstream *source = nullptr; //pointer to the input filestream
|
|
|
|
/*
|
|
* methods
|
|
*/
|
|
errcode loadfile(const std::string filename);
|
|
bool is_final(fsmstate s); //is s a final state of fsm?
|
|
char getc();
|
|
bool eof();
|
|
bool lf();
|
|
errcode getgrammar(const std::string filename);
|
|
bool copyset(std::set<gelem_t,comparegelem_c> & src, std::set<gelem_t,comparegelem_c> & dst);
|
|
void genempty();
|
|
void genfirst();
|
|
bool getempty(std::vector<gelem_t> & src);
|
|
std::set<gelem_t,comparegelem_c> getfirst(std::vector<gelem_t> & src);
|
|
llelem_t getllpair(std::string nt, unsigned int ntst, unsigned char t);
|
|
void genfollow();
|
|
void genpredict();
|
|
errcode genll();
|
|
errcode genconstll();
|
|
void genllsem();
|
|
errcode analyze(std::string input, cName & pName);
|
|
std::string subanalyze(const std::string input, cGram::errcode *err);
|
|
semact getsem(const std::string input);
|
|
void *getbstpl(cName & pName);
|
|
void *getstrtpl(cName & pName);
|
|
bool issub(std::string candidate,std::vector<std::string> & vec);
|
|
void showsubs(std::vector<std::string> & vec);
|
|
long int b36toint(std::string x);
|
|
void * copynametpl(void * src);
|
|
public:
|
|
//constructor
|
|
cGram();
|
|
//destructor
|
|
virtual ~cGram();
|
|
errcode initialize(std::string gname, bool i = true);
|
|
errcode parse(const std::string filename);
|
|
cName *perform(const std::string input, errcode *err);
|
|
void demangleClassName(const std::string& input, cName* retvalue, cGram::errcode& err_i);
|
|
void showrules();
|
|
void showempty();
|
|
void showfirst();
|
|
void showfollow();
|
|
void showpredict();
|
|
void showll();
|
|
unsigned int isnt(std::vector<std::string> & nonterminals, std::string nonterminal);
|
|
unsigned int ist(std::vector<unsigned char> & terminals, unsigned char terminal);
|
|
|
|
void resetError() {errString = "";}
|
|
std::string errString; //string containing last error message
|
|
bool errValid = false; //is the gParser valid? false if it has not been properly initialized yet
|
|
bool SubAnalyzeEnabled = false; //enable substitution analysis for GCC demangler?
|
|
void setSubAnalyze(bool x);
|
|
|
|
errcode generateIgrammar(const std::string inputfilename, const std::string outputname);
|
|
std::string generatedTerminalStatic;
|
|
|
|
}; //class cGram
|
|
|
|
} // namespace demangler
|
|
|
|
#endif
|