// TOP

#ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC

#include "../4cpp_lexer_types.h"

namespace new_lex{
//

#define lexer_link static

// TODO(allen): revisit this keyword data declaration system
struct String_And_Flag{
    char *str;
    fcpp_u32 flags;
};

struct String_List{
    String_And_Flag *data;
    int count;
};

struct Sub_Match_List_Result{
    int index;
    fcpp_i32 new_pos;
};

// TODO(allen): shift towards storing in a context
static String_And_Flag int_suf_strings[] = {
    {"ull"}, {"ULL"},
    {"llu"}, {"LLU"},
    {"ll"}, {"LL"},
    {"l"}, {"L"},
    {"u"}, {"U"}
};

#define lexer_string_list(x) {x, (sizeof(x)/sizeof(*x))}

static String_List int_sufs = lexer_string_list(int_suf_strings);

static String_And_Flag float_suf_strings[] = {
    {"f"}, {"F"},
    {"l"}, {"L"}
};
static String_List float_sufs = lexer_string_list(float_suf_strings);

static String_And_Flag bool_lit_strings[] = {
    {"true"}, {"false"}
};
static String_List bool_lits = lexer_string_list(bool_lit_strings);

static String_And_Flag keyword_strings[] = {
    {"and", CPP_TOKEN_AND},
    {"and_eq", CPP_TOKEN_ANDEQ},
    {"bitand", CPP_TOKEN_BIT_AND},
    {"bitor", CPP_TOKEN_BIT_OR},
    {"or", CPP_TOKEN_OR},
    {"or_eq", CPP_TOKEN_OREQ},
    {"sizeof", CPP_TOKEN_SIZEOF},
    {"alignof", CPP_TOKEN_ALIGNOF},
    {"decltype", CPP_TOKEN_DECLTYPE},
    {"throw", CPP_TOKEN_THROW},
    {"new", CPP_TOKEN_NEW},
    {"delete", CPP_TOKEN_DELETE},
    {"xor", CPP_TOKEN_BIT_XOR},
    {"xor_eq", CPP_TOKEN_XOREQ},
    {"not", CPP_TOKEN_NOT},
    {"not_eq", CPP_TOKEN_NOTEQ},
    {"typeid", CPP_TOKEN_TYPEID},
    {"compl", CPP_TOKEN_BIT_NOT},
    
    {"void", CPP_TOKEN_KEY_TYPE},
    {"bool", CPP_TOKEN_KEY_TYPE},
    {"char", CPP_TOKEN_KEY_TYPE},
    {"int", CPP_TOKEN_KEY_TYPE},
    {"float", CPP_TOKEN_KEY_TYPE},
    {"double", CPP_TOKEN_KEY_TYPE},
    
    {"long", CPP_TOKEN_KEY_MODIFIER},
    {"short", CPP_TOKEN_KEY_MODIFIER},
    {"unsigned", CPP_TOKEN_KEY_MODIFIER},
    
    {"const", CPP_TOKEN_KEY_QUALIFIER},
    {"volatile", CPP_TOKEN_KEY_QUALIFIER},
    
    {"asm", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"break", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"case", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"catch", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"continue", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"default", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"do", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"else", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"for", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"goto", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"if", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"return", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"switch", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"try", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"while", CPP_TOKEN_KEY_CONTROL_FLOW},
    {"static_assert", CPP_TOKEN_KEY_CONTROL_FLOW},
    
    {"const_cast", CPP_TOKEN_KEY_CAST},
    {"dynamic_cast", CPP_TOKEN_KEY_CAST},
    {"reinterpret_cast", CPP_TOKEN_KEY_CAST},
    {"static_cast", CPP_TOKEN_KEY_CAST},
    
    {"class", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"enum", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"struct", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"typedef", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"union", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"template", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"typename", CPP_TOKEN_KEY_TYPE_DECLARATION},
    
    {"friend", CPP_TOKEN_KEY_ACCESS},
    {"namespace", CPP_TOKEN_KEY_ACCESS},
    {"private", CPP_TOKEN_KEY_ACCESS},
    {"protected", CPP_TOKEN_KEY_ACCESS},
    {"public", CPP_TOKEN_KEY_ACCESS},
    {"using", CPP_TOKEN_KEY_ACCESS},
    
    {"extern", CPP_TOKEN_KEY_LINKAGE},
    {"export", CPP_TOKEN_KEY_LINKAGE},
    {"inline", CPP_TOKEN_KEY_LINKAGE},
    {"static", CPP_TOKEN_KEY_LINKAGE},
    {"virtual", CPP_TOKEN_KEY_LINKAGE},
    
    {"alignas", CPP_TOKEN_KEY_OTHER},
    {"explicit", CPP_TOKEN_KEY_OTHER},
    {"noexcept", CPP_TOKEN_KEY_OTHER},
    {"nullptr", CPP_TOKEN_KEY_OTHER},
    {"operator", CPP_TOKEN_KEY_OTHER},
    {"register", CPP_TOKEN_KEY_OTHER},
    {"this", CPP_TOKEN_KEY_OTHER},
    {"thread_local", CPP_TOKEN_KEY_OTHER},
};
static String_List keywords = lexer_string_list(keyword_strings);

static String_And_Flag op_strings[] = {
    {"...", CPP_TOKEN_ELLIPSIS},
    {"<<=", CPP_TOKEN_LSHIFTEQ},
    {">>=", CPP_TOKEN_RSHIFTEQ},
    {"->*", CPP_TOKEN_PTRARROW},
    {"<<", CPP_TOKEN_LSHIFT},
    {">>", CPP_TOKEN_RSHIFT},
    {"&&", CPP_TOKEN_AND},
    {"||", CPP_TOKEN_OR},
    {"->", CPP_TOKEN_ARROW},
    {"++", CPP_TOKEN_INCREMENT},
    {"--", CPP_TOKEN_DECREMENT},
    {"::", CPP_TOKEN_SCOPE},
    {"+=", CPP_TOKEN_ADDEQ},
    {"-=", CPP_TOKEN_SUBEQ},
    {"*=", CPP_TOKEN_MULEQ},
    {"/=", CPP_TOKEN_DIVEQ},
    {"%=", CPP_TOKEN_MODEQ},
    {"&=", CPP_TOKEN_ANDEQ},
    {"|=", CPP_TOKEN_OREQ},
    {"^=", CPP_TOKEN_XOREQ},
    {"==", CPP_TOKEN_EQEQ},
    {">=", CPP_TOKEN_GRTREQ},
    {"<=", CPP_TOKEN_LESSEQ},
    {"!=", CPP_TOKEN_NOTEQ},
    {".*", CPP_TOKEN_PTRDOT},
    {"{", CPP_TOKEN_BRACE_OPEN},
    {"}", CPP_TOKEN_BRACE_CLOSE},
    {"[", CPP_TOKEN_BRACKET_OPEN},
    {"]", CPP_TOKEN_BRACKET_CLOSE},
    {"(", CPP_TOKEN_PARENTHESE_OPEN},
    {")", CPP_TOKEN_PARENTHESE_CLOSE},
    {"<", CPP_TOKEN_LESS},
    {">", CPP_TOKEN_GRTR},
    {"+", CPP_TOKEN_PLUS},
    {"-", CPP_TOKEN_MINUS},
    {"!", CPP_TOKEN_NOT},
    {"~", CPP_TOKEN_TILDE},
    {"*", CPP_TOKEN_STAR},
    {"&", CPP_TOKEN_AMPERSAND},
    {"|", CPP_TOKEN_BIT_OR},
    {"^", CPP_TOKEN_BIT_XOR},
    {"=", CPP_TOKEN_EQ},
    {",", CPP_TOKEN_COMMA},
    {":", CPP_TOKEN_COLON},
    {";", CPP_TOKEN_SEMICOLON},
    {"/", CPP_TOKEN_DIV},
    {"?", CPP_TOKEN_TERNARY_QMARK},
    {"%", CPP_TOKEN_MOD},
    {".", CPP_TOKEN_DOT},
};
static String_List ops = lexer_string_list(op_strings);

static String_And_Flag pp_op_strings[] = {
    {"##", CPP_PP_CONCAT},
    {"#", CPP_PP_STRINGIFY},
};
static String_List pp_ops = lexer_string_list(pp_op_strings);

static String_And_Flag preprop_strings[] = {
    {"include", CPP_PP_INCLUDE},
    {"INCLUDE", CPP_PP_INCLUDE},
    {"ifndef", CPP_PP_IFNDEF},
    {"IFNDEF", CPP_PP_IFNDEF},
    {"define", CPP_PP_DEFINE},
    {"DEFINE", CPP_PP_DEFINE},
    {"import", CPP_PP_IMPORT},
    {"IMPORT", CPP_PP_IMPORT},
    {"pragma", CPP_PP_PRAGMA},
    {"PRAGMA", CPP_PP_PRAGMA},
    {"undef", CPP_PP_UNDEF},
    {"UNDEF", CPP_PP_UNDEF},
    {"endif", CPP_PP_ENDIF},
    {"ENDIF", CPP_PP_ENDIF},
    {"error", CPP_PP_ERROR},
    {"ERROR", CPP_PP_ERROR},
    {"ifdef", CPP_PP_IFDEF},
    {"IFDEF", CPP_PP_IFDEF},
    {"using", CPP_PP_USING},
    {"USING", CPP_PP_USING},
    {"else", CPP_PP_ELSE},
    {"ELSE", CPP_PP_ELSE},
    {"elif", CPP_PP_ELIF},
    {"ELIF", CPP_PP_ELIF},
    {"line", CPP_PP_LINE},
    {"LINE", CPP_PP_LINE},
    {"if", CPP_PP_IF},
    {"IF", CPP_PP_IF},
};
static String_List preprops = lexer_string_list(preprop_strings);

lexer_link Sub_Match_List_Result
sub_match_list(char *chunk, int size, int pos, String_List list, int sub_size){
    Sub_Match_List_Result result;
    String str_main;
    char *str_check;
    int i, l;
    
    result.index = -1;
    result.new_pos = pos;
    str_main = make_string(chunk + pos, size - pos);
    if (sub_size > 0){
        str_main = substr(str_main, 0, sub_size);
        for (i = 0; i < list.count; ++i){
            str_check = list.data[i].str;
            if (match(str_main, str_check)){
                result.index = i;
                result.new_pos = pos + sub_size;
                break;
            }
        }
    }
    else{
        for (i = 0; i < list.count; ++i){
            str_check = list.data[i].str;
            if (match_part(str_main, str_check, &l)){
                result.index = i;
                result.new_pos = pos + l;
                break;
            }
        }
    }
    return result;
}
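
// A minimal usage sketch (illustrative only, not part of the lexer): given a
// chunk of text and a word span, sub_match_list can classify the word against
// one of the String_List tables above.  The buffer contents here are hypothetical.
#if 0
static void
sub_match_list_example(){
    char chunk[] = "while (x){";
    int size = 10;
    // The word "while" occupies [0,5): ask for an exact match against the keyword table.
    Sub_Match_List_Result r = sub_match_list(chunk, size, 0, keywords, 5);
    if (r.index != -1){
        // keywords.data[r.index].flags == CPP_TOKEN_KEY_CONTROL_FLOW
        // r.new_pos == 5
    }
}
#endif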

lexer_link Cpp_Get_Token_Result
cpp_get_token(Cpp_Token_Stack *token_stack, int pos){
    Cpp_Get_Token_Result result = {};
    Cpp_Token *token_array = token_stack->tokens;
    Cpp_Token *token = 0;
    int first = 0;
    int count = token_stack->count;
    int last = count;
    int this_start = 0, next_start = 0;
    
    if (count > 0){
        for (;;){
            result.token_index = (first + last)/2;
            token = token_array + result.token_index;
            
            this_start = token->start;
            
            if (result.token_index + 1 < count){
                next_start = (token + 1)->start;
            }
            else{
                next_start = this_start + token->size;
            }
            if (this_start <= pos && pos < next_start){
                break;
            }
            else if (pos < this_start){
                last = result.token_index;
            }
            else{
                first = result.token_index + 1;
            }
            if (first == last){
                result.token_index = first;
                break;
            }
        }
        
        if (result.token_index == count){
            --result.token_index;
            result.in_whitespace = 1;
        }
        else{
            if (token->start + token->size <= pos){
                result.in_whitespace = 1;
            }
        }
    }
    else{
        result.token_index = -1;
        result.in_whitespace = 1;
    }
    
    return(result);
}
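
// Illustrative sketch (not part of the lexer): cpp_get_token binary-searches a
// finished token stack for the token containing a buffer position.  The stack
// is assumed to have been filled by cpp_lex_nonalloc beforehand, and pos is a
// hypothetical buffer position.
#if 0
static void
cpp_get_token_example(Cpp_Token_Stack *stack){
    int pos = 100;
    Cpp_Get_Token_Result get = cpp_get_token(stack, pos);
    if (get.token_index >= 0 && !get.in_whitespace){
        Cpp_Token token = stack->tokens[get.token_index];
        // token.start <= pos && pos < token.start + token.size
    }
}
#endif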

lexer_link void
cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amount){
    Cpp_Token *token = stack->tokens + from_token_i;
    int count = stack->count, i;
    
    for (i = from_token_i; i < count; ++i, ++token){
        token->start += shift_amount;
    }
}
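
// Illustrative sketch (not part of the lexer): after an edit that inserts or
// deletes characters, the tokens after the edit keep their relative layout, so
// a relex of the edited range can be followed by a flat shift of the remaining
// token starts.  The token index and character count here are hypothetical.
#if 0
static void
cpp_shift_token_starts_example(Cpp_Token_Stack *stack){
    int first_token_after_edit = 10;
    int inserted_characters = 3;
    cpp_shift_token_starts(stack, first_token_after_edit, inserted_characters);
}
#endif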

enum Lex_State{
    LS_default,
    LS_identifier,
    LS_pound,
    LS_pp,
    LS_char,
    LS_char_slashed,
    LS_string,
    LS_string_slashed,
    LS_number,
    LS_number0,
    LS_float,
    LS_hex,
    LS_comment_pre,
    LS_comment,
    LS_comment_slashed,
    LS_comment_block,
    LS_comment_block_ending,
    LS_dot,
    LS_ellipsis,
    LS_less,
    LS_less_less,
    LS_more,
    LS_more_more,
    LS_minus,
    LS_arrow,
    LS_and,
    LS_or,
    LS_plus,
    LS_colon,
    LS_star,
    LS_modulo,
    LS_caret,
    LS_eq,
    LS_bang,
    LS_error_message,
    //
    LS_count
};

enum Lex_INC_State{
    LSINC_default,
    LSINC_quotes,
    LSINC_pointy,
    LSINC_junk,
};

enum Lex_PP_State{
    LSPP_default,
    LSPP_include,
    LSPP_macro_identifier,
    LSPP_identifier,
    LSPP_body_if,
    LSPP_body,
    LSPP_number,
    LSPP_error,
    LSPP_junk,
    //
    LSPP_count
};

struct Lex_FSM{
    unsigned short state;
    char pp_state;
    char emit_token;
    char multi_line;
    char completed;
};

struct Lex_Data{
    Lex_FSM fsm;
    char pp_state;
    char completed;
    int token_start;
};

lexer_link Lex_PP_State
cpp_pp_directive_to_state(Cpp_Token_Type type){
    Lex_PP_State result = LSPP_default;
    switch (type){
        case CPP_PP_INCLUDE:
        case CPP_PP_IMPORT:
        case CPP_PP_USING:
        result = LSPP_include;
        break;
        
        case CPP_PP_DEFINE:
        result = LSPP_macro_identifier;
        break;
        
        case CPP_PP_UNDEF:
        case CPP_PP_IFDEF:
        case CPP_PP_IFNDEF:
        result = LSPP_identifier;
        break;
        
        case CPP_PP_IF:
        case CPP_PP_ELIF:
        result = LSPP_body_if;
        break;
        
        case CPP_PP_PRAGMA:
        result = LSPP_body;
        break;
        
        case CPP_PP_LINE:
        result = LSPP_number;
        break;
        
        case CPP_PP_ERROR:
        result = LSPP_error;
        break;
        
        case CPP_PP_UNKNOWN:
        case CPP_PP_ELSE:
        case CPP_PP_ENDIF:
        result = LSPP_junk;
        break;
    }
    return(result);
}

lexer_link Cpp_Token_Merge
cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){
    Cpp_Token_Merge result = {(Cpp_Token_Type)0};
    if (next_token.type == CPP_TOKEN_COMMENT && prev_token.type == CPP_TOKEN_COMMENT &&
        next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){
        result.did_merge = 1;
        prev_token.size = next_token.start + next_token.size - prev_token.start;
        result.new_token = prev_token;
    }
    else if (next_token.type == CPP_TOKEN_JUNK && prev_token.type == CPP_TOKEN_JUNK &&
             next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){
        result.did_merge = 1;
        prev_token.size = next_token.start + next_token.size - prev_token.start;
        result.new_token = prev_token;
    }
    return result;
}
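
// Illustrative sketch (not part of the lexer): two adjacent comment tokens with
// matching flags collapse into a single CPP_TOKEN_COMMENT that spans both.
// The start/size values below are hypothetical.
#if 0
static void
cpp_attempt_token_merge_example(){
    Cpp_Token prev = {};
    prev.type = CPP_TOKEN_COMMENT;
    prev.start = 0;
    prev.size = 10;
    
    Cpp_Token next = {};
    next.type = CPP_TOKEN_COMMENT;
    next.start = 11;
    next.size = 8;
    
    Cpp_Token_Merge merge = cpp_attempt_token_merge(prev, next);
    // merge.did_merge == 1; merge.new_token covers [0, 19)
}
#endif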

lexer_link void
cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){
    Cpp_Token_Merge merge = {(Cpp_Token_Type)0};
    Cpp_Token prev_token = {(Cpp_Token_Type)0};
    
    if (*token_i > 0){
        prev_token = out_tokens[*token_i - 1];
        merge = new_lex::cpp_attempt_token_merge(prev_token, token);
        if (merge.did_merge){
            out_tokens[*token_i - 1] = merge.new_token;
        }
    }
    
    if (!merge.did_merge){
        out_tokens[(*token_i)++] = token;
    }
}

lexer_link Lex_Data
cpp_lex_nonalloc(char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){
    Cpp_Token *out_tokens = token_stack_out->tokens;
    int token_i = token_stack_out->count;
    int max_token_i = token_stack_out->max_count;
    
    Cpp_Token token = {};
    
    int pos = file_absolute_pos;
    int end_pos = size + file_absolute_pos;
    
    Lex_Data lex_data = {0};
    Lex_FSM fsm = {0};
    
    char c = 0;
    
    chunk -= file_absolute_pos;
    
    for (; pos < end_pos && token_i < max_token_i;){
        c = chunk[pos];
        
        if (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
            for (; pos < end_pos;){
                c = chunk[pos++];
                if (lex_data.pp_state != LSPP_default){
                    if (c == '\n') lex_data.pp_state = LSPP_default;
                }
                if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')) break;
            }
            --pos;
        }
        
        lex_data.token_start = pos;
        
        fsm = {0};
        fsm.pp_state = lex_data.pp_state;
        for (; fsm.emit_token == 0 && pos <= end_pos;){
            if (pos < end_pos){
                c = chunk[pos++];
            }
            else{
                c = 0;
                ++pos;
            }
            
            {
                unsigned short state = fsm.state;
                char pp_state = fsm.pp_state;
                char emit_token = fsm.emit_token;
                char multi_line = fsm.multi_line;
                
                switch (pp_state){
                    case LSPP_error:
                    state = LS_error_message;
                    if (c == '\n') emit_token = 1;
                    break;
                    
                    case LSPP_include:
                    switch (state){
                        case LSINC_default:
                        switch (c){
                            case '"': state = LSINC_quotes; break;
                            case '<': state = LSINC_pointy; break;
                            default: state = LSINC_junk; break;
                        }
                        break;
                        
                        case LSINC_quotes:
                        if (c == '"') emit_token = 1;
                        break;
                        
                        case LSINC_pointy:
                        if (c == '>') emit_token = 1;
                        break;
                        
                        case LSINC_junk:
                        if (c == '\n') emit_token = 1;
                        break;
                    }
                    break;
                    
                    default:
                    switch (state){
                        case LS_default:
                        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){
                            state = LS_identifier;
                        }
                        else if (c >= '1' && c <= '9'){
                            state = LS_number;
                        }
                        else if (c == '0'){
                            state = LS_number0;
                        }
                        else switch (c){
                            case '\'': state = LS_char; break;
                            case '"': state = LS_string; break;
                            
                            case '/': state = LS_comment_pre; break;
                            
                            case '.': state = LS_dot; break;
                            
                            case '<': state = LS_less; break;
                            case '>': state = LS_more; break;
                            
                            case '-': state = LS_minus; break;
                            
                            case '&': state = LS_and; break;
                            case '|': state = LS_or; break;
                            
                            case '+': state = LS_plus; break;
                            
                            case ':': state = LS_colon; break;
                            
                            case '*': state = LS_star; break;
                            
                            case '%': state = LS_modulo; break;
                            case '^': state = LS_caret; break;
                            
                            case '=': state = LS_eq; break;
                            case '!': state = LS_bang; break;
                            
                            case '#': state = LS_pound; break;
                            
                            #define OperCase(op,type) case op: emit_token = 1; break;
                            OperCase('{', CPP_TOKEN_BRACE_OPEN);
                            OperCase('}', CPP_TOKEN_BRACE_CLOSE);
                            
                            OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                            OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
                            
                            OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                            OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
                            
                            OperCase('~', CPP_TOKEN_TILDE);
                            OperCase(',', CPP_TOKEN_COMMA);
                            OperCase(';', CPP_TOKEN_SEMICOLON);
                            OperCase('?', CPP_TOKEN_TERNARY_QMARK);
                            
                            OperCase('@', CPP_TOKEN_JUNK);
                            OperCase('$', CPP_TOKEN_JUNK);
                            OperCase('\\', CPP_TOKEN_JUNK);
                            #undef OperCase
                        }
                        break;
                        
                        case LS_identifier:
                        if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
                            emit_token = 1;
                        }
                        break;
                        
                        case LS_pound:
                        if (pp_state == LSPP_default){
                            if (c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
                                state = LS_pound;
                            }
                            else if (c == '\n'){
                                emit_token = 1;
                            }
                            else{
                                state = LS_pp;
                            }
                        }
                        else{
                            switch (c){
                                case '#': emit_token = 1; break;
                                default: emit_token = 1; break;
                            }
                        }
                        break;
                        
                        case LS_pp:
                        if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
                            emit_token = 1;
                        }
                        break;
                        
                        case LS_char:
                        switch(c){
                            case '\'': emit_token = 1; break;
                            case '\\': state = LS_char_slashed; multi_line |= 1; break;
                        }
                        break;
                        
                        case LS_char_slashed:
                        switch (c){
                            case '\r': case '\f': case '\v': break;
                            default: state = LS_char; break;
                        }
                        break;
                        
                        case LS_string:
                        switch(c){
                            case '\"': emit_token = 1; break;
                            case '\\': state = LS_string_slashed; multi_line |= 1; break;
                        }
                        break;
                        
                        case LS_string_slashed:
                        switch (c){
                            case '\r': case '\f': case '\v': break;
                            default: state = LS_string; break;
                        }
                        break;
                        
                        case LS_number:
                        if (c >= '0' && c <= '9'){
                            state = LS_number;
                        }
                        else{
                            switch (c){
                                case '.': state = LS_float; break;
                                default: emit_token = 1; break;
                            }
                        }
                        break;
                        
                        case LS_number0:
                        if (c >= '0' && c <= '9'){
                            state = LS_number;
                        }
                        else if (c == 'x'){
                            state = LS_hex;
                        }
                        else if (c == '.'){
                            state = LS_float;
                        }
                        else{
                            emit_token = 1;
                        }
                        break;
                        
                        case LS_float:
                        if (!(c >= '0' && c <= '9')){
                            switch (c){
                                case 'f': emit_token = 1; break;
                                default: emit_token = 1; break;
                            }
                        }
                        break;
                        
                        case LS_hex:
                        if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))){
                            emit_token = 1;
                        }
                        break;
                        
                        case LS_dot:
                        if (c >= '0' && c <= '9'){
                            state = LS_float;
                        }
                        else switch (c){
                            case '.': state = LS_ellipsis; break;
                            case '*': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_ellipsis: emit_token = 1; break;
                        
                        case LS_less:
                        switch (c){
                            case '<': state = LS_less_less; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_less_less:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_more:
                        switch (c){
                            case '>': state = LS_more_more; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_more_more:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_comment_pre:
                        switch (c){
                            case '/': state = LS_comment; break;
                            case '*': state = LS_comment_block; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_comment:
                        switch (c){
                            case '\\': state = LS_comment_slashed; break;
                            case '\n': emit_token = 1; break;
                        }
                        break;
                        
                        case LS_comment_slashed:
                        switch (c){
                            case '\r': case '\f': case '\v': break;
                            default: state = LS_comment; break;
                        }
                        break;
                        
                        case LS_comment_block:
                        switch (c){
                            case '*': state = LS_comment_block_ending; break;
                        }
                        break;
                        
                        case LS_comment_block_ending:
                        switch (c){
                            case '*': state = LS_comment_block_ending; break;
                            case '/': emit_token = 1; break;
                            default: state = LS_comment_block; break;
                        }
                        break;
                        
                        case LS_minus:
                        switch (c){
                            case '>': state = LS_arrow; break;
                            case '-': emit_token = 1; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_arrow:
                        switch (c){
                            case '*': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_and:
                        switch (c){
                            case '&': emit_token = 1; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_or:
                        switch (c){
                            case '|': emit_token = 1; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_plus:
                        switch (c){
                            case '+': emit_token = 1; break;
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_colon:
                        switch (c){
                            case ':': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_star:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_modulo:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_caret:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_eq:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                        
                        case LS_bang:
                        switch (c){
                            case '=': emit_token = 1; break;
                            default: emit_token = 1; break;
                        }
                        break;
                    }
                    break;
                }
                
                fsm.state = state;
                fsm.pp_state = pp_state;
                fsm.emit_token = emit_token;
                fsm.multi_line = multi_line;
            }
        }
        
        if (fsm.emit_token){
            if (lex_data.pp_state == LSPP_include){
                switch (fsm.state){
                    case LSINC_default: break;
                    
                    case LSINC_quotes:
                    case LSINC_pointy:
                    token.type = CPP_TOKEN_INCLUDE_FILE;
                    token.flags = 0;
                    break;
                    
                    case LSINC_junk:
                    token.type = CPP_TOKEN_JUNK;
                    token.flags = 0;
                    break;
                }
            }
            else switch (fsm.state){
                case LS_default:
                switch (c){
                    #define OperCase(op,t) case op: token.type = t; break;
                    OperCase('{', CPP_TOKEN_BRACE_OPEN);
                    OperCase('}', CPP_TOKEN_BRACE_CLOSE);
                    
                    OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                    OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
                    
                    OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                    OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
                    
                    OperCase('~', CPP_TOKEN_TILDE);
                    OperCase(',', CPP_TOKEN_COMMA);
                    OperCase(';', CPP_TOKEN_SEMICOLON);
                    OperCase('?', CPP_TOKEN_TERNARY_QMARK);
                    
                    OperCase('@', CPP_TOKEN_JUNK);
                    OperCase('$', CPP_TOKEN_JUNK);
                    OperCase('\\', CPP_TOKEN_JUNK);
                    #undef OperCase
                }
                if (c != '@' && c != '$' && c != '\\'){
                    token.flags = CPP_TFLAG_IS_OPERATOR;
                }
                break;
                
                case LS_identifier:
                {
                    --pos;
                    
                    int start = lex_data.token_start;
                    int word_size = pos - lex_data.token_start;
                    
                    if (lex_data.pp_state == LSPP_body_if){
                        if (match(make_string(chunk + start, word_size), make_lit_string("defined"))){
                            token.type = CPP_TOKEN_DEFINED;
                            token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD;
                            break;
                        }
                    }
                    
                    Sub_Match_List_Result sub_match;
                    sub_match = sub_match_list(chunk, size, start, bool_lits, word_size);
                    
                    if (sub_match.index != -1){
                        token.type = CPP_TOKEN_BOOLEAN_CONSTANT;
                        token.flags = CPP_TFLAG_IS_KEYWORD;
                    }
                    else{
                        sub_match = sub_match_list(chunk, size, start, keywords, word_size);
                        
                        if (sub_match.index != -1){
                            String_And_Flag data = keywords.data[sub_match.index];
                            token.type = (Cpp_Token_Type)data.flags;
                            token.flags = CPP_TFLAG_IS_KEYWORD;
                        }
                        else{
                            token.type = CPP_TOKEN_IDENTIFIER;
                            token.flags = 0;
                        }
                    }
                }break;
                
                case LS_pound:
                // a second '#' forms the token-paste operator; a lone '#' is the
                // stringify operator (see pp_op_strings), so back up one character
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '#': token.type = (Cpp_Token_Type)CPP_PP_CONCAT; break;
                    default:
                    token.type = (Cpp_Token_Type)CPP_PP_STRINGIFY;
                    --pos;
                    break;
                }
                break;
                
                case LS_pp:
                {
                    --pos;
                    int start = lex_data.token_start + 1;
                    
                    c = chunk[start];
                    while (start < pos && (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f')){
                        ++start;
                        c = chunk[start];
                    }
                    
                    int word_size = pos - start;
                    Sub_Match_List_Result match;
                    match = sub_match_list(chunk, size, start, preprops, word_size);
                    
                    if (match.index != -1){
                        String_And_Flag data = preprops.data[match.index];
                        token.type = (Cpp_Token_Type)data.flags;
                        token.flags = CPP_TFLAG_PP_DIRECTIVE;
                        lex_data.pp_state = (char)cpp_pp_directive_to_state(token.type);
                    }
                    else{
                        token.type = CPP_TOKEN_JUNK;
                        token.flags = 0;
                    }
                }break;
                
                case LS_number:
                case LS_number0:
                case LS_hex:
                token.type = CPP_TOKEN_INTEGER_CONSTANT;
                token.flags = 0;
                --pos;
                break;
                
                case LS_float:
                token.type = CPP_TOKEN_FLOATING_CONSTANT;
                token.flags = 0;
                if (c != 'f'){
                    --pos;
                }
                break;
                
                case LS_char:
                token.type = CPP_TOKEN_CHARACTER_CONSTANT;
                token.flags = 0;
                break;
                
                case LS_string:
                token.type = CPP_TOKEN_STRING_CONSTANT;
                token.flags = 0;
                break;
                
                case LS_comment_pre:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_DIVEQ; break;
                    default:
                    token.type = CPP_TOKEN_DIV;
                    --pos;
                    break;
                }
                break;
                
                case LS_comment: case LS_comment_block_ending:
                token.type = CPP_TOKEN_COMMENT;
                token.flags = 0;
                c = chunk[--pos];
                while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
                    --pos;
                    c = chunk[pos];
                }
                ++pos;
                break;
                
                case LS_error_message:
                token.type = CPP_TOKEN_ERROR_MESSAGE;
                token.flags = 0;
                c = chunk[--pos];
                while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
                    --pos;
                    c = chunk[pos];
                }
                ++pos;
                break;
                
                case LS_dot:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '*': token.type = CPP_TOKEN_PTRDOT; break;
                    default:
                    token.type = CPP_TOKEN_DOT;
                    --pos;
                    break;
                }
                break;
                
                case LS_ellipsis:
                switch (c){
                    case '.':
                    token.flags = CPP_TFLAG_IS_OPERATOR;
                    token.type = CPP_TOKEN_ELLIPSIS;
                    break;
                    
                    default:
                    token.type = CPP_TOKEN_JUNK;
                    --pos;
                    break;
                }
                break;
                
                case LS_less:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_LESSEQ; break;
                    default:
                    token.type = CPP_TOKEN_LESS;
                    --pos;
                    break;
                }
                break;
                
                case LS_less_less:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_LSHIFTEQ; break;
                    default:
                    token.type = CPP_TOKEN_LSHIFT;
                    --pos;
                    break;
                }
                break;
                
                case LS_more:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_GRTREQ; break;
                    default:
                    token.type = CPP_TOKEN_GRTR;
                    --pos;
                    break;
                }
                break;
                
                case LS_more_more:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_RSHIFTEQ; break;
                    default:
                    token.type = CPP_TOKEN_RSHIFT;
                    --pos;
                    break;
                }
                break;
                
                case LS_minus:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '-': token.type = CPP_TOKEN_DECREMENT; break;
                    case '=': token.type = CPP_TOKEN_SUBEQ; break;
                    default:
                    token.type = CPP_TOKEN_MINUS;
                    --pos;
                    break;
                }
                break;
                
                case LS_arrow:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '*': token.type = CPP_TOKEN_PTRARROW; break;
                    default:
                    token.type = CPP_TOKEN_ARROW;
                    --pos;
                    break;
                }
                break;
                
                case LS_and:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '&': token.type = CPP_TOKEN_AND; break;
                    case '=': token.type = CPP_TOKEN_ANDEQ; break;
                    default:
                    token.type = CPP_TOKEN_AMPERSAND;
                    --pos;
                    break;
                }
                break;
                
                case LS_or:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '|': token.type = CPP_TOKEN_OR; break;
                    case '=': token.type = CPP_TOKEN_OREQ; break;
                    default:
                    token.type = CPP_TOKEN_BIT_OR;
                    --pos;
                    break;
                }
                break;
                
                case LS_plus:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '+': token.type = CPP_TOKEN_INCREMENT; break;
                    case '=': token.type = CPP_TOKEN_ADDEQ; break;
                    default:
                    token.type = CPP_TOKEN_PLUS;
                    --pos;
                    break;
                }
                break;
                
                case LS_colon:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case ':': token.type = CPP_TOKEN_SCOPE; break;
                    default:
                    token.type = CPP_TOKEN_COLON;
                    --pos;
                    break;
                }
                break;
                
                case LS_star:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_MULEQ; break;
                    default:
                    token.type = CPP_TOKEN_STAR;
                    --pos;
                    break;
                }
                break;
                
                case LS_modulo:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_MODEQ; break;
                    default:
                    token.type = CPP_TOKEN_MOD;
                    --pos;
                    break;
                }
                break;
                
                case LS_caret:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_XOREQ; break;
                    default:
                    token.type = CPP_TOKEN_BIT_XOR;
                    --pos;
                    break;
                }
                break;
                
                case LS_eq:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_EQEQ; break;
                    default:
                    token.type = CPP_TOKEN_EQ;
                    --pos;
                    break;
                }
                break;
                
                case LS_bang:
                token.flags = CPP_TFLAG_IS_OPERATOR;
                switch (c){
                    case '=': token.type = CPP_TOKEN_NOTEQ; break;
                    default:
                    // a lone '!' is logical not, matching the "!" entry in op_strings
                    token.type = CPP_TOKEN_NOT;
                    --pos;
                    break;
                }
                break;
            }
            
            if ((token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
                switch (lex_data.pp_state){
                    case LSPP_include:
                    if (token.type != CPP_TOKEN_INCLUDE_FILE){
                        token.type = CPP_TOKEN_JUNK;
                    }
                    lex_data.pp_state = LSPP_junk;
                    break;
                    
                    case LSPP_macro_identifier:
                    if (fsm.state != LS_identifier){
                        token.type = CPP_TOKEN_JUNK;
                        lex_data.pp_state = LSPP_junk;
                    }
                    else{
                        lex_data.pp_state = LSPP_body;
                    }
                    break;
                    
                    case LSPP_identifier:
                    if (fsm.state != LS_identifier){
                        token.type = CPP_TOKEN_JUNK;
                    }
                    lex_data.pp_state = LSPP_junk;
                    break;
                    
                    case LSPP_number:
                    if (token.type != CPP_TOKEN_INTEGER_CONSTANT){
                        token.type = CPP_TOKEN_JUNK;
                        lex_data.pp_state = LSPP_junk;
                    }
                    else{
                        lex_data.pp_state = LSPP_include;
                    }
                    break;
                    
                    case LSPP_junk:
                    token.type = CPP_TOKEN_JUNK;
                    break;
                }
            }
            
            token.start = lex_data.token_start;
            token.size = pos - lex_data.token_start;
            token.flags |= (fsm.multi_line)?(CPP_TFLAG_MULTILINE):(0);
            token.flags |= (fsm.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0);
            token.state_flags = fsm.pp_state;
            
            cpp_push_token_nonalloc(out_tokens, &token_i, token);
        }
    }
    
    token_stack_out->count = token_i;
    
    if (pos == end_pos) lex_data.completed = 1;
    return(lex_data);
}
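
// Illustrative driver sketch (not part of the lexer): lex an entire buffer in
// one call with a caller-allocated token stack.  The buffer, its length, and
// the token capacity are hypothetical; a real caller would grow the stack and
// call again when token_i reaches max_count before the chunk is consumed.
#if 0
static void
cpp_lex_nonalloc_example(char *text, int text_size){
    Cpp_Token tokens[1024];
    
    Cpp_Token_Stack stack = {};
    stack.tokens = tokens;
    stack.count = 0;
    stack.max_count = 1024;
    
    Lex_Data ld = cpp_lex_nonalloc(text, 0, text_size, &stack);
    if (ld.completed){
        // stack.count tokens now describe text[0..text_size)
    }
}
#endif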

}

#endif

// BOTTOM