diff --git a/4coder_custom.dll b/4coder_custom.dll deleted file mode 100644 index 763668bc..00000000 Binary files a/4coder_custom.dll and /dev/null differ diff --git a/4coder_custom.pdb b/4coder_custom.pdb deleted file mode 100644 index 3017c104..00000000 Binary files a/4coder_custom.pdb and /dev/null differ diff --git a/4coder_string.h b/4coder_string.h index fb2d0749..7e945dce 100644 --- a/4coder_string.h +++ b/4coder_string.h @@ -229,125 +229,125 @@ FSTRING_INLINE fstr_bool set_last_folder(String *dir, String folder_name, c #if !defined(FSTRING_C) && !defined(FSTRING_GUARD) FSTRING_INLINE String -make_string(void *str, int32_t size, int32_t mem_size){ return(make_string_cap(str,size,mem_size));} +make_string(void *str, int32_t size, int32_t mem_size){return(make_string_cap(str,size,mem_size));} FSTRING_INLINE String -substr(String str, int32_t start){ return(substr_tail(str,start));} +substr(String str, int32_t start){return(substr_tail(str,start));} FSTRING_INLINE fstr_bool -match(char *a, char *b){ return(match_cc(a,b));} +match(char *a, char *b){return(match_cc(a,b));} FSTRING_INLINE fstr_bool -match(String a, char *b){ return(match_sc(a,b));} +match(String a, char *b){return(match_sc(a,b));} FSTRING_INLINE fstr_bool -match(char *a, String b){ return(match_cs(a,b));} +match(char *a, String b){return(match_cs(a,b));} FSTRING_INLINE fstr_bool -match(String a, String b){ return(match_ss(a,b));} +match(String a, String b){return(match_ss(a,b));} FSTRING_INLINE fstr_bool -match_part(char *a, char *b, int32_t *len){ return(match_part_ccl(a,b,len));} +match_part(char *a, char *b, int32_t *len){return(match_part_ccl(a,b,len));} FSTRING_INLINE fstr_bool -match_part(String a, char *b, int32_t *len){ return(match_part_scl(a,b,len));} +match_part(String a, char *b, int32_t *len){return(match_part_scl(a,b,len));} FSTRING_INLINE fstr_bool -match_part(char *a, char *b){ return(match_part_cc(a,b));} +match_part(char *a, char *b){return(match_part_cc(a,b));} FSTRING_INLINE fstr_bool -match_part(String a, char *b){ return(match_part_sc(a,b));} +match_part(String a, char *b){return(match_part_sc(a,b));} FSTRING_INLINE fstr_bool -match_part(char *a, String b){ return(match_part_cs(a,b));} +match_part(char *a, String b){return(match_part_cs(a,b));} FSTRING_INLINE fstr_bool -match_part(String a, String b){ return(match_part_ss(a,b));} +match_part(String a, String b){return(match_part_ss(a,b));} FSTRING_INLINE fstr_bool -match_insensitive(char *a, char *b){ return(match_insensitive_cc(a,b));} +match_insensitive(char *a, char *b){return(match_insensitive_cc(a,b));} FSTRING_INLINE fstr_bool -match_insensitive(String a, char *b){ return(match_insensitive_sc(a,b));} +match_insensitive(String a, char *b){return(match_insensitive_sc(a,b));} FSTRING_INLINE fstr_bool -match_insensitive(char *a, String b){ return(match_insensitive_cs(a,b));} +match_insensitive(char *a, String b){return(match_insensitive_cs(a,b));} FSTRING_INLINE fstr_bool -match_insensitive(String a, String b){ return(match_insensitive_ss(a,b));} +match_insensitive(String a, String b){return(match_insensitive_ss(a,b));} FSTRING_INLINE fstr_bool -match_part_insensitive(char *a, char *b, int32_t *len){ return(match_part_insensitive_ccl(a,b,len));} +match_part_insensitive(char *a, char *b, int32_t *len){return(match_part_insensitive_ccl(a,b,len));} FSTRING_INLINE fstr_bool -match_part_insensitive(String a, char *b, int32_t *len){ return(match_part_insensitive_scl(a,b,len));} +match_part_insensitive(String a, char *b, int32_t *len){return(match_part_insensitive_scl(a,b,len));} FSTRING_INLINE fstr_bool -match_part_insensitive(char *a, char *b){ return(match_part_insensitive_cc(a,b));} +match_part_insensitive(char *a, char *b){return(match_part_insensitive_cc(a,b));} FSTRING_INLINE fstr_bool -match_part_insensitive(String a, char *b){ return(match_part_insensitive_sc(a,b));} +match_part_insensitive(String a, char *b){return(match_part_insensitive_sc(a,b));} FSTRING_INLINE fstr_bool -match_part_insensitive(char *a, String b){ return(match_part_insensitive_cs(a,b));} +match_part_insensitive(char *a, String b){return(match_part_insensitive_cs(a,b));} FSTRING_INLINE fstr_bool -match_part_insensitive(String a, String b){ return(match_part_insensitive_ss(a,b));} +match_part_insensitive(String a, String b){return(match_part_insensitive_ss(a,b));} FSTRING_INLINE int32_t -compare(char *a, char *b){ return(compare_cc(a,b));} +compare(char *a, char *b){return(compare_cc(a,b));} FSTRING_INLINE int32_t -compare(String a, char *b){ return(compare_sc(a,b));} +compare(String a, char *b){return(compare_sc(a,b));} FSTRING_INLINE int32_t -compare(char *a, String b){ return(compare_cs(a,b));} +compare(char *a, String b){return(compare_cs(a,b));} FSTRING_INLINE int32_t -compare(String a, String b){ return(compare_ss(a,b));} +compare(String a, String b){return(compare_ss(a,b));} FSTRING_INLINE int32_t -find(char *str, int32_t start, char character){ return(find_c_char(str,start,character));} +find(char *str, int32_t start, char character){return(find_c_char(str,start,character));} FSTRING_INLINE int32_t -find(String str, int32_t start, char character){ return(find_s_char(str,start,character));} +find(String str, int32_t start, char character){return(find_s_char(str,start,character));} FSTRING_INLINE int32_t -find(char *str, int32_t start, char *characters){ return(find_c_chars(str,start,characters));} +find(char *str, int32_t start, char *characters){return(find_c_chars(str,start,characters));} FSTRING_INLINE int32_t -find(String str, int32_t start, char *characters){ return(find_s_chars(str,start,characters));} +find(String str, int32_t start, char *characters){return(find_s_chars(str,start,characters));} FSTRING_INLINE int32_t -find_substr(char *str, int32_t start, String seek){ return(find_substr_c(str,start,seek));} +find_substr(char *str, int32_t start, String seek){return(find_substr_c(str,start,seek));} FSTRING_INLINE int32_t -find_substr(String str, int32_t start, String seek){ return(find_substr_s(str,start,seek));} +find_substr(String str, int32_t start, String seek){return(find_substr_s(str,start,seek));} FSTRING_INLINE int32_t -rfind_substr(String str, int32_t start, String seek){ return(rfind_substr_s(str,start,seek));} +rfind_substr(String str, int32_t start, String seek){return(rfind_substr_s(str,start,seek));} FSTRING_INLINE int32_t -find_substr_insensitive(char *str, int32_t start, String seek){ return(find_substr_insensitive_c(str,start,seek));} +find_substr_insensitive(char *str, int32_t start, String seek){return(find_substr_insensitive_c(str,start,seek));} FSTRING_INLINE int32_t -find_substr_insensitive(String str, int32_t start, String seek){ return(find_substr_insensitive_s(str,start,seek));} +find_substr_insensitive(String str, int32_t start, String seek){return(find_substr_insensitive_s(str,start,seek));} FSTRING_INLINE fstr_bool -has_substr(char *s, String seek){ return(has_substr_c(s,seek));} +has_substr(char *s, String seek){return(has_substr_c(s,seek));} FSTRING_INLINE fstr_bool -has_substr(String s, String seek){ return(has_substr_s(s,seek));} +has_substr(String s, String seek){return(has_substr_s(s,seek));} FSTRING_INLINE fstr_bool -has_substr_insensitive(char *s, String seek){ return(has_substr_insensitive_c(s,seek));} +has_substr_insensitive(char *s, String seek){return(has_substr_insensitive_c(s,seek));} FSTRING_INLINE fstr_bool -has_substr_insensitive(String s, String seek){ return(has_substr_insensitive_s(s,seek));} +has_substr_insensitive(String s, String seek){return(has_substr_insensitive_s(s,seek));} FSTRING_INLINE int32_t -copy_fast_unsafe(char *dest, char *src){ return(copy_fast_unsafe_cc(dest,src));} +copy_fast_unsafe(char *dest, char *src){return(copy_fast_unsafe_cc(dest,src));} FSTRING_INLINE int32_t -copy_fast_unsafe(char *dest, String src){ return(copy_fast_unsafe_cs(dest,src));} +copy_fast_unsafe(char *dest, String src){return(copy_fast_unsafe_cs(dest,src));} FSTRING_INLINE fstr_bool -copy_checked(String *dest, String src){ return(copy_checked_ss(dest,src));} +copy_checked(String *dest, String src){return(copy_checked_ss(dest,src));} FSTRING_INLINE fstr_bool -copy_partial(String *dest, char *src){ return(copy_partial_sc(dest,src));} +copy_partial(String *dest, char *src){return(copy_partial_sc(dest,src));} FSTRING_INLINE fstr_bool -copy_partial(String *dest, String src){ return(copy_partial_ss(dest,src));} +copy_partial(String *dest, String src){return(copy_partial_ss(dest,src));} FSTRING_INLINE int32_t -copy(char *dest, char *src){ return(copy_cc(dest,src));} +copy(char *dest, char *src){return(copy_cc(dest,src));} FSTRING_INLINE void -copy(String *dest, String src){ (copy_ss(dest,src));} +copy(String *dest, String src){(copy_ss(dest,src));} FSTRING_INLINE void -copy(String *dest, char *src){ (copy_sc(dest,src));} +copy(String *dest, char *src){(copy_sc(dest,src));} FSTRING_INLINE fstr_bool -append_checked(String *dest, String src){ return(append_checked_ss(dest,src));} +append_checked(String *dest, String src){return(append_checked_ss(dest,src));} FSTRING_INLINE fstr_bool -append_partial(String *dest, char *src){ return(append_partial_sc(dest,src));} +append_partial(String *dest, char *src){return(append_partial_sc(dest,src));} FSTRING_INLINE fstr_bool -append_partial(String *dest, String src){ return(append_partial_ss(dest,src));} +append_partial(String *dest, String src){return(append_partial_ss(dest,src));} FSTRING_INLINE fstr_bool -append(String *dest, char c){ return(append_s_char(dest,c));} +append(String *dest, char c){return(append_s_char(dest,c));} FSTRING_INLINE fstr_bool -append(String *dest, String src){ return(append_ss(dest,src));} +append(String *dest, String src){return(append_ss(dest,src));} FSTRING_INLINE fstr_bool -append(String *dest, char *src){ return(append_sc(dest,src));} +append(String *dest, char *src){return(append_sc(dest,src));} FSTRING_INLINE int32_t -str_is_int(char *str){ return(str_is_int_c(str));} +str_is_int(char *str){return(str_is_int_c(str));} FSTRING_INLINE fstr_bool -str_is_int(String str){ return(str_is_int_s(str));} +str_is_int(String str){return(str_is_int_s(str));} FSTRING_INLINE int32_t -str_to_int(char *str){ return(str_to_int_c(str));} +str_to_int(char *str){return(str_to_int_c(str));} FSTRING_INLINE int32_t -str_to_int(String str){ return(str_to_int_s(str));} +str_to_int(String str){return(str_to_int_s(str));} FSTRING_INLINE int32_t -reverse_seek_slash(String str, int32_t pos){ return(reverse_seek_slash_pos(str,pos));} +reverse_seek_slash(String str, int32_t pos){return(reverse_seek_slash_pos(str,pos));} FSTRING_INLINE fstr_bool -set_last_folder(String *dir, char *folder_name, char slash){ return(set_last_folder_sc(dir,folder_name,slash));} +set_last_folder(String *dir, char *folder_name, char slash){return(set_last_folder_sc(dir,folder_name,slash));} FSTRING_INLINE fstr_bool -set_last_folder(String *dir, String folder_name, char slash){ return(set_last_folder_ss(dir,folder_name,slash));} +set_last_folder(String *dir, String folder_name, char slash){return(set_last_folder_ss(dir,folder_name,slash));} #endif diff --git a/4cpp_lexer.h b/4cpp_lexer.h index c5bea44e..00cabb3e 100644 --- a/4cpp_lexer.h +++ b/4cpp_lexer.h @@ -1,202 +1,34 @@ -/* "4cpp" Open C++ Parser v0.1: Lexer - no warranty implied; use at your own risk - -NOTES ON USE: - OPTIONS: - Set options by defining macros before including this file. - - FCPP_LEXER_IMPLEMENTATION - causes this file to output function implementations - - this option is unset after use so that future includes of this file - in the same unit do not continue to output implementations - - FCPP_NO_MALLOC - prevent including - FCPP_NO_ASSERT - prevent including - FCPP_NO_STRING - prevent including - FCPP_NO_CRT - FCPP_NO_MALLOC & FCPP_NO_ASSERT & FCPP_NO_STRING - - FCPP_FORBID_MALLOC - one step above *NO_MALLOC with this set 4cpp functions that do allocations - are not allowed to be declared or defined at all, forcing the user to handle - allocation themselves - - implies FCPP_NO_MALLOC - - FCPP_GET_MEMORY - defines how to make allocations, interface of malloc, defaults to malloc - FCPP_FREE_MEMORY - defines how to free memory, interface of ree, defaults to free - (The above must be defined if FCPP_NO_MALLOC is set, unless FCPP_FORBID_MALLOC is set) - - FCPP_ASSERT - defines how to make assertions, interface of assert, defaults to assert - - FCPP_MEM_COPY - defines how to copy blocks of memory, interface of memcpy, defaults to memcpy - FCPP_MEM_MOVE - defines how to move blocks of memory, interface of memmove, defaults to memmove - (The above must be defined if FCPP_NO_STRING is set) - - FCPP_LINK - defines linkage of non-inline functions, defaults to static - FCPP_EXTERN - changes FCPP_LINK default to extern, this option is ignored if FCPP_LINK is defined - - include the file "4cpp_clear_config.h" if you want to undefine all options for some reason - - HIDDDEN DEPENDENCIES: - 4cpp is not a single file include library, there are dependencies between the files. - Be sure to include these dependencies before 4cpp_lexer.h: - - 4cpp_types.h - 4cpp_string.h -*/ // TOP -// TODO(allen): -// -// EASE OF USE AND DEPLOYMENT -// - make it easier to locate the list of function declarations -// - more C compatibility -// -// POTENTIAL -// - Experiment with optimizations. Sean's State machine? -// - Reserve 0th token for null? Put a EOF token at the end? -// - Pass Cpp_File and Cpp_Token_Stack by value instead of by pointer? -// -// CURRENT -// - lex in chunks -// -#include "4coder_config.h" - -#ifndef FCPP_LEXER_INC -#define FCPP_LEXER_INC +#ifndef FCPP_NEW_LEXER_INC +#define FCPP_NEW_LEXER_INC #include "4cpp_lexer_types.h" +#include "4cpp_lexer_fsms.h" +#include "4cpp_lexer_tables.c" -struct Cpp_Lex_Data{ - Cpp_Preprocessor_State pp_state; - int pos; - int complete; -}; - -struct Cpp_Read_Result{ - Cpp_Token token; - int pos; - char newline; - char has_result; -}; +#define lexer_link static // TODO(allen): revisit this keyword data declaration system -struct String_And_Flag{ - char *str; - fcpp_u32 flags; -}; - struct String_List{ - String_And_Flag *data; - int count; + String_And_Flag *data; + int count; }; struct Sub_Match_List_Result{ - int index; - fcpp_i32 new_pos; + int index; + fcpp_i32 new_pos; }; -inline fcpp_u16 -cpp_token_set_pp_state(fcpp_u16 bitfield, Cpp_Preprocessor_State state_value){ - return (fcpp_u16)state_value; -} +#define lexer_string_list(x) {x, (sizeof(x)/sizeof(*x))} -inline Cpp_Preprocessor_State -cpp_token_get_pp_state(fcpp_u16 bitfield){ - return (Cpp_Preprocessor_State)(bitfield); -} - -inline String -cpp_get_lexeme(char *str, Cpp_Token *token){ - String result; - result.str = str + token->start; - result.size = token->size; - return result; -} - -inline bool -is_keyword(Cpp_Token_Type type){ - return (type >= CPP_TOKEN_KEY_TYPE && type <= CPP_TOKEN_KEY_OTHER); -} - -FCPP_LINK Sub_Match_List_Result sub_match_list(char *data, int size, int pos, String_List list, int sub_size); - -FCPP_LINK Seek_Result seek_unescaped_eol(char *data, int size, int pos); -FCPP_LINK Seek_Result seek_unescaped_delim(char *data, int size, int pos, char delim); -FCPP_LINK Seek_Result seek_block_comment_end(char *data, int size, int pos); - -FCPP_LINK Cpp_Read_Result cpp_read_whitespace(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_junk_line(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_operator(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_pp_operator(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_alpha_numeric(char *data, int size, int pos, bool in_if_body); -inline Cpp_Read_Result cpp_read_alpha_numeric(char *data, int size, int pos) { return cpp_read_alpha_numeric(data, size, pos, 0); } -FCPP_LINK Cpp_Read_Result cpp_read_number(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_string_litteral(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_character_litteral(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_line_comment(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_block_comment(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_preprocessor(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_pp_include_file(char *data, int size, int pos); -FCPP_LINK Cpp_Read_Result cpp_read_pp_default_mode(char *data, int size, int pos, bool in_if_body); -inline Cpp_Read_Result cpp_read_pp_default_mode(char *data, int size, int pos) { return cpp_read_pp_default_mode(data, size, pos, 0); } - -FCPP_LINK Cpp_Token_Merge cpp_attempt_token_merge(Cpp_Token prev, Cpp_Token next); - -FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token); -FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token); - -inline Cpp_Lex_Data cpp_lex_data_zero() { Cpp_Lex_Data data = {(Cpp_Preprocessor_State)0}; return(data); } - -FCPP_LINK Cpp_Read_Result cpp_lex_step(char *data, int size, Cpp_Lex_Data *lex); - -FCPP_LINK int cpp_lex_file_token_count(char *data, int size); -FCPP_LINK Cpp_Lex_Data cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *stack, Cpp_Lex_Data lex_data); -inline Cpp_Lex_Data cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *stack) { return cpp_lex_file_nonalloc(data, size, stack, cpp_lex_data_zero()); } - -FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *stack, int pos); - -FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end); -FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token, int amount); - -FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance); -FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State state, Cpp_Token_Stack *stack); - -#ifndef FCPP_FORBID_MALLOC -FCPP_LINK Cpp_Token_Stack cpp_make_token_stack(int max); -FCPP_LINK void cpp_free_token_stack(Cpp_Token_Stack stack); -FCPP_LINK void cpp_resize_token_stack(Cpp_Token_Stack *stack, int new_max); - -FCPP_LINK void cpp_push_token(Cpp_Token_Stack *stack, Cpp_Token token); -FCPP_LINK void cpp_lex_file(char *data, int size, Cpp_Token_Stack *stack); -FCPP_LINK bool cpp_relex_file_limited(char *data, int size, Cpp_Token_Stack *stack, int start_i, int end_i, int amount, int extra_tolerance); -inline void cpp_relex_file(char *data, int size, Cpp_Token_Stack *stack, int start_i, int end_i, int amount) -{ cpp_relex_file_limited(data, size, stack, start_i, end_i, amount, -1); } -#endif - -#define FCPP_STRING_LIST(x) {x, FCPP_COUNT(x)} - -// TODO(allen): shift towards storing in a context -FCPP_GLOBAL String_And_Flag int_suf_strings[] = { - {"ull"}, {"ULL"}, - {"llu"}, {"LLU"}, - {"ll"}, {"LL"}, - {"l"}, {"L"}, - {"u"}, {"U"} +static String_And_Flag bool_lit_strings[] = { + {"true"}, {"false"} }; +static String_List bool_lits = lexer_string_list(bool_lit_strings); -FCPP_GLOBAL String_List int_sufs = FCPP_STRING_LIST(int_suf_strings); - -FCPP_GLOBAL String_And_Flag float_suf_strings[] = { - {"f"}, {"F"}, - {"l"}, {"L"} -}; -FCPP_GLOBAL String_List float_sufs = FCPP_STRING_LIST(float_suf_strings); - -FCPP_GLOBAL String_And_Flag bool_lit_strings[] = { - {"true"}, {"false"} -}; -FCPP_GLOBAL String_List bool_lits = FCPP_STRING_LIST(bool_lit_strings); - -FCPP_GLOBAL String_And_Flag keyword_strings[] = { +static String_And_Flag keyword_strings[] = { {"and", CPP_TOKEN_AND}, {"and_eq", CPP_TOKEN_ANDEQ}, {"bitand", CPP_TOKEN_BIT_AND}, @@ -215,21 +47,21 @@ FCPP_GLOBAL String_And_Flag keyword_strings[] = { {"not_eq", CPP_TOKEN_NOTEQ}, {"typeid", CPP_TOKEN_TYPEID}, {"compl", CPP_TOKEN_BIT_NOT}, - + {"void", CPP_TOKEN_KEY_TYPE}, {"bool", CPP_TOKEN_KEY_TYPE}, {"char", CPP_TOKEN_KEY_TYPE}, {"int", CPP_TOKEN_KEY_TYPE}, {"float", CPP_TOKEN_KEY_TYPE}, {"double", CPP_TOKEN_KEY_TYPE}, - + {"long", CPP_TOKEN_KEY_MODIFIER}, {"short", CPP_TOKEN_KEY_MODIFIER}, {"unsigned", CPP_TOKEN_KEY_MODIFIER}, - + {"const", CPP_TOKEN_KEY_QUALIFIER}, {"volatile", CPP_TOKEN_KEY_QUALIFIER}, - + {"asm", CPP_TOKEN_KEY_CONTROL_FLOW}, {"break", CPP_TOKEN_KEY_CONTROL_FLOW}, {"case", CPP_TOKEN_KEY_CONTROL_FLOW}, @@ -246,12 +78,12 @@ FCPP_GLOBAL String_And_Flag keyword_strings[] = { {"try", CPP_TOKEN_KEY_CONTROL_FLOW}, {"while", CPP_TOKEN_KEY_CONTROL_FLOW}, {"static_assert", CPP_TOKEN_KEY_CONTROL_FLOW}, - + {"const_cast", CPP_TOKEN_KEY_CAST}, {"dynamic_cast", CPP_TOKEN_KEY_CAST}, {"reinterpret_cast", CPP_TOKEN_KEY_CAST}, {"static_cast", CPP_TOKEN_KEY_CAST}, - + {"class", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"enum", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"struct", CPP_TOKEN_KEY_TYPE_DECLARATION}, @@ -259,20 +91,20 @@ FCPP_GLOBAL String_And_Flag keyword_strings[] = { {"union", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"template", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"typename", CPP_TOKEN_KEY_TYPE_DECLARATION}, - + {"friend", CPP_TOKEN_KEY_ACCESS}, {"namespace", CPP_TOKEN_KEY_ACCESS}, {"private", CPP_TOKEN_KEY_ACCESS}, {"protected", CPP_TOKEN_KEY_ACCESS}, {"public", CPP_TOKEN_KEY_ACCESS}, {"using", CPP_TOKEN_KEY_ACCESS}, - + {"extern", CPP_TOKEN_KEY_LINKAGE}, {"export", CPP_TOKEN_KEY_LINKAGE}, {"inline", CPP_TOKEN_KEY_LINKAGE}, {"static", CPP_TOKEN_KEY_LINKAGE}, {"virtual", CPP_TOKEN_KEY_LINKAGE}, - + {"alignas", CPP_TOKEN_KEY_OTHER}, {"explicit", CPP_TOKEN_KEY_OTHER}, {"noexcept", CPP_TOKEN_KEY_OTHER}, @@ -282,118 +114,18 @@ FCPP_GLOBAL String_And_Flag keyword_strings[] = { {"this", CPP_TOKEN_KEY_OTHER}, {"thread_local", CPP_TOKEN_KEY_OTHER}, }; -FCPP_GLOBAL String_List keywords = FCPP_STRING_LIST(keyword_strings); +static String_List keywords = lexer_string_list(keyword_strings); -FCPP_GLOBAL String_And_Flag op_strings[] = { - {"...", CPP_TOKEN_ELLIPSIS}, - {"<<=", CPP_TOKEN_LSHIFTEQ}, - {">>=", CPP_TOKEN_RSHIFTEQ}, - {"->*", CPP_TOKEN_PTRARROW}, - {"<<", CPP_TOKEN_LSHIFT}, - {">>", CPP_TOKEN_RSHIFT}, - {"&&", CPP_TOKEN_AND}, - {"||", CPP_TOKEN_OR}, - {"->", CPP_TOKEN_ARROW}, - {"++", CPP_TOKEN_INCREMENT}, - {"--", CPP_TOKEN_DECREMENT}, - {"::", CPP_TOKEN_SCOPE}, - {"+=", CPP_TOKEN_ADDEQ}, - {"-=", CPP_TOKEN_SUBEQ}, - {"*=", CPP_TOKEN_MULEQ}, - {"/=", CPP_TOKEN_DIVEQ}, - {"%=", CPP_TOKEN_MODEQ}, - {"&=", CPP_TOKEN_ANDEQ}, - {"|=", CPP_TOKEN_OREQ}, - {"^=", CPP_TOKEN_XOREQ}, - {"==", CPP_TOKEN_EQEQ}, - {">=", CPP_TOKEN_GRTREQ}, - {"<=", CPP_TOKEN_LESSEQ}, - {"!=", CPP_TOKEN_NOTEQ}, - {".*", CPP_TOKEN_PTRDOT}, - {"{", CPP_TOKEN_BRACE_OPEN}, - {"}", CPP_TOKEN_BRACE_CLOSE}, - {"[", CPP_TOKEN_BRACKET_OPEN}, - {"]", CPP_TOKEN_BRACKET_CLOSE}, - {"(", CPP_TOKEN_PARENTHESE_OPEN}, - {")", CPP_TOKEN_PARENTHESE_CLOSE}, - {"<", CPP_TOKEN_LESS}, - {">", CPP_TOKEN_GRTR}, - {"+", CPP_TOKEN_PLUS}, - {"-", CPP_TOKEN_MINUS}, - {"!", CPP_TOKEN_NOT}, - {"~", CPP_TOKEN_TILDE}, - {"*", CPP_TOKEN_STAR}, - {"&", CPP_TOKEN_AMPERSAND}, - {"|", CPP_TOKEN_BIT_OR}, - {"^", CPP_TOKEN_BIT_XOR}, - {"=", CPP_TOKEN_EQ}, - {",", CPP_TOKEN_COMMA}, - {":", CPP_TOKEN_COLON}, - {";", CPP_TOKEN_SEMICOLON}, - {"/", CPP_TOKEN_DIV}, - {"?", CPP_TOKEN_TERNARY_QMARK}, - {"%", CPP_TOKEN_MOD}, - {".", CPP_TOKEN_DOT}, -}; -FCPP_GLOBAL String_List ops = FCPP_STRING_LIST(op_strings); - -FCPP_GLOBAL String_And_Flag pp_op_strings[] = { - {"##", CPP_PP_CONCAT}, - {"#", CPP_PP_STRINGIFY}, -}; -FCPP_GLOBAL String_List pp_ops = FCPP_STRING_LIST(pp_op_strings); - -FCPP_GLOBAL String_And_Flag preprop_strings[] = { - {"include", CPP_PP_INCLUDE}, - {"INCLUDE", CPP_PP_INCLUDE}, - {"ifndef", CPP_PP_IFNDEF}, - {"IFNDEF", CPP_PP_IFNDEF}, - {"define", CPP_PP_DEFINE}, - {"DEFINE", CPP_PP_DEFINE}, - {"import", CPP_PP_IMPORT}, - {"IMPORT", CPP_PP_IMPORT}, - {"pragma", CPP_PP_PRAGMA}, - {"PRAGMA", CPP_PP_PRAGMA}, - {"undef", CPP_PP_UNDEF}, - {"UNDEF", CPP_PP_UNDEF}, - {"endif", CPP_PP_ENDIF}, - {"ENDIF", CPP_PP_ENDIF}, - {"error", CPP_PP_ERROR}, - {"ERROR", CPP_PP_ERROR}, - {"ifdef", CPP_PP_IFDEF}, - {"IFDEF", CPP_PP_IFDEF}, - {"using", CPP_PP_USING}, - {"USING", CPP_PP_USING}, - {"else", CPP_PP_ELSE}, - {"ELSE", CPP_PP_ELSE}, - {"elif", CPP_PP_ELIF}, - {"ELIF", CPP_PP_ELIF}, - {"line", CPP_PP_LINE}, - {"LINE", CPP_PP_LINE}, - {"if", CPP_PP_IF}, - {"IF", CPP_PP_IF}, -}; -FCPP_GLOBAL String_List preprops = FCPP_STRING_LIST(preprop_strings); - -#undef FCPP_STRING_LIST - -#endif // #ifndef FCPP_CPP_LEXER - -#ifdef FCPP_LEXER_IMPLEMENTATION - -#define _Assert FCPP_ASSERT -#define _TentativeAssert FCPP_ASSERT - -FCPP_LINK Sub_Match_List_Result -sub_match_list(char *data, int size, int pos, String_List list, int sub_size){ - Sub_Match_List_Result result; +lexer_link Sub_Match_List_Result +sub_match_list(char *chunk, int size, int pos, String_List list, int sub_size){ + Sub_Match_List_Result result; String str_main; char *str_check; int i,l; - + result.index = -1; result.new_pos = pos; - str_main = make_string(data + pos, size - pos); + str_main = make_string(chunk + pos, size - pos); if (sub_size > 0){ str_main = substr(str_main, 0, sub_size); for (i = 0; i < list.count; ++i){ @@ -415,930 +147,32 @@ sub_match_list(char *data, int size, int pos, String_List list, int sub_size){ } } } - return result; + return result; } -FCPP_LINK Seek_Result -seek_unescaped_eol(char *data, int size, int pos){ - Seek_Result result = {}; - ++pos; - while (pos < size){ - if (data[pos] == '\\'){ - if (pos + 1 < size && - data[pos+1] == '\n'){ - result.new_line = 1; - ++pos; - } - else if (pos + 1 < size && - data[pos+1] == '\r' && - pos + 2 < size && - data[pos+2] == '\n'){ - result.new_line = 1; - pos += 2; - } - } - else if (data[pos] == '\n'){ - break; - } - ++pos; - } - ++pos; - result.pos = pos; - return result; -} - -FCPP_LINK Seek_Result -seek_unescaped_delim(char *data, int size, int pos, char delim){ - Seek_Result result = {}; - bool escape = 0; - ++pos; - while (pos < size){ - if (data[pos] == '\n'){ - result.new_line = 1; - } - if (escape){ - escape = 0; - } - else{ - if (data[pos] == '\\'){ - escape = 1; - } - else if (data[pos] == delim){ - break; - } - } - ++pos; - } - ++pos; - - result.pos = pos; - return result; -} - -FCPP_LINK Seek_Result -seek_block_comment_end(char *data, int size, int pos){ - Seek_Result result = {}; - pos += 2; - while (pos < size){ - if (data[pos] == '*' && - pos + 1 < size && - data[pos+1] == '/'){ - break; - } - if (data[pos] == '\n'){ - result.new_line = 1; - } - ++pos; - } - pos += 2; - result.pos = pos; - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_whitespace(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - - while (pos < size && char_is_whitespace(data[pos])){ - if (data[pos] == '\n'){ - result.newline = 1; - } - ++pos; - } - - result.pos = pos; - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_junk_line(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.token.start = pos; - result.token.type = CPP_TOKEN_JUNK; - - bool comment_end = 0; - while (pos < size && data[pos] != '\n'){ - if (data[pos] == '/' && pos + 1 < size){ - if (data[pos + 1] == '/' || - data[pos + 1] == '*'){ - comment_end = 1; - break; - } - } - ++pos; - } - - if (comment_end){ - result.pos = pos; - result.token.size = pos - result.token.start; - } - else{ - while (pos > 0 && data[pos - 1] == '\r'){ - --pos; - } - if (pos > 0 && data[pos - 1] == '\\'){ - --pos; - } - result.pos = pos; - result.token.size = pos - result.token.start; - } - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_operator(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.pos = pos; - result.token.start = pos; - - Sub_Match_List_Result match; - match = sub_match_list(data, size, result.token.start, ops, -1); - - if (match.index != -1){ - result.pos = match.new_pos; - result.token.size = result.pos - result.token.start; - result.token.type = (Cpp_Token_Type)ops.data[match.index].flags; - result.token.flags |= CPP_TFLAG_IS_OPERATOR; - } - else{ - result.token.size = 1; - result.token.type = CPP_TOKEN_JUNK; - result.pos = pos + 1; - } - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_pp_operator(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.pos = pos; - result.token.start = pos; - - Sub_Match_List_Result match; - match = sub_match_list(data, size, result.token.start, pp_ops, -1); - - _Assert(match.index != -1); - result.pos = match.new_pos; - result.token.size = result.pos - result.token.start; - result.token.type = (Cpp_Token_Type)pp_ops.data[match.index].flags; - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_alpha_numeric(char *data, int size, int pos, bool in_if_body){ - Cpp_Read_Result result = {}; - result.pos = pos; - result.token.start = pos; - - while (result.pos < size && - char_is_alpha_numeric(data[result.pos])){ - ++result.pos; - } - - result.token.size = result.pos - result.token.start; - - // TODO(allen): do better - if (in_if_body){ - String word; - word.size = result.token.size; - word.str = data + result.token.start; - if (match_ss(word, make_lit_string("defined"))){ - result.token.type = CPP_TOKEN_DEFINED; - result.token.flags |= CPP_TFLAG_IS_OPERATOR; - result.token.flags |= CPP_TFLAG_IS_KEYWORD; - } - } - - if (result.token.type == CPP_TOKEN_JUNK){ - Sub_Match_List_Result match; - match = sub_match_list(data, size, result.token.start, bool_lits, result.token.size); - - if (match.index != -1){ - result.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; - result.token.flags |= CPP_TFLAG_IS_KEYWORD; - } - else{ - match = sub_match_list(data, size, result.token.start, keywords, result.token.size); - - if (match.index != -1){ - String_And_Flag data = keywords.data[match.index]; - result.token.type = (Cpp_Token_Type)data.flags; - result.token.flags |= CPP_TFLAG_IS_KEYWORD; - } - else{ - result.token.type = CPP_TOKEN_IDENTIFIER; - } - } - } - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_number(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.pos = pos; - result.token.start = pos; - - bool is_float = 0; - bool is_integer = 0; - bool is_oct = 0; - bool is_hex = 0; - bool is_zero = 0; - - if (data[pos] == '0'){ - if (pos+1 < size){ - char next = data[pos+1]; - if (next == 'x'){ - is_hex = 1; - is_integer = 1; - } - else if (next == '.'){ - is_float = 1; - ++result.pos; - } - else if (next >= '0' && next <= '9'){ - is_oct = 1; - is_integer = 1; - } - else{ - is_zero = 1; - is_integer = 1; - } - } - else{ - is_zero = 1; - is_integer = 1; - } - } - else if (data[pos] == '.'){ - is_float = 1; - } - - if (is_zero){ - ++result.pos; - } - else if (is_hex){ - ++result.pos; - char character; - do{ - ++result.pos; - if (result.pos >= size){ - break; - } - character = data[result.pos]; - } while(char_is_hex(character)); - } - else if (is_oct){ - char character; - do{ - ++result.pos; - if (result.pos >= size){ - break; - } - character = data[result.pos]; - }while(char_is_numeric(character)); - } - else{ - if (!is_float){ - is_integer = 1; - while (1){ - ++result.pos; - - if (result.pos >= size){ - break; - } - bool is_good = 0; - char character = data[result.pos]; - if (character >= '0' && character <= '9'){ - is_good = 1; - } - else if (character == '.'){ - is_integer = 0; - is_float = 1; - } - if (!is_good){ - break; - } - } - } - - if (is_float){ - bool e_mode = 0; - bool e_minus = 0; - bool is_good = 0; - char character; - - while (1){ - ++result.pos; - if (result.pos >= size){ - break; - } - is_good = 0; - character = data[result.pos]; - if (character >= '0' && character <= '9'){ - is_good = 1; - } - else{ - if (character == 'e' && !e_mode){ - e_mode = 1; - is_good = 1; - } - else if (character == '-' && e_mode && !e_minus){ - e_minus = 1; - is_good = 1; - } - } - if (!is_good){ - break; - } - } - } - } - - if (is_integer){ - Sub_Match_List_Result match = - sub_match_list(data, size, result.pos, int_sufs, -1); - if (match.index != -1){ - result.pos = match.new_pos; - } - result.token.type = CPP_TOKEN_INTEGER_CONSTANT; - result.token.size = result.pos - result.token.start; - } - else if (is_float){ - Sub_Match_List_Result match = - sub_match_list(data, size, result.pos, float_sufs, -1); - if (match.index != -1){ - result.pos = match.new_pos; - } - result.token.type = CPP_TOKEN_FLOATING_CONSTANT; - result.token.size = result.pos - result.token.start; - } - else{ - _Assert(!"This shouldn't happen!"); - } - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_string_litteral(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.token.start = pos; - - _Assert(data[pos] == '"'); - Seek_Result seek = seek_unescaped_delim(data, size, pos, '"'); - pos = seek.pos; - if (seek.new_line){ - result.token.flags |= CPP_TFLAG_MULTILINE; - } - - result.token.size = pos - result.token.start; - result.token.type = CPP_TOKEN_STRING_CONSTANT; - result.pos = pos; - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_character_litteral(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.token.start = pos; - - _Assert(data[pos] == '\''); - Seek_Result seek = seek_unescaped_delim(data, size, pos, '\''); - pos = seek.pos; - if (seek.new_line){ - result.token.flags |= CPP_TFLAG_MULTILINE; - } - - result.token.size = pos - result.token.start; - result.token.type = CPP_TOKEN_CHARACTER_CONSTANT; - result.pos = pos; - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_line_comment(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.token.start = pos; - - _Assert(data[pos] == '/' && data[pos + 1] == '/'); - - pos += 2; - while (pos < size){ - if (data[pos] == '\n'){ - break; - } - if (data[pos] == '\\'){ - if (pos + 1 < size && - data[pos + 1] == '\n'){ - ++pos; - } - else if (pos + 2 < size && - data[pos + 1] == '\r' && - data[pos + 2] == '\n'){ - pos += 2; - } - } - ++pos; - } - if (pos > 0 && data[pos-1] == '\r'){ - --pos; - } - result.token.size = pos - result.token.start; - result.token.type = CPP_TOKEN_COMMENT; - result.pos = pos; - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_block_comment(char *data, int size, int pos){ - Cpp_Read_Result result = {}; - result.token.start = pos; - - _Assert(data[pos] == '/' && data[pos + 1] == '*'); - pos += 2; - while (pos < size){ - if (data[pos] == '*' && - pos + 1 < size && - data[pos+1] == '/'){ - pos += 2; - break; - } - ++pos; - } - result.token.size = pos - result.token.start; - result.token.type = CPP_TOKEN_COMMENT; - result.pos = pos; - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_preprocessor(char *data, int size, int pos){ - _Assert(data[pos] == '#'); - Cpp_Read_Result result = {}; - result.token.start = pos; - result.token.type = CPP_PP_UNKNOWN; - result.token.flags |= CPP_TFLAG_PP_DIRECTIVE; - - ++pos; - while (pos < size && - (data[pos] == ' ' || - data[pos] == '\t')){ - ++pos; - } - - Sub_Match_List_Result match - = sub_match_list(data, size, pos, preprops, -1); - - if (match.index != -1){ - result.token.size = match.new_pos - result.token.start; - result.token.type = (Cpp_Token_Type)preprops.data[match.index].flags; - result.pos = match.new_pos; - } - else{ - while (pos < size && !char_is_whitespace(data[pos])){ - ++pos; - } - result.token.size = pos - result.token.start; - result.pos = pos; - } - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_pp_include_file(char *data, int size, int pos){ - char start = data[pos]; - _Assert(start == '<' || start == '"'); - - Cpp_Read_Result result = {}; - result.token.start = pos; - result.token.type = CPP_TOKEN_INCLUDE_FILE; - result.token.flags |= CPP_TFLAG_PP_BODY; - - char end; - if (start == '<'){ - end = '>'; - } - else{ - end = '"'; - } - - ++pos; - while (pos < size && data[pos] != end){ - if (data[pos] == '\n'){ - result.token.type = CPP_TOKEN_JUNK; - result.token.flags |= CPP_TFLAG_BAD_ENDING; - break; - } - if (data[pos] == '\\'){ - if (pos + 1 < size && data[pos + 1] == '\n'){ - ++pos; - result.token.flags |= CPP_TFLAG_MULTILINE; - } - else if (pos + 2 < size && - data[pos + 1] == '\r' && - data[pos + 2] == '\n'){ - pos += 2; - result.token.flags |= CPP_TFLAG_MULTILINE; - } - } - ++pos; - } - - if (result.token.type != CPP_TOKEN_JUNK){ - if (pos < size){ - ++pos; - } - } - - result.token.size = pos - result.token.start; - result.pos = pos; - - return result; -} - -FCPP_LINK Cpp_Read_Result -cpp_read_pp_default_mode(char *data, int size, int pos, bool in_if_body){ - char current = data[pos]; - Cpp_Read_Result result; - if (char_is_numeric(current)){ - result = cpp_read_number(data, size, pos); - } - else if (char_is_alpha(current)){ - result = cpp_read_alpha_numeric(data, size, pos, in_if_body); - } - else if (current == '.'){ - if (pos + 1 < size){ - char next = data[pos + 1]; - if (char_is_numeric(next)){ - result = cpp_read_number(data, size, pos); - } - else{ - result = cpp_read_operator(data, size, pos); - } - } - else{ - result = cpp_read_operator(data, size, pos); - } - } - - else if (current == '/'){ - if (pos + 1 < size){ - char next = data[pos + 1]; - if (next == '/'){ - result = cpp_read_line_comment(data, size, pos); - } - else if (next == '*'){ - result = cpp_read_block_comment(data, size, pos); - } - else{ - result = cpp_read_operator(data, size, pos); - } - } - else{ - result = cpp_read_operator(data, size, pos); - } - } - else if (current == '"'){ - result = cpp_read_string_litteral(data, size, pos); - } - else if (current == '\''){ - result = cpp_read_character_litteral(data, size, pos); - } - else{ - result = cpp_read_operator(data, size, pos); - } - - return result; -} - -FCPP_LINK Cpp_Token_Merge -cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){ - Cpp_Token_Merge result = {}; - if (next_token.type == CPP_TOKEN_COMMENT && prev_token.type == CPP_TOKEN_COMMENT && - next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ - result.did_merge = 1; - prev_token.size = next_token.start + next_token.size - prev_token.start; - result.new_token = prev_token; - } - else if (next_token.type == CPP_TOKEN_JUNK && prev_token.type == CPP_TOKEN_JUNK && - next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ - result.did_merge = 1; - prev_token.size = next_token.start + next_token.size - prev_token.start; - result.new_token = prev_token; - } - return result; -} - -FCPP_LINK bool -cpp_push_token_no_merge(Cpp_Token_Stack *token_stack, Cpp_Token token){ - if (token_stack->count >= token_stack->max_count){ - return 0; - } - - token_stack->tokens[token_stack->count++] = token; - return 1; -} - -FCPP_LINK bool -cpp_push_token_nonalloc(Cpp_Token_Stack *token_stack, Cpp_Token token){ - Cpp_Token_Merge merge = {}; - - if (token_stack->count > 0){ - Cpp_Token prev_token = token_stack->tokens[token_stack->count - 1]; - merge = cpp_attempt_token_merge(prev_token, token); - if (merge.did_merge){ - token_stack->tokens[token_stack->count - 1] = merge.new_token; - } - } - - if (!merge.did_merge){ - if (token_stack->count >= token_stack->max_count){ - return 0; - } - - token_stack->tokens[token_stack->count++] = token; - } - - return 1; -} - -FCPP_LINK Cpp_Read_Result -cpp_lex_step(char *data, int size, Cpp_Lex_Data *lex_data){ - Cpp_Lex_Data lex = *lex_data; - Cpp_Read_Result result = {}; - bool has_result = 1; - - fcpp_u16 state_flags = cpp_token_set_pp_state(0, lex.pp_state); - - char current = data[lex.pos]; - if (char_is_whitespace(current)){ - result = cpp_read_whitespace(data, size, lex.pos); - lex.pos = result.pos; - if (result.newline && lex.pp_state != CPP_LEX_PP_DEFAULT){ - lex.pp_state = CPP_LEX_PP_DEFAULT; - } - has_result = 0; - } - - else{ - if (lex.pp_state == CPP_LEX_PP_DEFAULT){ - // TODO(allen): Not first hard of the line? Then it's junk. - if (current == '#'){ - result = cpp_read_preprocessor(data, size, lex.pos); - lex.pos = result.pos; - switch (result.token.type){ - case CPP_PP_INCLUDE: - case CPP_PP_IMPORT: - case CPP_PP_USING: - lex.pp_state = CPP_LEX_PP_INCLUDE; - break; - case CPP_PP_DEFINE: - lex.pp_state = CPP_LEX_PP_MACRO_IDENTIFIER; - break; - case CPP_PP_UNDEF: - case CPP_PP_IFDEF: - case CPP_PP_IFNDEF: - lex.pp_state = CPP_LEX_PP_IDENTIFIER; - break; - case CPP_PP_IF: - case CPP_PP_ELIF: - lex.pp_state = CPP_LEX_PP_BODY_IF; - break; - case CPP_PP_PRAGMA: - lex.pp_state = CPP_LEX_PP_BODY; - break; - case CPP_PP_LINE: - lex.pp_state = CPP_LEX_PP_NUMBER; - break; - case CPP_PP_ERROR: - lex.pp_state = CPP_LEX_PP_ERROR; - break; - - case CPP_PP_UNKNOWN: - case CPP_PP_ELSE: - case CPP_PP_ENDIF: - lex.pp_state = CPP_LEX_PP_JUNK; - break; - } - } - else{ - result = cpp_read_pp_default_mode(data, size, lex.pos); - lex.pos = result.pos; - } - } - - else{ - if (current == '\\'){ - fcpp_i32 seek = lex.pos; - ++seek; - while (seek < size && data[seek] == '\r'){ - ++seek; - } - if ((seek < size && data[seek] == '\n') || seek >= size){ - lex.pos = seek + 1; - has_result = 0; - } - else{ - lex.pp_state = CPP_LEX_PP_JUNK; - result.token.type = CPP_TOKEN_JUNK; - result.token.start = lex.pos; - result.token.size = 1; - result.token.flags |= CPP_TFLAG_PP_BODY; - lex.pos = seek; - } - } - - else{ - switch (lex.pp_state){ - case CPP_LEX_PP_IDENTIFIER: - if (!char_is_alpha_numeric(current)){ - has_result = 0; - lex.pp_state = CPP_LEX_PP_JUNK; - } - else{ - result = cpp_read_alpha_numeric(data, size, lex.pos); - result.token.flags |= CPP_TFLAG_PP_BODY; - lex.pos = result.pos; - lex.pp_state = CPP_LEX_PP_JUNK; - } - break; - - case CPP_LEX_PP_MACRO_IDENTIFIER: - if (!char_is_alpha_numeric(current)){ - has_result = 0; - lex.pp_state = CPP_LEX_PP_JUNK; - } - else{ - result = cpp_read_alpha_numeric(data, size, lex.pos); - result.token.flags |= CPP_TFLAG_PP_BODY; - lex.pos = result.pos; - lex.pp_state = CPP_LEX_PP_BODY; - } - break; - - case CPP_LEX_PP_INCLUDE: - if (current != '"' && current != '<'){ - has_result = 0; - lex.pp_state = CPP_LEX_PP_JUNK; - } - else{ - result = cpp_read_pp_include_file(data, size, lex.pos); - lex.pos = result.pos; - lex.pp_state = CPP_LEX_PP_JUNK; - } - break; - - case CPP_LEX_PP_BODY: - if (current == '#'){ - result = cpp_read_pp_operator(data, size, lex.pos); - } - else{ - result = cpp_read_pp_default_mode(data, size, lex.pos); - } - lex.pos = result.pos; - result.token.flags |= CPP_TFLAG_PP_BODY; - break; - - case CPP_LEX_PP_BODY_IF: - if (current == '#'){ - result = cpp_read_pp_operator(data, size, lex.pos); - } - else{ - result = cpp_read_pp_default_mode(data, size, lex.pos, 1); - } - lex.pos = result.pos; - result.token.flags |= CPP_TFLAG_PP_BODY; - break; - - case CPP_LEX_PP_NUMBER: - if (!char_is_numeric(current)){ - has_result = 0; - lex.pp_state = CPP_LEX_PP_JUNK; - } - else{ - result = cpp_read_number(data, size, lex.pos); - lex.pos = result.pos; - result.token.flags |= CPP_TFLAG_PP_BODY; - lex.pp_state = CPP_LEX_PP_INCLUDE; - } - break; - - case CPP_LEX_PP_ERROR: - result = cpp_read_junk_line(data, size, lex.pos); - lex.pos = result.pos; - result.token.type = CPP_TOKEN_ERROR_MESSAGE; - result.token.flags |= CPP_TFLAG_PP_BODY; - break; - - default: - { - bool took_comment = 0; - if (current == '/' && lex.pos + 1 < size){ - if (data[lex.pos + 1] == '/'){ - result = cpp_read_line_comment(data, size, lex.pos); - lex.pp_state = CPP_LEX_PP_DEFAULT; - lex.pos = result.pos; - took_comment = 1; - }else if (data[lex.pos + 1] == '*'){ - result = cpp_read_block_comment(data, size, lex.pos); - lex.pos = result.pos; - took_comment = 1; - } - } - - if (!took_comment){ - result = cpp_read_junk_line(data, size, lex.pos); - lex.pos = result.pos; - result.token.flags |= CPP_TFLAG_PP_BODY; - } - }break; - - } - } - } - } - - result.token.state_flags = state_flags; - result.has_result = has_result; - - *lex_data = lex; - return result; -} - -FCPP_LINK int -cpp_lex_file_token_count(char *data, int size){ - int count = 0; - Cpp_Lex_Data lex = {}; - Cpp_Token token = {}; - while (lex.pos < size){ - Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex); - - if (step_result.has_result){ - if (count > 0){ - Cpp_Token_Merge merge = cpp_attempt_token_merge(token, step_result.token); - if (merge.did_merge){ - token = merge.new_token; - } - else{ - token = step_result.token; - ++count; - } - } - else{ - token = step_result.token; - ++count; - } - } - } - return count; -} - -FCPP_LINK Cpp_Lex_Data -cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *token_stack_out, Cpp_Lex_Data lex_data){ - while (lex_data.pos < size){ - Cpp_Lex_Data prev_lex = lex_data; - Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex_data); - - if (step_result.has_result){ - if (!cpp_push_token_nonalloc(token_stack_out, step_result.token)){ - lex_data = prev_lex; - return lex_data; - } - } - } - - lex_data.complete = 1; - return lex_data; -} - -FCPP_LINK Cpp_Get_Token_Result +lexer_link Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ - int first, last; - first = 0; - last = token_stack->count; - Cpp_Get_Token_Result result = {}; - if (token_stack->count > 0){ + Cpp_Token *token_array = token_stack->tokens; + Cpp_Token *token = 0; + int first = 0; + int count = token_stack->count; + int last = count; + int this_start = 0, next_start = 0; + + if (count > 0){ for (;;){ result.token_index = (first + last)/2; - - int this_start = token_stack->tokens[result.token_index].start; - int next_start; - if (result.token_index + 1 < token_stack->count){ - next_start = token_stack->tokens[result.token_index+1].start; + token = token_array + result.token_index; + + this_start = token->start; + + if (result.token_index + 1 < count){ + next_start = (token + 1)->start; } else{ - next_start = this_start + token_stack->tokens[result.token_index].size; + next_start = this_start + token->size; } if (this_start <= pos && pos < next_start){ break; @@ -1354,13 +188,12 @@ cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ break; } } - - if (result.token_index == token_stack->count){ + + if (result.token_index == count){ --result.token_index; result.in_whitespace = 1; } else{ - Cpp_Token *token = token_stack->tokens + result.token_index; if (token->start + token->size <= pos){ result.in_whitespace = 1; } @@ -1370,20 +203,858 @@ cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ result.token_index = -1; result.in_whitespace = 1; } - - return result; + + return(result); } -FCPP_LINK void -cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int amount){ - int count = stack->count; +lexer_link void +cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amount){ Cpp_Token *token = stack->tokens + from_token_i; - for (int i = from_token_i; i < count; ++i, ++token){ - token->start += amount; + int count = stack->count, i; + + for (i = from_token_i; i < count; ++i, ++token){ + token->start += shift_amount; } } -FCPP_LINK Cpp_Relex_State +enum Pos_Update_Rule{ + PUR_none, + PUR_back_one, +}; + +lexer_link Lex_PP_State +cpp_pp_directive_to_state(Cpp_Token_Type type){ + Lex_PP_State result = LSPP_default; + switch (type){ + case CPP_PP_INCLUDE: + case CPP_PP_IMPORT: + case CPP_PP_USING: + result = LSPP_include; + break; + + case CPP_PP_DEFINE: + result = LSPP_macro_identifier; + break; + + case CPP_PP_UNDEF: + case CPP_PP_IFDEF: + case CPP_PP_IFNDEF: + result = LSPP_identifier; + break; + + case CPP_PP_IF: + case CPP_PP_ELIF: + result = LSPP_body_if; + break; + + case CPP_PP_PRAGMA: + result = LSPP_body; + break; + + case CPP_PP_LINE: + result = LSPP_number; + break; + + case CPP_PP_ERROR: + result = LSPP_error; + break; + + case CPP_PP_UNKNOWN: + case CPP_PP_ELSE: + case CPP_PP_ENDIF: + result = LSPP_junk; + break; + } + return(result); +} + +lexer_link Cpp_Token_Merge +cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){ + Cpp_Token_Merge result = {(Cpp_Token_Type)0}; + if (next_token.type == CPP_TOKEN_COMMENT && prev_token.type == CPP_TOKEN_COMMENT && + next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ + result.did_merge = 1; + prev_token.size = next_token.start + next_token.size - prev_token.start; + result.new_token = prev_token; + } + else if (next_token.type == CPP_TOKEN_JUNK && prev_token.type == CPP_TOKEN_JUNK && + next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ + result.did_merge = 1; + prev_token.size = next_token.start + next_token.size - prev_token.start; + result.new_token = prev_token; + } + return result; +} + +lexer_link int +cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){ + Cpp_Token_Merge merge = {(Cpp_Token_Type)0}; + Cpp_Token prev_token = {(Cpp_Token_Type)0}; + + if (token_i > 0){ + prev_token = out_tokens[token_i - 1]; + merge = cpp_attempt_token_merge(prev_token, token); + if (merge.did_merge){ + out_tokens[token_i - 1] = merge.new_token; + } + } + + if (!merge.did_merge){ + out_tokens[token_i++] = token; + } + + return(token_i); +} + +lexer_link bool +cpp_push_token_nonalloc(Cpp_Token_Stack *out_tokens, Cpp_Token token){ + bool result = 0; + if (out_tokens->count == out_tokens->max_count){ + out_tokens->count = + cpp_place_token_nonalloc(out_tokens->tokens, out_tokens->count, token); + result = 1; + } + return(result); +} + +struct Lex_Data{ + char *tb; + int tb_pos; + int token_start; + + int pos; + int pos_overide; + int chunk_pos; + + Lex_FSM fsm; + Whitespace_FSM wfsm; + unsigned char pp_state; + unsigned char completed; + + Cpp_Token token; + + int __pc__; +}; +inline Lex_Data +lex_data_init(char *tb){ + Lex_Data data = {0}; + data.tb = tb; + return(data); +} + + +#define DrCase(PC) case PC: goto resumespot_##PC + +#define DrYield(PC, n) {\ + token_stack_out->count = token_i;\ + *S_ptr = S; S_ptr->__pc__ = PC; return(n); resumespot_##PC:; } + +#define DrReturn(n) {\ + token_stack_out->count = token_i;\ + *S_ptr = S; S_ptr->__pc__ = -1; return(n); } + +enum Lex_Result{ + LexFinished, + LexNeedChunk, + LexNeedTokenMemory, + LexHitTokenLimit +}; + +lexer_link int +cpp_lex_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, + Cpp_Token_Stack *token_stack_out){ + Lex_Data S = *S_ptr; + + Cpp_Token *out_tokens = token_stack_out->tokens; + int token_i = token_stack_out->count; + int max_token_i = token_stack_out->max_count; + + Pos_Update_Rule pos_update_rule = PUR_none; + + char c = 0; + + int end_pos = size + S.chunk_pos; + chunk -= S.chunk_pos; + + switch (S.__pc__){ + DrCase(1); + DrCase(2); + DrCase(3); + DrCase(4); + DrCase(5); + DrCase(6); + DrCase(7); + } + + for (;;){ + S.wfsm.white_done = 0; + S.wfsm.pp_state = S.pp_state; + for(;;){ + for (; S.wfsm.pp_state < LSPP_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + int i = S.wfsm.pp_state + whitespace_fsm_eq_classes[c]; + S.wfsm.pp_state = whitespace_fsm_table[i]; + } + S.wfsm.white_done = (S.wfsm.pp_state >= LSPP_count); + + if (S.wfsm.white_done == 0){ + S.chunk_pos += size; + DrYield(4, LexNeedChunk); + } + else break; + } + --S.pos; + S.pp_state = S.wfsm.pp_state; + if (S.pp_state >= LSPP_count){ + S.pp_state -= LSPP_count; + } + + S.token.state_flags = S.pp_state; + + S.token_start = S.pos; + S.tb_pos = 0; + S.fsm = zero_lex_fsm(); + for(;;){ + { + unsigned short *eq_classes = get_eq_classes[S.pp_state]; + unsigned char *fsm_table = get_table[S.pp_state]; + + for (; S.fsm.state < LS_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.tb[S.tb_pos++] = c; + + int i = S.fsm.state + eq_classes[c]; + S.fsm.state = fsm_table[i]; + S.fsm.multi_line |= multiline_state_table[S.fsm.state]; + } + S.fsm.emit_token = (S.fsm.state >= LS_count); + } + + if (S.fsm.emit_token == 0){ + S.chunk_pos += size; + DrYield(3, LexNeedChunk); + } + else break; + } + + Assert(S.fsm.emit_token == 1); + + if (c == 0){ + S.completed = 1; + } + + if (S.fsm.state >= LS_count) S.fsm.state -= LS_count; + pos_update_rule = PUR_none; + if (S.pp_state == LSPP_include){ + if (c == 0) S.fsm.emit_token = 0; + switch (S.fsm.state){ + case LSINC_default:break; + + case LSINC_quotes: + case LSINC_pointy: + S.token.type = CPP_TOKEN_INCLUDE_FILE; + S.token.flags = 0; + break; + + case LSINC_junk: + S.token.type = CPP_TOKEN_JUNK; + S.token.flags = 0; + break; + } + } + else{ + switch (S.fsm.state){ + case LS_default: + switch (c){ + case 0: S.fsm.emit_token = 0; break; + +#define OperCase(op,t) case op: S.token.type = t; break; + OperCase('{', CPP_TOKEN_BRACE_OPEN); + OperCase('}', CPP_TOKEN_BRACE_CLOSE); + + OperCase('[', CPP_TOKEN_BRACKET_OPEN); + OperCase(']', CPP_TOKEN_BRACKET_CLOSE); + + OperCase('(', CPP_TOKEN_PARENTHESE_OPEN); + OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE); + + OperCase('~', CPP_TOKEN_TILDE); + OperCase(',', CPP_TOKEN_COMMA); + OperCase(';', CPP_TOKEN_SEMICOLON); + OperCase('?', CPP_TOKEN_TERNARY_QMARK); + + OperCase('@', CPP_TOKEN_JUNK); + OperCase('$', CPP_TOKEN_JUNK); +#undef OperCase + + case '\\': + if (S.pp_state == LSPP_default){ + S.token.type = CPP_TOKEN_JUNK; + } + else{ + S.pos_overide = S.pos; + S.wfsm.white_done = 0; + for (;;){ + for (; S.wfsm.white_done == 0 && S.pos < end_pos;){ + c = chunk[S.pos++]; + if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1; + } + + if (S.wfsm.white_done == 0){ + S.chunk_pos += size; + DrYield(1, LexNeedChunk); + } + else break; + } + + if (c == '\n'){ + S.fsm.emit_token = 0; + S.pos_overide = 0; + } + else{ + S.token.type = CPP_TOKEN_JUNK; + } + } + break; + } + if (c != '@' && c != '$' && c != '\\'){ + S.token.flags = CPP_TFLAG_IS_OPERATOR; + } + break; + + case LS_identifier: + { + --S.pos; + + int word_size = S.pos - S.token_start; + + if (S.pp_state == LSPP_body_if){ + if (match_ss(make_string(S.tb, word_size), make_lit_string("defined"))){ + S.token.type = CPP_TOKEN_DEFINED; + S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; + break; + } + } + + Sub_Match_List_Result sub_match; + sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size); + + if (sub_match.index != -1){ + S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; + S.token.flags = CPP_TFLAG_IS_KEYWORD; + } + else{ + sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size); + + if (sub_match.index != -1){ + String_And_Flag data = keywords.data[sub_match.index]; + S.token.type = (Cpp_Token_Type)data.flags; + S.token.flags = CPP_TFLAG_IS_KEYWORD; + } + else{ + S.token.type = CPP_TOKEN_IDENTIFIER; + S.token.flags = 0; + } + } + }break; + + case LS_pound: + S.token.flags = 0; + switch (c){ + case '#': S.token.type = CPP_PP_CONCAT; break; + default: + S.token.type = CPP_PP_STRINGIFY; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_pp: + { + S.fsm.directive_state = LSDIR_default; + S.fsm.emit_token = 0; + for (;;){ + for (; S.fsm.directive_state < LSDIR_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.fsm.directive_state = pp_directive_table[S.fsm.directive_state + pp_directive_eq_classes[c]]; + } + S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count); + + if (S.fsm.emit_token == 0){ + S.chunk_pos += size; + DrYield(6, LexNeedChunk); + } + else break; + } + --S.pos; + + Cpp_Token_Type type = (Cpp_Token_Type)(S.fsm.directive_state - pp_directive_terminal_base); + S.token.type = type; + if (type == CPP_TOKEN_JUNK){ + S.token.flags = 0; + } + else{ + S.token.flags = CPP_TFLAG_PP_DIRECTIVE; + S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type); + } + }break; + + case LS_number: + case LS_number0: + case LS_hex: + S.fsm.int_state = LSINT_default; + S.fsm.emit_token = 0; + --S.pos; + for (;;){ + for (; S.fsm.int_state < LSINT_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.fsm.int_state = int_fsm_table[S.fsm.int_state + int_fsm_eq_classes[c]]; + } + S.fsm.emit_token = (S.fsm.int_state >= LSINT_count); + + if (S.fsm.emit_token == 0){ + S.chunk_pos += size; + DrYield(5, LexNeedChunk); + } + else break; + } + --S.pos; + + S.token.type = CPP_TOKEN_INTEGER_CONSTANT; + S.token.flags = 0; + break; + + case LS_float: + case LS_crazy_float0: + case LS_crazy_float1: + S.token.type = CPP_TOKEN_FLOATING_CONSTANT; + S.token.flags = 0; + switch (c){ + case 'f': case 'F': + case 'l': case 'L':break; + default: + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_char: + case LS_char_slashed: + S.token.type = CPP_TOKEN_JUNK; + if (c == '\''){ + S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; + } + S.token.flags = 0; + break; + + case LS_char_multiline: + S.token.type = CPP_TOKEN_JUNK; + if (c == '\''){ + S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; + } + S.token.flags = CPP_TFLAG_MULTILINE; + break; + + case LS_string: + case LS_string_slashed: + S.token.type = CPP_TOKEN_JUNK; + if (c == '"'){ + S.token.type = CPP_TOKEN_STRING_CONSTANT; + } + S.token.flags = 0; + break; + + case LS_string_multiline: + S.token.type = CPP_TOKEN_JUNK; + if (c == '"'){ + S.token.type = CPP_TOKEN_STRING_CONSTANT; + } + S.token.flags = CPP_TFLAG_MULTILINE; + break; + + case LS_comment_pre: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_DIVEQ; break; + default: + S.token.type = CPP_TOKEN_DIV; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_comment: + case LS_comment_slashed: + S.token.type = CPP_TOKEN_COMMENT; + S.token.flags = 0; + pos_update_rule = PUR_back_one; + break; + + case LS_comment_block: + case LS_comment_block_ending: + S.token.type = CPP_TOKEN_COMMENT; + S.token.flags = 0; + break; + + case LS_error_message: + S.token.type = CPP_TOKEN_ERROR_MESSAGE; + S.token.flags = 0; + pos_update_rule = PUR_back_one; + break; + + case LS_dot: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '*': S.token.type = CPP_TOKEN_PTRDOT; break; + default: + S.token.type = CPP_TOKEN_DOT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_ellipsis: + switch (c){ + case '.': + S.token.flags = CPP_TFLAG_IS_OPERATOR; + S.token.type = CPP_TOKEN_ELLIPSIS; + break; + + default: + S.token.type = CPP_TOKEN_JUNK; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_less: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_LESSEQ; break; + default: + S.token.type = CPP_TOKEN_LESS; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_less_less: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_LSHIFTEQ; break; + default: + S.token.type = CPP_TOKEN_LSHIFT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_more: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_GRTREQ; break; + default: + S.token.type = CPP_TOKEN_GRTR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_more_more: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_RSHIFTEQ; break; + default: + S.token.type = CPP_TOKEN_RSHIFT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_minus: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '-': S.token.type = CPP_TOKEN_DECREMENT; break; + case '=': S.token.type = CPP_TOKEN_SUBEQ; break; + default: + S.token.type = CPP_TOKEN_MINUS; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_arrow: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '*': S.token.type = CPP_TOKEN_PTRARROW; break; + default: + S.token.type = CPP_TOKEN_ARROW; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_and: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '&': S.token.type = CPP_TOKEN_AND; break; + case '=': S.token.type = CPP_TOKEN_ANDEQ; break; + default: + S.token.type = CPP_TOKEN_AMPERSAND; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_or: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '|': S.token.type = CPP_TOKEN_OR; break; + case '=': S.token.type = CPP_TOKEN_OREQ; break; + default: + S.token.type = CPP_TOKEN_BIT_OR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_plus: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '+': S.token.type = CPP_TOKEN_INCREMENT; break; + case '=': S.token.type = CPP_TOKEN_ADDEQ; break; + default: + S.token.type = CPP_TOKEN_PLUS; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_colon: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case ':': S.token.type = CPP_TOKEN_SCOPE; break; + default: + S.token.type = CPP_TOKEN_COLON; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_star: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_MULEQ; break; + default: + S.token.type = CPP_TOKEN_STAR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_modulo: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_MODEQ; break; + default: + S.token.type = CPP_TOKEN_MOD; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_caret: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_XOREQ; break; + default: + S.token.type = CPP_TOKEN_BIT_XOR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_eq: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_EQEQ; break; + default: + S.token.type = CPP_TOKEN_EQ; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_bang: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_NOTEQ; break; + default: + S.token.type = CPP_TOKEN_NOT; + pos_update_rule = PUR_back_one; + break; + } + break; + } + + switch (pos_update_rule){ + case PUR_back_one: + --S.pos; + break; + + default: + if (chunk[S.pos-1] == 0){ + --S.pos; + } + break; + } + + if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ + switch (S.pp_state){ + case LSPP_include: + if (S.token.type != CPP_TOKEN_INCLUDE_FILE){ + S.token.type = CPP_TOKEN_JUNK; + } + S.pp_state = LSPP_junk; + break; + + case LSPP_macro_identifier: + if (S.fsm.state != LS_identifier){ + S.token.type = CPP_TOKEN_JUNK; + S.pp_state = LSPP_junk; + } + else{ + S.pp_state = LSPP_body; + } + break; + + case LSPP_identifier: + if (S.fsm.state != LS_identifier){ + S.token.type = CPP_TOKEN_JUNK; + } + S.pp_state = LSPP_junk; + break; + + case LSPP_number: + if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){ + S.token.type = CPP_TOKEN_JUNK; + S.pp_state = LSPP_junk; + } + else{ + S.pp_state = LSPP_include; + } + break; + + case LSPP_junk: + S.token.type = CPP_TOKEN_JUNK; + break; + } + } + } + + if (S.fsm.emit_token){ + S.token.start = S.token_start; + if (S.pos_overide){ + S.token.size = S.pos_overide - S.token_start; + S.pos_overide = 0; + } + else{ + S.token.size = S.pos - S.token_start; + } + if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ + S.token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0); + } + + token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token); + if (token_i == max_token_i){ + if (S.pos == end_pos){ + S.chunk_pos += size; + DrYield(7, LexNeedChunk); + } + DrYield(2, LexNeedTokenMemory); + } + } + + if (S.completed){ + break; + } + } + + DrReturn(LexFinished); +} + +#undef DrYield +#undef DrReturn +#undef DrCase + +lexer_link int +cpp_lex_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, + Cpp_Token_Stack *token_stack_out, int max_tokens){ + Cpp_Token_Stack temp_stack = *token_stack_out; + if (temp_stack.max_count > temp_stack.count + max_tokens){ + temp_stack.max_count = temp_stack.count + max_tokens; + } + + int result = cpp_lex_nonalloc(S_ptr, chunk, size, &temp_stack); + + token_stack_out->count = temp_stack.count; + + if (result == LexNeedTokenMemory){ + if (token_stack_out->count < token_stack_out->max_count){ + result = LexHitTokenLimit; + } + } + + return(result); +} + +lexer_link int +cpp_lex_size_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, int full_size, + Cpp_Token_Stack *token_stack_out){ + int result = 0; + if (S_ptr->pos >= full_size){ + char end_null = 0; + result = cpp_lex_nonalloc(S_ptr, &end_null, 1, token_stack_out); + } + else{ + result = cpp_lex_nonalloc(S_ptr, chunk, size, token_stack_out); + if (result == LexNeedChunk){ + if (S_ptr->pos >= full_size){ + char end_null = 0; + result = cpp_lex_nonalloc(S_ptr, &end_null, 1, token_stack_out); + } + } + } + return(result); +} + +lexer_link int +cpp_lex_size_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, int full_size, + Cpp_Token_Stack *token_stack_out, int max_tokens){ + Cpp_Token_Stack temp_stack = *token_stack_out; + if (temp_stack.max_count > temp_stack.count + max_tokens){ + temp_stack.max_count = temp_stack.count + max_tokens; + } + + int result = cpp_lex_size_nonalloc(S_ptr, chunk, size, full_size, + &temp_stack); + + token_stack_out->count = temp_stack.count; + + if (result == LexNeedTokenMemory){ + if (token_stack_out->count < token_stack_out->max_count){ + result = LexHitTokenLimit; + } + } + + return(result); +} + +lexer_link Cpp_Relex_State cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance){ Cpp_Relex_State state; @@ -1396,21 +1067,21 @@ cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, state.tolerance = tolerance; Cpp_Get_Token_Result result = cpp_get_token(stack, start); - if (result.token_index <= 0){ + + state.start_token_i = result.token_index-1; + if (state.start_token_i < 0){ state.start_token_i = 0; } - else{ - state.start_token_i = result.token_index-1; - } result = cpp_get_token(stack, end); - if (result.token_index < 0){ - result.token_index = 0; - } - else if (end > stack->tokens[result.token_index].start){ - ++result.token_index; - } + state.end_token_i = result.token_index; + if (end > stack->tokens[state.end_token_i].start){ + ++state.end_token_i; + } + if (state.end_token_i < 0){ + state.end_token_i = 0; + } state.relex_start = stack->tokens[state.start_token_i].start; if (start < state.relex_start){ @@ -1422,6 +1093,13 @@ cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, return(state); } +inline char +cpp_token_get_pp_state(fcpp_u16 bitfield){ + return (char)(bitfield); +} + +// TODO(allen): Eliminate this once we actually store the EOF token +// in the token stack. inline Cpp_Token cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){ Cpp_Token result; @@ -1438,44 +1116,64 @@ cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){ return result; } -FCPP_LINK bool -cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ +FCPP_LINK int +cpp_relex_nonalloc_main(Cpp_Relex_State *state, + Cpp_Token_Stack *relex_stack, + int *relex_end, + char *spare){ Cpp_Token_Stack *stack = state->stack; Cpp_Token *tokens = stack->tokens; cpp_shift_token_starts(stack, state->end_token_i, state->amount); - Cpp_Lex_Data lex = {}; + Lex_Data lex = lex_data_init(spare); lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); lex.pos = state->relex_start; int relex_end_i = state->end_token_i; Cpp_Token match_token = cpp__get_token(stack, tokens, state->size, relex_end_i); Cpp_Token end_token = match_token; - bool went_too_far = 0; + int went_too_far = false; + // TODO(allen): This can be better I suspect. for (;;){ - Cpp_Read_Result read = cpp_lex_step(state->data, state->size, &lex); - if (read.has_result){ - if (read.token.start == end_token.start && - read.token.size == end_token.size && - read.token.flags == end_token.flags && - read.token.state_flags == end_token.state_flags){ - break; + int result = + cpp_lex_size_nonalloc(&lex, + state->data, + state->size, + state->size, + relex_stack, 1); + + switch (result){ + case LexHitTokenLimit: + { + Cpp_Token token = relex_stack->tokens[relex_stack->count-1]; + if (token.start == end_token.start && + token.size == end_token.size && + token.flags == end_token.flags && + token.state_flags == end_token.state_flags){ + --relex_stack->count; + goto double_break; + } + + while (lex.pos > end_token.start && relex_end_i < stack->count){ + ++relex_end_i; + end_token = cpp__get_token(stack, tokens, state->size, relex_end_i); + } } - cpp_push_token_nonalloc(relex_stack, read.token); + break; - while (lex.pos > end_token.start && relex_end_i < stack->count){ - ++relex_end_i; - end_token = cpp__get_token(stack, tokens, state->size, relex_end_i); - } - if (relex_stack->count == relex_stack->max_count){ - went_too_far = 1; - break; - } + case LexNeedChunk: Assert(!"Invalid path"); break; + + case LexNeedTokenMemory: + went_too_far = true; + goto double_break; + + case LexFinished: + goto double_break; } - if (lex.pos >= state->size) break; } + double_break:; if (!went_too_far){ if (relex_stack->count > 0){ @@ -1509,28 +1207,32 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in return(went_too_far); } -#ifndef FCPP_FORBID_MALLOC +#if !defined(FCPP_FORBID_MALLOC) + +#include +#include + FCPP_LINK Cpp_Token_Stack cpp_make_token_stack(int starting_max){ Cpp_Token_Stack token_stack; token_stack.count = 0; token_stack.max_count = starting_max; - token_stack.tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*starting_max); - return token_stack; + token_stack.tokens = (Cpp_Token*)malloc(sizeof(Cpp_Token)*starting_max); + return(token_stack); } FCPP_LINK void cpp_free_token_stack(Cpp_Token_Stack token_stack){ - FCPP_FREE_MEMORY(token_stack.tokens); + free(token_stack.tokens); } FCPP_LINK void cpp_resize_token_stack(Cpp_Token_Stack *token_stack, int new_max){ Cpp_Token *new_tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*new_max); - + if (new_tokens){ - FCPP_MEM_COPY(new_tokens, token_stack->tokens, sizeof(Cpp_Token)*token_stack->count); - FCPP_FREE_MEMORY(token_stack->tokens); + memcpy(new_tokens, token_stack->tokens, sizeof(Cpp_Token)*token_stack->count); + free(token_stack->tokens); token_stack->tokens = new_tokens; token_stack->max_count = new_max; } @@ -1541,168 +1243,45 @@ cpp_push_token(Cpp_Token_Stack *token_stack, Cpp_Token token){ if (!cpp_push_token_nonalloc(token_stack, token)){ int new_max = 2*token_stack->max_count + 1; cpp_resize_token_stack(token_stack, new_max); - bool result = cpp_push_token_nonalloc(token_stack, token); - _Assert(result); + cpp_push_token_nonalloc(token_stack, token); } } FCPP_LINK void cpp_lex_file(char *data, int size, Cpp_Token_Stack *token_stack_out){ - Cpp_Lex_Data lex = {}; - while (lex.pos < size){ - Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex); - if (step_result.has_result){ - cpp_push_token(token_stack_out, step_result.token); + Lex_Data S = {0}; + S.tb = (char*)malloc(size); + int quit = 0; + + token_stack_out->count = 0; + for (;!quit;){ + int result = cpp_lex_nonalloc(&S, data, size, token_stack_out); + switch (result){ + case LexFinished: + { + quit = 1; + }break; + + case LexNeedChunk: + { + char empty = 0; + cpp_lex_nonalloc(&S, &empty, 1, token_stack_out); + quit = 1; + }break; + + case LexNeedTokenMemory: + { + int new_max = 2*token_stack_out->max_count + 1; + cpp_resize_token_stack(token_stack_out, new_max); + }break; } } + + free(S.tb); } -FCPP_LINK bool -cpp_relex_file_limited(char *data, int size, Cpp_Token_Stack *stack, - int start, int end, int amount, int tolerance){ -#if 0 - int start_token_i, end_token_i; - Cpp_Get_Token_Result get_result = cpp_get_token(token_stack, start_i); - start_token_i = get_result.token_index; - get_result = cpp_get_token(token_stack, end_i); - end_token_i = get_result.token_index; - if (end_token_i == -1){ - end_token_i = 0; - } - else if (end > token_stack->tokens[end_token_i].start){ - ++end_token_i; - } - cpp_shift_token_starts(token_stack, end_token_i, amount); - - int relex_start_i = start_token_i - 1; - if (relex_start_i < 0){ - relex_start_i = 0; - } - - int end_guess_i = end_token_i + 1; - if (end_guess_i > token_stack->count){ - --end_guess_i; - } -#endif - - int relex_start_i; - int end_token_i, end_guess_i; - { - Cpp_Get_Token_Result result = cpp_get_token(stack, start); - if (result.token_index <= 0){ - relex_start_i = 0; - } - else{ - relex_start_i = result.token_index-1; - } - - result = cpp_get_token(stack, end); - if (result.token_index < 0) result.token_index = 0; - else if (end > stack->tokens[result.token_index].start) ++result.token_index; - end_token_i = result.token_index; - end_guess_i = result.token_index+1; - } - - int relex_start = stack->tokens[relex_start_i].start; - if (start < relex_start) relex_start = start; - - cpp_shift_token_starts(stack, end_token_i, amount); - Cpp_Token_Stack relex_stack = cpp_make_token_stack((end_guess_i - relex_start_i + 1) * 3 / 2); - Cpp_Lex_Data lex = {}; - lex.pp_state = cpp_token_get_pp_state(stack->tokens[relex_start_i].state_flags); - lex.pos = relex_start; - bool went_too_far = 0; - - while (1){ - Cpp_Read_Result result = cpp_lex_step(data, size, &lex); - if (result.has_result){ - if (end_guess_i < stack->count && - result.token.start == stack->tokens[end_guess_i].start && - result.token.size == stack->tokens[end_guess_i].size && - result.token.flags == stack->tokens[end_guess_i].flags && - result.token.state_flags == stack->tokens[end_guess_i].state_flags){ - break; - } - else{ - cpp_push_token(&relex_stack, result.token); - while (lex.pos > stack->tokens[end_guess_i].start && - end_guess_i < stack->count){ - ++end_guess_i; - } - } - } - - if (lex.pos >= size){ - break; - } - - if (tolerance >= 0 && relex_stack.count + relex_start_i >= end_guess_i + tolerance){ - went_too_far = 1; - break; - } - } - - if (!went_too_far){ - int relex_end_i = end_guess_i; - - if (relex_stack.count > 0){ - if (relex_start_i > 0){ - Cpp_Token_Merge merge = cpp_attempt_token_merge(stack->tokens[relex_start_i - 1], - relex_stack.tokens[0]); - if (merge.did_merge){ - --relex_start_i; - relex_stack.tokens[0] = merge.new_token; - } - } - - if (relex_end_i < stack->count){ - Cpp_Token_Merge merge = cpp_attempt_token_merge(relex_stack.tokens[relex_stack.count - 1], - stack->tokens[relex_end_i]); - if (merge.did_merge){ - ++relex_end_i; - relex_stack.tokens[relex_stack.count - 1] = merge.new_token; - } - } - } - - int token_delete_amount = relex_end_i - relex_start_i; - int token_shift_amount = relex_stack.count - token_delete_amount; - - if (token_shift_amount != 0){ - int new_token_count = stack->count + token_shift_amount; - if (new_token_count > stack->max_count){ - int new_max = 2*stack->max_count + 1; - while (new_token_count > new_max){ - new_max = 2*new_max + 1; - } - cpp_resize_token_stack(stack, new_max); - } - - if (relex_end_i < stack->count){ - FCPP_MEM_MOVE(stack->tokens + relex_end_i + token_shift_amount, - stack->tokens + relex_end_i, sizeof(Cpp_Token)*(stack->count - relex_end_i)); - } - - stack->count += token_shift_amount; - } - - FCPP_MEM_COPY(stack->tokens + relex_start_i, relex_stack.tokens, sizeof(Cpp_Token)*relex_stack.count); - cpp_free_token_stack(relex_stack); - } - - else{ - cpp_shift_token_starts(stack, end_token_i, -amount); - cpp_free_token_stack(relex_stack); - } - - return went_too_far; -} #endif -#undef _Assert -#undef _TentativeAssert - -#undef FCPP_LEXER_IMPLEMENTATION -#endif // #ifdef FCPP_LEXER_IMPLEMENTATION +#endif // BOTTOM diff --git a/test/4cpp_lexer_fsms.h b/4cpp_lexer_fsms.h similarity index 100% rename from test/4cpp_lexer_fsms.h rename to 4cpp_lexer_fsms.h diff --git a/4cpp_lexer_old.h b/4cpp_lexer_old.h new file mode 100644 index 00000000..c5e31b00 --- /dev/null +++ b/4cpp_lexer_old.h @@ -0,0 +1,1683 @@ +/* "4cpp" Open C++ Parser v0.1: Lexer + no warranty implied; use at your own risk + +NOTES ON USE: + OPTIONS: + Set options by defining macros before including this file. + + FCPP_LEXER_IMPLEMENTATION - causes this file to output function implementations + - this option is unset after use so that future includes of this file + in the same unit do not continue to output implementations + + FCPP_NO_MALLOC - prevent including + FCPP_NO_ASSERT - prevent including + FCPP_NO_STRING - prevent including + FCPP_NO_CRT - FCPP_NO_MALLOC & FCPP_NO_ASSERT & FCPP_NO_STRING + + FCPP_FORBID_MALLOC - one step above *NO_MALLOC with this set 4cpp functions that do allocations + are not allowed to be declared or defined at all, forcing the user to handle + allocation themselves + - implies FCPP_NO_MALLOC + + FCPP_GET_MEMORY - defines how to make allocations, interface of malloc, defaults to malloc + FCPP_FREE_MEMORY - defines how to free memory, interface of ree, defaults to free + (The above must be defined if FCPP_NO_MALLOC is set, unless FCPP_FORBID_MALLOC is set) + + FCPP_ASSERT - defines how to make assertions, interface of assert, defaults to assert + + FCPP_MEM_COPY - defines how to copy blocks of memory, interface of memcpy, defaults to memcpy + FCPP_MEM_MOVE - defines how to move blocks of memory, interface of memmove, defaults to memmove + (The above must be defined if FCPP_NO_STRING is set) + + FCPP_LINK - defines linkage of non-inline functions, defaults to static + FCPP_EXTERN - changes FCPP_LINK default to extern, this option is ignored if FCPP_LINK is defined + + include the file "4cpp_clear_config.h" if you want to undefine all options for some reason + + HIDDDEN DEPENDENCIES: + 4cpp is not a single file include library, there are dependencies between the files. + Be sure to include these dependencies before 4cpp_lexer.h: + + 4cpp_types.h + 4cpp_string.h +*/ + +// TOP +// TODO(allen): +// +// EASE OF USE AND DEPLOYMENT +// - make it easier to locate the list of function declarations +// - more C compatibility +// +// POTENTIAL +// - Experiment with optimizations. Sean's State machine? +// - Reserve 0th token for null? Put a EOF token at the end? +// - Pass Cpp_File and Cpp_Token_Stack by value instead of by pointer? +// +// CURRENT +// - lex in chunks +// + +#include "4coder_config.h" + +#ifndef FCPP_LEXER_INC +#define FCPP_LEXER_INC + +#include "4cpp_lexer_types.h" + +struct Cpp_Lex_Data{ + Cpp_Preprocessor_State pp_state; + int pos; + int complete; +}; + +struct Cpp_Read_Result{ + Cpp_Token token; + int pos; + char newline; + char has_result; +}; + +// TODO(allen): revisit this keyword data declaration system +struct String_And_Flag{ + char *str; + fcpp_u32 flags; +}; + +struct String_List{ + String_And_Flag *data; + int count; +}; + +struct Sub_Match_List_Result{ + int index; + fcpp_i32 new_pos; +}; + +inline fcpp_u16 +cpp_token_set_pp_state(fcpp_u16 bitfield, Cpp_Preprocessor_State state_value){ + return (fcpp_u16)state_value; +} + +inline Cpp_Preprocessor_State +cpp_token_get_pp_state(fcpp_u16 bitfield){ + return (Cpp_Preprocessor_State)(bitfield); +} + +inline String +cpp_get_lexeme(char *str, Cpp_Token *token){ + String result; + result.str = str + token->start; + result.size = token->size; + return result; +} + +inline bool +is_keyword(Cpp_Token_Type type){ + return (type >= CPP_TOKEN_KEY_TYPE && type <= CPP_TOKEN_KEY_OTHER); +} + +FCPP_LINK Sub_Match_List_Result sub_match_list(char *data, int size, int pos, String_List list, int sub_size); + +FCPP_LINK Seek_Result seek_unescaped_eol(char *data, int size, int pos); +FCPP_LINK Seek_Result seek_unescaped_delim(char *data, int size, int pos, char delim); +FCPP_LINK Seek_Result seek_block_comment_end(char *data, int size, int pos); + +FCPP_LINK Cpp_Read_Result cpp_read_whitespace(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_junk_line(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_operator(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_pp_operator(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_alpha_numeric(char *data, int size, int pos, bool in_if_body); +inline Cpp_Read_Result cpp_read_alpha_numeric(char *data, int size, int pos) { return cpp_read_alpha_numeric(data, size, pos, 0); } +FCPP_LINK Cpp_Read_Result cpp_read_number(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_string_litteral(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_character_litteral(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_line_comment(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_block_comment(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_preprocessor(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_pp_include_file(char *data, int size, int pos); +FCPP_LINK Cpp_Read_Result cpp_read_pp_default_mode(char *data, int size, int pos, bool in_if_body); +inline Cpp_Read_Result cpp_read_pp_default_mode(char *data, int size, int pos) { return cpp_read_pp_default_mode(data, size, pos, 0); } + +FCPP_LINK Cpp_Token_Merge cpp_attempt_token_merge(Cpp_Token prev, Cpp_Token next); + +FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token); +FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token); + +inline Cpp_Lex_Data cpp_lex_data_zero() { Cpp_Lex_Data data = {(Cpp_Preprocessor_State)0}; return(data); } + +FCPP_LINK Cpp_Read_Result cpp_lex_step(char *data, int size, Cpp_Lex_Data *lex); + +FCPP_LINK int cpp_lex_file_token_count(char *data, int size); +FCPP_LINK Cpp_Lex_Data cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *stack, Cpp_Lex_Data lex_data); +inline Cpp_Lex_Data cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *stack) { return cpp_lex_file_nonalloc(data, size, stack, cpp_lex_data_zero()); } + +FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *stack, int pos); + +FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end); +FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token, int amount); + +FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance); +FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State state, Cpp_Token_Stack *stack); + +#ifndef FCPP_FORBID_MALLOC +FCPP_LINK Cpp_Token_Stack cpp_make_token_stack(int max); +FCPP_LINK void cpp_free_token_stack(Cpp_Token_Stack stack); +FCPP_LINK void cpp_resize_token_stack(Cpp_Token_Stack *stack, int new_max); + +FCPP_LINK void cpp_push_token(Cpp_Token_Stack *stack, Cpp_Token token); +FCPP_LINK void cpp_lex_file(char *data, int size, Cpp_Token_Stack *stack); +FCPP_LINK bool cpp_relex_file_limited(char *data, int size, Cpp_Token_Stack *stack, int start_i, int end_i, int amount, int extra_tolerance); +inline void cpp_relex_file(char *data, int size, Cpp_Token_Stack *stack, int start_i, int end_i, int amount) +{ cpp_relex_file_limited(data, size, stack, start_i, end_i, amount, -1); } +#endif + +#define FCPP_STRING_LIST(x) {x, FCPP_COUNT(x)} + +// TODO(allen): shift towards storing in a context +FCPP_GLOBAL String_And_Flag int_suf_strings[] = { + {"ull"}, {"ULL"}, + {"llu"}, {"LLU"}, + {"ll"}, {"LL"}, + {"l"}, {"L"}, + {"u"}, {"U"} +}; + +FCPP_GLOBAL String_List int_sufs = FCPP_STRING_LIST(int_suf_strings); + +FCPP_GLOBAL String_And_Flag float_suf_strings[] = { + {"f"}, {"F"}, + {"l"}, {"L"} +}; +FCPP_GLOBAL String_List float_sufs = FCPP_STRING_LIST(float_suf_strings); + +FCPP_GLOBAL String_And_Flag bool_lit_strings[] = { + {"true"}, {"false"} +}; +FCPP_GLOBAL String_List bool_lits = FCPP_STRING_LIST(bool_lit_strings); + +FCPP_GLOBAL String_And_Flag keyword_strings[] = { + {"and", CPP_TOKEN_AND}, + {"and_eq", CPP_TOKEN_ANDEQ}, + {"bitand", CPP_TOKEN_BIT_AND}, + {"bitor", CPP_TOKEN_BIT_OR}, + {"or", CPP_TOKEN_OR}, + {"or_eq", CPP_TOKEN_OREQ}, + {"sizeof", CPP_TOKEN_SIZEOF}, + {"alignof", CPP_TOKEN_ALIGNOF}, + {"decltype", CPP_TOKEN_DECLTYPE}, + {"throw", CPP_TOKEN_THROW}, + {"new", CPP_TOKEN_NEW}, + {"delete", CPP_TOKEN_DELETE}, + {"xor", CPP_TOKEN_BIT_XOR}, + {"xor_eq", CPP_TOKEN_XOREQ}, + {"not", CPP_TOKEN_NOT}, + {"not_eq", CPP_TOKEN_NOTEQ}, + {"typeid", CPP_TOKEN_TYPEID}, + {"compl", CPP_TOKEN_BIT_NOT}, + + {"void", CPP_TOKEN_KEY_TYPE}, + {"bool", CPP_TOKEN_KEY_TYPE}, + {"char", CPP_TOKEN_KEY_TYPE}, + {"int", CPP_TOKEN_KEY_TYPE}, + {"float", CPP_TOKEN_KEY_TYPE}, + {"double", CPP_TOKEN_KEY_TYPE}, + + {"long", CPP_TOKEN_KEY_MODIFIER}, + {"short", CPP_TOKEN_KEY_MODIFIER}, + {"unsigned", CPP_TOKEN_KEY_MODIFIER}, + + {"const", CPP_TOKEN_KEY_QUALIFIER}, + {"volatile", CPP_TOKEN_KEY_QUALIFIER}, + + {"asm", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"break", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"case", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"catch", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"continue", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"default", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"do", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"else", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"for", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"goto", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"if", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"return", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"switch", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"try", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"while", CPP_TOKEN_KEY_CONTROL_FLOW}, + {"static_assert", CPP_TOKEN_KEY_CONTROL_FLOW}, + + {"const_cast", CPP_TOKEN_KEY_CAST}, + {"dynamic_cast", CPP_TOKEN_KEY_CAST}, + {"reinterpret_cast", CPP_TOKEN_KEY_CAST}, + {"static_cast", CPP_TOKEN_KEY_CAST}, + + {"class", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"enum", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"struct", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"typedef", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"union", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"template", CPP_TOKEN_KEY_TYPE_DECLARATION}, + {"typename", CPP_TOKEN_KEY_TYPE_DECLARATION}, + + {"friend", CPP_TOKEN_KEY_ACCESS}, + {"namespace", CPP_TOKEN_KEY_ACCESS}, + {"private", CPP_TOKEN_KEY_ACCESS}, + {"protected", CPP_TOKEN_KEY_ACCESS}, + {"public", CPP_TOKEN_KEY_ACCESS}, + {"using", CPP_TOKEN_KEY_ACCESS}, + + {"extern", CPP_TOKEN_KEY_LINKAGE}, + {"export", CPP_TOKEN_KEY_LINKAGE}, + {"inline", CPP_TOKEN_KEY_LINKAGE}, + {"static", CPP_TOKEN_KEY_LINKAGE}, + {"virtual", CPP_TOKEN_KEY_LINKAGE}, + + {"alignas", CPP_TOKEN_KEY_OTHER}, + {"explicit", CPP_TOKEN_KEY_OTHER}, + {"noexcept", CPP_TOKEN_KEY_OTHER}, + {"nullptr", CPP_TOKEN_KEY_OTHER}, + {"operator", CPP_TOKEN_KEY_OTHER}, + {"register", CPP_TOKEN_KEY_OTHER}, + {"this", CPP_TOKEN_KEY_OTHER}, + {"thread_local", CPP_TOKEN_KEY_OTHER}, +}; +FCPP_GLOBAL String_List keywords = FCPP_STRING_LIST(keyword_strings); + +FCPP_GLOBAL String_And_Flag op_strings[] = { + {"...", CPP_TOKEN_ELLIPSIS}, + {"<<=", CPP_TOKEN_LSHIFTEQ}, + {">>=", CPP_TOKEN_RSHIFTEQ}, + {"->*", CPP_TOKEN_PTRARROW}, + {"<<", CPP_TOKEN_LSHIFT}, + {">>", CPP_TOKEN_RSHIFT}, + {"&&", CPP_TOKEN_AND}, + {"||", CPP_TOKEN_OR}, + {"->", CPP_TOKEN_ARROW}, + {"++", CPP_TOKEN_INCREMENT}, + {"--", CPP_TOKEN_DECREMENT}, + {"::", CPP_TOKEN_SCOPE}, + {"+=", CPP_TOKEN_ADDEQ}, + {"-=", CPP_TOKEN_SUBEQ}, + {"*=", CPP_TOKEN_MULEQ}, + {"/=", CPP_TOKEN_DIVEQ}, + {"%=", CPP_TOKEN_MODEQ}, + {"&=", CPP_TOKEN_ANDEQ}, + {"|=", CPP_TOKEN_OREQ}, + {"^=", CPP_TOKEN_XOREQ}, + {"==", CPP_TOKEN_EQEQ}, + {">=", CPP_TOKEN_GRTREQ}, + {"<=", CPP_TOKEN_LESSEQ}, + {"!=", CPP_TOKEN_NOTEQ}, + {".*", CPP_TOKEN_PTRDOT}, + {"{", CPP_TOKEN_BRACE_OPEN}, + {"}", CPP_TOKEN_BRACE_CLOSE}, + {"[", CPP_TOKEN_BRACKET_OPEN}, + {"]", CPP_TOKEN_BRACKET_CLOSE}, + {"(", CPP_TOKEN_PARENTHESE_OPEN}, + {")", CPP_TOKEN_PARENTHESE_CLOSE}, + {"<", CPP_TOKEN_LESS}, + {">", CPP_TOKEN_GRTR}, + {"+", CPP_TOKEN_PLUS}, + {"-", CPP_TOKEN_MINUS}, + {"!", CPP_TOKEN_NOT}, + {"~", CPP_TOKEN_TILDE}, + {"*", CPP_TOKEN_STAR}, + {"&", CPP_TOKEN_AMPERSAND}, + {"|", CPP_TOKEN_BIT_OR}, + {"^", CPP_TOKEN_BIT_XOR}, + {"=", CPP_TOKEN_EQ}, + {",", CPP_TOKEN_COMMA}, + {":", CPP_TOKEN_COLON}, + {";", CPP_TOKEN_SEMICOLON}, + {"/", CPP_TOKEN_DIV}, + {"?", CPP_TOKEN_TERNARY_QMARK}, + {"%", CPP_TOKEN_MOD}, + {".", CPP_TOKEN_DOT}, +}; +FCPP_GLOBAL String_List ops = FCPP_STRING_LIST(op_strings); + +FCPP_GLOBAL String_And_Flag pp_op_strings[] = { + {"##", CPP_PP_CONCAT}, + {"#", CPP_PP_STRINGIFY}, +}; +FCPP_GLOBAL String_List pp_ops = FCPP_STRING_LIST(pp_op_strings); + +FCPP_GLOBAL String_And_Flag preprop_strings[] = { + {"include", CPP_PP_INCLUDE}, + {"INCLUDE", CPP_PP_INCLUDE}, + {"ifndef", CPP_PP_IFNDEF}, + {"IFNDEF", CPP_PP_IFNDEF}, + {"define", CPP_PP_DEFINE}, + {"DEFINE", CPP_PP_DEFINE}, + {"import", CPP_PP_IMPORT}, + {"IMPORT", CPP_PP_IMPORT}, + {"pragma", CPP_PP_PRAGMA}, + {"PRAGMA", CPP_PP_PRAGMA}, + {"undef", CPP_PP_UNDEF}, + {"UNDEF", CPP_PP_UNDEF}, + {"endif", CPP_PP_ENDIF}, + {"ENDIF", CPP_PP_ENDIF}, + {"error", CPP_PP_ERROR}, + {"ERROR", CPP_PP_ERROR}, + {"ifdef", CPP_PP_IFDEF}, + {"IFDEF", CPP_PP_IFDEF}, + {"using", CPP_PP_USING}, + {"USING", CPP_PP_USING}, + {"else", CPP_PP_ELSE}, + {"ELSE", CPP_PP_ELSE}, + {"elif", CPP_PP_ELIF}, + {"ELIF", CPP_PP_ELIF}, + {"line", CPP_PP_LINE}, + {"LINE", CPP_PP_LINE}, + {"if", CPP_PP_IF}, + {"IF", CPP_PP_IF}, +}; +FCPP_GLOBAL String_List preprops = FCPP_STRING_LIST(preprop_strings); + +#undef FCPP_STRING_LIST + +#endif // #ifndef FCPP_CPP_LEXER + +#ifdef FCPP_LEXER_IMPLEMENTATION + +#define _Assert FCPP_ASSERT +#define _TentativeAssert FCPP_ASSERT + +FCPP_LINK Sub_Match_List_Result +sub_match_list(char *data, int size, int pos, String_List list, int sub_size){ + Sub_Match_List_Result result; + String str_main; + char *str_check; + int i,l; + + result.index = -1; + result.new_pos = pos; + str_main = make_string(data + pos, size - pos); + if (sub_size > 0){ + str_main = substr(str_main, 0, sub_size); + for (i = 0; i < list.count; ++i){ + str_check = list.data[i].str; + if (match_sc(str_main, str_check)){ + result.index = i; + result.new_pos = pos + sub_size; + break; + } + } + } + else{ + for (i = 0; i < list.count; ++i){ + str_check = list.data[i].str; + if (match_part_scl(str_main, str_check, &l)){ + result.index = i; + result.new_pos = pos + l; + break; + } + } + } + return result; +} + +FCPP_LINK Seek_Result +seek_unescaped_eol(char *data, int size, int pos){ + Seek_Result result = {}; + ++pos; + while (pos < size){ + if (data[pos] == '\\'){ + if (pos + 1 < size && + data[pos+1] == '\n'){ + result.new_line = 1; + ++pos; + } + else if (pos + 1 < size && + data[pos+1] == '\r' && + pos + 2 < size && + data[pos+2] == '\n'){ + result.new_line = 1; + pos += 2; + } + } + else if (data[pos] == '\n'){ + break; + } + ++pos; + } + ++pos; + + result.pos = pos; + return result; +} + +FCPP_LINK Seek_Result +seek_unescaped_delim(char *data, int size, int pos, char delim){ + Seek_Result result = {}; + bool escape = 0; + ++pos; + while (pos < size){ + if (data[pos] == '\n'){ + result.new_line = 1; + } + if (escape){ + escape = 0; + } + else{ + if (data[pos] == '\\'){ + escape = 1; + } + else if (data[pos] == delim){ + break; + } + } + ++pos; + } + ++pos; + + result.pos = pos; + return result; +} + +FCPP_LINK Seek_Result +seek_block_comment_end(char *data, int size, int pos){ + Seek_Result result = {}; + pos += 2; + while (pos < size){ + if (data[pos] == '*' && + pos + 1 < size && + data[pos+1] == '/'){ + break; + } + if (data[pos] == '\n'){ + result.new_line = 1; + } + ++pos; + } + pos += 2; + result.pos = pos; + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_whitespace(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + + while (pos < size && char_is_whitespace(data[pos])){ + if (data[pos] == '\n'){ + result.newline = 1; + } + ++pos; + } + + result.pos = pos; + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_junk_line(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.token.start = pos; + result.token.type = CPP_TOKEN_JUNK; + + bool comment_end = 0; + while (pos < size && data[pos] != '\n'){ + if (data[pos] == '/' && pos + 1 < size){ + if (data[pos + 1] == '/' || + data[pos + 1] == '*'){ + comment_end = 1; + break; + } + } + ++pos; + } + + if (comment_end){ + result.pos = pos; + result.token.size = pos - result.token.start; + } + else{ + while (pos > 0 && data[pos - 1] == '\r'){ + --pos; + } + if (pos > 0 && data[pos - 1] == '\\'){ + --pos; + } + result.pos = pos; + result.token.size = pos - result.token.start; + } + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_operator(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.pos = pos; + result.token.start = pos; + + Sub_Match_List_Result match; + match = sub_match_list(data, size, result.token.start, ops, -1); + + if (match.index != -1){ + result.pos = match.new_pos; + result.token.size = result.pos - result.token.start; + result.token.type = (Cpp_Token_Type)ops.data[match.index].flags; + result.token.flags |= CPP_TFLAG_IS_OPERATOR; + } + else{ + result.token.size = 1; + result.token.type = CPP_TOKEN_JUNK; + result.pos = pos + 1; + } + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_pp_operator(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.pos = pos; + result.token.start = pos; + + Sub_Match_List_Result match; + match = sub_match_list(data, size, result.token.start, pp_ops, -1); + + _Assert(match.index != -1); + result.pos = match.new_pos; + result.token.size = result.pos - result.token.start; + result.token.type = (Cpp_Token_Type)pp_ops.data[match.index].flags; + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_alpha_numeric(char *data, int size, int pos, bool in_if_body){ + Cpp_Read_Result result = {}; + result.pos = pos; + result.token.start = pos; + + while (result.pos < size && + char_is_alpha_numeric(data[result.pos])){ + ++result.pos; + } + + result.token.size = result.pos - result.token.start; + + // TODO(allen): do better + if (in_if_body){ + String word; + word.size = result.token.size; + word.str = data + result.token.start; + if (match_ss(word, make_lit_string("defined"))){ + result.token.type = CPP_TOKEN_DEFINED; + result.token.flags |= CPP_TFLAG_IS_OPERATOR; + result.token.flags |= CPP_TFLAG_IS_KEYWORD; + } + } + + if (result.token.type == CPP_TOKEN_JUNK){ + Sub_Match_List_Result match; + match = sub_match_list(data, size, result.token.start, bool_lits, result.token.size); + + if (match.index != -1){ + result.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; + result.token.flags |= CPP_TFLAG_IS_KEYWORD; + } + else{ + match = sub_match_list(data, size, result.token.start, keywords, result.token.size); + + if (match.index != -1){ + String_And_Flag data = keywords.data[match.index]; + result.token.type = (Cpp_Token_Type)data.flags; + result.token.flags |= CPP_TFLAG_IS_KEYWORD; + } + else{ + result.token.type = CPP_TOKEN_IDENTIFIER; + } + } + } + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_number(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.pos = pos; + result.token.start = pos; + + bool is_float = 0; + bool is_integer = 0; + bool is_oct = 0; + bool is_hex = 0; + bool is_zero = 0; + + if (data[pos] == '0'){ + if (pos+1 < size){ + char next = data[pos+1]; + if (next == 'x'){ + is_hex = 1; + is_integer = 1; + } + else if (next == '.'){ + is_float = 1; + ++result.pos; + } + else if (next >= '0' && next <= '9'){ + is_oct = 1; + is_integer = 1; + } + else{ + is_zero = 1; + is_integer = 1; + } + } + else{ + is_zero = 1; + is_integer = 1; + } + } + else if (data[pos] == '.'){ + is_float = 1; + } + + if (is_zero){ + ++result.pos; + } + else if (is_hex){ + ++result.pos; + char character; + do{ + ++result.pos; + if (result.pos >= size){ + break; + } + character = data[result.pos]; + } while(char_is_hex(character)); + } + else if (is_oct){ + char character; + do{ + ++result.pos; + if (result.pos >= size){ + break; + } + character = data[result.pos]; + }while(char_is_numeric(character)); + } + else{ + if (!is_float){ + is_integer = 1; + while (1){ + ++result.pos; + + if (result.pos >= size){ + break; + } + bool is_good = 0; + char character = data[result.pos]; + if (character >= '0' && character <= '9'){ + is_good = 1; + } + else if (character == '.'){ + is_integer = 0; + is_float = 1; + } + if (!is_good){ + break; + } + } + } + + if (is_float){ + bool e_mode = 0; + bool e_minus = 0; + bool is_good = 0; + char character; + + while (1){ + ++result.pos; + if (result.pos >= size){ + break; + } + is_good = 0; + character = data[result.pos]; + if (character >= '0' && character <= '9'){ + is_good = 1; + } + else{ + if (character == 'e' && !e_mode){ + e_mode = 1; + is_good = 1; + } + else if (character == '-' && e_mode && !e_minus){ + e_minus = 1; + is_good = 1; + } + } + if (!is_good){ + break; + } + } + } + } + + if (is_integer){ + Sub_Match_List_Result match = + sub_match_list(data, size, result.pos, int_sufs, -1); + if (match.index != -1){ + result.pos = match.new_pos; + } + result.token.type = CPP_TOKEN_INTEGER_CONSTANT; + result.token.size = result.pos - result.token.start; + } + else if (is_float){ + Sub_Match_List_Result match = + sub_match_list(data, size, result.pos, float_sufs, -1); + if (match.index != -1){ + result.pos = match.new_pos; + } + result.token.type = CPP_TOKEN_FLOATING_CONSTANT; + result.token.size = result.pos - result.token.start; + } + else{ + _Assert(!"This shouldn't happen!"); + } + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_string_litteral(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.token.start = pos; + + _Assert(data[pos] == '"'); + Seek_Result seek = seek_unescaped_delim(data, size, pos, '"'); + pos = seek.pos; + if (seek.new_line){ + result.token.flags |= CPP_TFLAG_MULTILINE; + } + + result.token.size = pos - result.token.start; + result.token.type = CPP_TOKEN_STRING_CONSTANT; + result.pos = pos; + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_character_litteral(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.token.start = pos; + + _Assert(data[pos] == '\''); + Seek_Result seek = seek_unescaped_delim(data, size, pos, '\''); + pos = seek.pos; + if (seek.new_line){ + result.token.flags |= CPP_TFLAG_MULTILINE; + } + + result.token.size = pos - result.token.start; + result.token.type = CPP_TOKEN_CHARACTER_CONSTANT; + result.pos = pos; + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_line_comment(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.token.start = pos; + + _Assert(data[pos] == '/' && data[pos + 1] == '/'); + + pos += 2; + while (pos < size){ + if (data[pos] == '\n'){ + break; + } + if (data[pos] == '\\'){ + if (pos + 1 < size && + data[pos + 1] == '\n'){ + ++pos; + } + else if (pos + 2 < size && + data[pos + 1] == '\r' && + data[pos + 2] == '\n'){ + pos += 2; + } + } + ++pos; + } + if (pos > 0 && data[pos-1] == '\r'){ + --pos; + } + result.token.size = pos - result.token.start; + result.token.type = CPP_TOKEN_COMMENT; + result.pos = pos; + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_block_comment(char *data, int size, int pos){ + Cpp_Read_Result result = {}; + result.token.start = pos; + + _Assert(data[pos] == '/' && data[pos + 1] == '*'); + pos += 2; + while (pos < size){ + if (data[pos] == '*' && + pos + 1 < size && + data[pos+1] == '/'){ + pos += 2; + break; + } + ++pos; + } + result.token.size = pos - result.token.start; + result.token.type = CPP_TOKEN_COMMENT; + result.pos = pos; + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_preprocessor(char *data, int size, int pos){ + _Assert(data[pos] == '#'); + Cpp_Read_Result result = {}; + result.token.start = pos; + result.token.type = CPP_PP_UNKNOWN; + result.token.flags |= CPP_TFLAG_PP_DIRECTIVE; + + ++pos; + while (pos < size && + (data[pos] == ' ' || + data[pos] == '\t')){ + ++pos; + } + + Sub_Match_List_Result match + = sub_match_list(data, size, pos, preprops, -1); + + if (match.index != -1){ + result.token.size = match.new_pos - result.token.start; + result.token.type = (Cpp_Token_Type)preprops.data[match.index].flags; + result.pos = match.new_pos; + } + else{ + while (pos < size && !char_is_whitespace(data[pos])){ + ++pos; + } + result.token.size = pos - result.token.start; + result.pos = pos; + } + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_pp_include_file(char *data, int size, int pos){ + char start = data[pos]; + _Assert(start == '<' || start == '"'); + + Cpp_Read_Result result = {}; + result.token.start = pos; + result.token.type = CPP_TOKEN_INCLUDE_FILE; + result.token.flags |= CPP_TFLAG_PP_BODY; + + char end; + if (start == '<'){ + end = '>'; + } + else{ + end = '"'; + } + + ++pos; + while (pos < size && data[pos] != end){ + if (data[pos] == '\n'){ + result.token.type = CPP_TOKEN_JUNK; + result.token.flags |= CPP_TFLAG_BAD_ENDING; + break; + } + if (data[pos] == '\\'){ + if (pos + 1 < size && data[pos + 1] == '\n'){ + ++pos; + result.token.flags |= CPP_TFLAG_MULTILINE; + } + else if (pos + 2 < size && + data[pos + 1] == '\r' && + data[pos + 2] == '\n'){ + pos += 2; + result.token.flags |= CPP_TFLAG_MULTILINE; + } + } + ++pos; + } + + if (result.token.type != CPP_TOKEN_JUNK){ + if (pos < size){ + ++pos; + } + } + + result.token.size = pos - result.token.start; + result.pos = pos; + + return result; +} + +FCPP_LINK Cpp_Read_Result +cpp_read_pp_default_mode(char *data, int size, int pos, bool in_if_body){ + char current = data[pos]; + Cpp_Read_Result result; + if (char_is_numeric(current)){ + result = cpp_read_number(data, size, pos); + } + else if (char_is_alpha(current)){ + result = cpp_read_alpha_numeric(data, size, pos, in_if_body); + } + else if (current == '.'){ + if (pos + 1 < size){ + char next = data[pos + 1]; + if (char_is_numeric(next)){ + result = cpp_read_number(data, size, pos); + } + else{ + result = cpp_read_operator(data, size, pos); + } + } + else{ + result = cpp_read_operator(data, size, pos); + } + } + + else if (current == '/'){ + if (pos + 1 < size){ + char next = data[pos + 1]; + if (next == '/'){ + result = cpp_read_line_comment(data, size, pos); + } + else if (next == '*'){ + result = cpp_read_block_comment(data, size, pos); + } + else{ + result = cpp_read_operator(data, size, pos); + } + } + else{ + result = cpp_read_operator(data, size, pos); + } + } + else if (current == '"'){ + result = cpp_read_string_litteral(data, size, pos); + } + else if (current == '\''){ + result = cpp_read_character_litteral(data, size, pos); + } + else{ + result = cpp_read_operator(data, size, pos); + } + + return result; +} + +FCPP_LINK Cpp_Token_Merge +cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){ + Cpp_Token_Merge result = {}; + if (next_token.type == CPP_TOKEN_COMMENT && prev_token.type == CPP_TOKEN_COMMENT && + next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ + result.did_merge = 1; + prev_token.size = next_token.start + next_token.size - prev_token.start; + result.new_token = prev_token; + } + else if (next_token.type == CPP_TOKEN_JUNK && prev_token.type == CPP_TOKEN_JUNK && + next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ + result.did_merge = 1; + prev_token.size = next_token.start + next_token.size - prev_token.start; + result.new_token = prev_token; + } + return result; +} + +FCPP_LINK bool +cpp_push_token_no_merge(Cpp_Token_Stack *token_stack, Cpp_Token token){ + if (token_stack->count >= token_stack->max_count){ + return 0; + } + + token_stack->tokens[token_stack->count++] = token; + return 1; +} + +FCPP_LINK bool +cpp_push_token_nonalloc(Cpp_Token_Stack *token_stack, Cpp_Token token){ + Cpp_Token_Merge merge = {}; + + if (token_stack->count > 0){ + Cpp_Token prev_token = token_stack->tokens[token_stack->count - 1]; + merge = cpp_attempt_token_merge(prev_token, token); + if (merge.did_merge){ + token_stack->tokens[token_stack->count - 1] = merge.new_token; + } + } + + if (!merge.did_merge){ + if (token_stack->count >= token_stack->max_count){ + return 0; + } + + token_stack->tokens[token_stack->count++] = token; + } + + return 1; +} + +FCPP_LINK Cpp_Read_Result +cpp_lex_step(char *data, int size, Cpp_Lex_Data *lex_data){ + Cpp_Lex_Data lex = *lex_data; + Cpp_Read_Result result = {}; + bool has_result = 1; + + fcpp_u16 state_flags = cpp_token_set_pp_state(0, lex.pp_state); + + char current = data[lex.pos]; + if (char_is_whitespace(current)){ + result = cpp_read_whitespace(data, size, lex.pos); + lex.pos = result.pos; + if (result.newline && lex.pp_state != CPP_LEX_PP_DEFAULT){ + lex.pp_state = CPP_LEX_PP_DEFAULT; + } + has_result = 0; + } + + else{ + if (lex.pp_state == CPP_LEX_PP_DEFAULT){ + // TODO(allen): Not first hard of the line? Then it's junk. + if (current == '#'){ + result = cpp_read_preprocessor(data, size, lex.pos); + lex.pos = result.pos; + switch (result.token.type){ + case CPP_PP_INCLUDE: + case CPP_PP_IMPORT: + case CPP_PP_USING: + lex.pp_state = CPP_LEX_PP_INCLUDE; + break; + case CPP_PP_DEFINE: + lex.pp_state = CPP_LEX_PP_MACRO_IDENTIFIER; + break; + case CPP_PP_UNDEF: + case CPP_PP_IFDEF: + case CPP_PP_IFNDEF: + lex.pp_state = CPP_LEX_PP_IDENTIFIER; + break; + case CPP_PP_IF: + case CPP_PP_ELIF: + lex.pp_state = CPP_LEX_PP_BODY_IF; + break; + case CPP_PP_PRAGMA: + lex.pp_state = CPP_LEX_PP_BODY; + break; + case CPP_PP_LINE: + lex.pp_state = CPP_LEX_PP_NUMBER; + break; + case CPP_PP_ERROR: + lex.pp_state = CPP_LEX_PP_ERROR; + break; + + case CPP_PP_UNKNOWN: + case CPP_PP_ELSE: + case CPP_PP_ENDIF: + lex.pp_state = CPP_LEX_PP_JUNK; + break; + } + } + else{ + result = cpp_read_pp_default_mode(data, size, lex.pos); + lex.pos = result.pos; + } + } + + else{ + if (current == '\\'){ + fcpp_i32 seek = lex.pos; + ++seek; + while (seek < size && data[seek] == '\r'){ + ++seek; + } + if ((seek < size && data[seek] == '\n') || seek >= size){ + lex.pos = seek + 1; + has_result = 0; + } + else{ + lex.pp_state = CPP_LEX_PP_JUNK; + result.token.type = CPP_TOKEN_JUNK; + result.token.start = lex.pos; + result.token.size = 1; + result.token.flags |= CPP_TFLAG_PP_BODY; + lex.pos = seek; + } + } + + else{ + switch (lex.pp_state){ + case CPP_LEX_PP_IDENTIFIER: + if (!char_is_alpha_numeric(current)){ + has_result = 0; + lex.pp_state = CPP_LEX_PP_JUNK; + } + else{ + result = cpp_read_alpha_numeric(data, size, lex.pos); + result.token.flags |= CPP_TFLAG_PP_BODY; + lex.pos = result.pos; + lex.pp_state = CPP_LEX_PP_JUNK; + } + break; + + case CPP_LEX_PP_MACRO_IDENTIFIER: + if (!char_is_alpha_numeric(current)){ + has_result = 0; + lex.pp_state = CPP_LEX_PP_JUNK; + } + else{ + result = cpp_read_alpha_numeric(data, size, lex.pos); + result.token.flags |= CPP_TFLAG_PP_BODY; + lex.pos = result.pos; + lex.pp_state = CPP_LEX_PP_BODY; + } + break; + + case CPP_LEX_PP_INCLUDE: + if (current != '"' && current != '<'){ + has_result = 0; + lex.pp_state = CPP_LEX_PP_JUNK; + } + else{ + result = cpp_read_pp_include_file(data, size, lex.pos); + lex.pos = result.pos; + lex.pp_state = CPP_LEX_PP_JUNK; + } + break; + + case CPP_LEX_PP_BODY: + if (current == '#'){ + result = cpp_read_pp_operator(data, size, lex.pos); + } + else{ + result = cpp_read_pp_default_mode(data, size, lex.pos); + } + lex.pos = result.pos; + result.token.flags |= CPP_TFLAG_PP_BODY; + break; + + case CPP_LEX_PP_BODY_IF: + if (current == '#'){ + result = cpp_read_pp_operator(data, size, lex.pos); + } + else{ + result = cpp_read_pp_default_mode(data, size, lex.pos, 1); + } + lex.pos = result.pos; + result.token.flags |= CPP_TFLAG_PP_BODY; + break; + + case CPP_LEX_PP_NUMBER: + if (!char_is_numeric(current)){ + has_result = 0; + lex.pp_state = CPP_LEX_PP_JUNK; + } + else{ + result = cpp_read_number(data, size, lex.pos); + lex.pos = result.pos; + result.token.flags |= CPP_TFLAG_PP_BODY; + lex.pp_state = CPP_LEX_PP_INCLUDE; + } + break; + + case CPP_LEX_PP_ERROR: + result = cpp_read_junk_line(data, size, lex.pos); + lex.pos = result.pos; + result.token.type = CPP_TOKEN_ERROR_MESSAGE; + result.token.flags |= CPP_TFLAG_PP_BODY; + break; + + default: + { + bool took_comment = 0; + if (current == '/' && lex.pos + 1 < size){ + if (data[lex.pos + 1] == '/'){ + result = cpp_read_line_comment(data, size, lex.pos); + lex.pp_state = CPP_LEX_PP_DEFAULT; + lex.pos = result.pos; + took_comment = 1; + }else if (data[lex.pos + 1] == '*'){ + result = cpp_read_block_comment(data, size, lex.pos); + lex.pos = result.pos; + took_comment = 1; + } + } + + if (!took_comment){ + result = cpp_read_junk_line(data, size, lex.pos); + lex.pos = result.pos; + result.token.flags |= CPP_TFLAG_PP_BODY; + } + }break; + + } + } + } + } + + result.token.state_flags = state_flags; + result.has_result = has_result; + + *lex_data = lex; + return result; +} + +FCPP_LINK int +cpp_lex_file_token_count(char *data, int size){ + int count = 0; + Cpp_Lex_Data lex = {}; + Cpp_Token token = {}; + while (lex.pos < size){ + Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex); + + if (step_result.has_result){ + if (count > 0){ + Cpp_Token_Merge merge = cpp_attempt_token_merge(token, step_result.token); + if (merge.did_merge){ + token = merge.new_token; + } + else{ + token = step_result.token; + ++count; + } + } + else{ + token = step_result.token; + ++count; + } + } + } + return count; +} + +FCPP_LINK Cpp_Lex_Data +cpp_lex_file_nonalloc(char *data, int size, Cpp_Token_Stack *token_stack_out, Cpp_Lex_Data lex_data){ + while (lex_data.pos < size){ + Cpp_Lex_Data prev_lex = lex_data; + Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex_data); + + if (step_result.has_result){ + if (!cpp_push_token_nonalloc(token_stack_out, step_result.token)){ + lex_data = prev_lex; + return lex_data; + } + } + } + + lex_data.complete = 1; + return lex_data; +} + +FCPP_LINK Cpp_Get_Token_Result +cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ + int first, last; + first = 0; + last = token_stack->count; + + Cpp_Get_Token_Result result = {}; + if (token_stack->count > 0){ + for (;;){ + result.token_index = (first + last)/2; + + int this_start = token_stack->tokens[result.token_index].start; + int next_start; + if (result.token_index + 1 < token_stack->count){ + next_start = token_stack->tokens[result.token_index+1].start; + } + else{ + next_start = this_start + token_stack->tokens[result.token_index].size; + } + if (this_start <= pos && pos < next_start){ + break; + } + else if (pos < this_start){ + last = result.token_index; + } + else{ + first = result.token_index + 1; + } + if (first == last){ + result.token_index = first; + break; + } + } + + if (result.token_index == token_stack->count){ + --result.token_index; + result.in_whitespace = 1; + } + else{ + Cpp_Token *token = token_stack->tokens + result.token_index; + if (token->start + token->size <= pos){ + result.in_whitespace = 1; + } + } + } + else{ + result.token_index = -1; + result.in_whitespace = 1; + } + + return result; +} + +FCPP_LINK void +cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int amount){ + int count = stack->count; + Cpp_Token *token = stack->tokens + from_token_i; + for (int i = from_token_i; i < count; ++i, ++token){ + token->start += amount; + } +} + +FCPP_LINK Cpp_Relex_State +cpp_relex_nonalloc_start(char *data, int size, Cpp_Token_Stack *stack, + int start, int end, int amount, int tolerance){ + Cpp_Relex_State state; + state.data = data; + state.size = size; + state.stack = stack; + state.start = start; + state.end = end; + state.amount = amount; + state.tolerance = tolerance; + + Cpp_Get_Token_Result result = cpp_get_token(stack, start); + if (result.token_index <= 0){ + state.start_token_i = 0; + } + else{ + state.start_token_i = result.token_index-1; + } + + result = cpp_get_token(stack, end); + if (result.token_index < 0){ + result.token_index = 0; + } + else if (end > stack->tokens[result.token_index].start){ + ++result.token_index; + } + state.end_token_i = result.token_index; + + state.relex_start = stack->tokens[state.start_token_i].start; + if (start < state.relex_start){ + state.relex_start = start; + } + + state.space_request = state.end_token_i - state.start_token_i + tolerance + 1; + + return(state); +} + +inline Cpp_Token +cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){ + Cpp_Token result; + if (index < stack->count){ + result = tokens[index]; + } + else{ + result.start = size; + result.size = 0; + result.type = CPP_TOKEN_EOF; + result.flags = 0; + result.state_flags = 0; + } + return result; +} + +FCPP_LINK bool +cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ + Cpp_Token_Stack *stack = state->stack; + Cpp_Token *tokens = stack->tokens; + + cpp_shift_token_starts(stack, state->end_token_i, state->amount); + + Cpp_Lex_Data lex = {}; + lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); + lex.pos = state->relex_start; + + int relex_end_i = state->end_token_i; + Cpp_Token match_token = cpp__get_token(stack, tokens, state->size, relex_end_i); + Cpp_Token end_token = match_token; + bool went_too_far = 0; + + for (;;){ + Cpp_Read_Result read = cpp_lex_step(state->data, state->size, &lex); + if (read.has_result){ + if (read.token.start == end_token.start && + read.token.size == end_token.size && + read.token.flags == end_token.flags && + read.token.state_flags == end_token.state_flags){ + break; + } + cpp_push_token_nonalloc(relex_stack, read.token); + + while (lex.pos > end_token.start && relex_end_i < stack->count){ + ++relex_end_i; + end_token = cpp__get_token(stack, tokens, state->size, relex_end_i); + } + if (relex_stack->count == relex_stack->max_count){ + went_too_far = 1; + break; + } + } + if (lex.pos >= state->size) break; + } + + if (!went_too_far){ + if (relex_stack->count > 0){ + if (state->start_token_i > 0){ + Cpp_Token_Merge merge = + cpp_attempt_token_merge(tokens[state->start_token_i - 1], + relex_stack->tokens[0]); + if (merge.did_merge){ + --state->start_token_i; + relex_stack->tokens[0] = merge.new_token; + } + } + + if (relex_end_i < state->stack->count){ + Cpp_Token_Merge merge = + cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1], + tokens[relex_end_i]); + if (merge.did_merge){ + ++relex_end_i; + relex_stack->tokens[relex_stack->count-1] = merge.new_token; + } + } + } + + *relex_end = relex_end_i; + } + else{ + cpp_shift_token_starts(stack, state->end_token_i, -state->amount); + } + + return(went_too_far); +} + +#ifndef FCPP_FORBID_MALLOC +FCPP_LINK Cpp_Token_Stack +cpp_make_token_stack(int starting_max){ + Cpp_Token_Stack token_stack; + token_stack.count = 0; + token_stack.max_count = starting_max; + token_stack.tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*starting_max); + return token_stack; +} + +FCPP_LINK void +cpp_free_token_stack(Cpp_Token_Stack token_stack){ + FCPP_FREE_MEMORY(token_stack.tokens); +} + +FCPP_LINK void +cpp_resize_token_stack(Cpp_Token_Stack *token_stack, int new_max){ + Cpp_Token *new_tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*new_max); + + if (new_tokens){ + FCPP_MEM_COPY(new_tokens, token_stack->tokens, sizeof(Cpp_Token)*token_stack->count); + FCPP_FREE_MEMORY(token_stack->tokens); + token_stack->tokens = new_tokens; + token_stack->max_count = new_max; + } +} + +FCPP_LINK void +cpp_push_token(Cpp_Token_Stack *token_stack, Cpp_Token token){ + if (!cpp_push_token_nonalloc(token_stack, token)){ + int new_max = 2*token_stack->max_count + 1; + cpp_resize_token_stack(token_stack, new_max); + bool result = cpp_push_token_nonalloc(token_stack, token); + _Assert(result); + } +} + +FCPP_LINK void +cpp_lex_file(char *data, int size, Cpp_Token_Stack *token_stack_out){ + Cpp_Lex_Data lex = {}; + while (lex.pos < size){ + Cpp_Read_Result step_result = cpp_lex_step(data, size, &lex); + if (step_result.has_result){ + cpp_push_token(token_stack_out, step_result.token); + } + } +} + +FCPP_LINK bool +cpp_relex_file_limited(char *data, int size, Cpp_Token_Stack *stack, + int start, int end, int amount, int tolerance){ + int relex_start_i; + int end_token_i, end_guess_i; + { + Cpp_Get_Token_Result result = cpp_get_token(stack, start); + if (result.token_index <= 0){ + relex_start_i = 0; + } + else{ + relex_start_i = result.token_index-1; + } + + result = cpp_get_token(stack, end); + if (result.token_index < 0) result.token_index = 0; + else if (end > stack->tokens[result.token_index].start) ++result.token_index; + end_token_i = result.token_index; + end_guess_i = result.token_index+1; + } + + int relex_start = stack->tokens[relex_start_i].start; + if (start < relex_start) relex_start = start; + + cpp_shift_token_starts(stack, end_token_i, amount); + Cpp_Token_Stack relex_stack = cpp_make_token_stack((end_guess_i - relex_start_i + 1) * 3 / 2); + Cpp_Lex_Data lex = {}; + lex.pp_state = cpp_token_get_pp_state(stack->tokens[relex_start_i].state_flags); + lex.pos = relex_start; + bool went_too_far = 0; + + while (1){ + Cpp_Read_Result result = cpp_lex_step(data, size, &lex); + if (result.has_result){ + if (end_guess_i < stack->count && + result.token.start == stack->tokens[end_guess_i].start && + result.token.size == stack->tokens[end_guess_i].size && + result.token.flags == stack->tokens[end_guess_i].flags && + result.token.state_flags == stack->tokens[end_guess_i].state_flags){ + break; + } + else{ + cpp_push_token(&relex_stack, result.token); + while (lex.pos > stack->tokens[end_guess_i].start && + end_guess_i < stack->count){ + ++end_guess_i; + } + } + } + + if (lex.pos >= size){ + break; + } + + if (tolerance >= 0 && relex_stack.count + relex_start_i >= end_guess_i + tolerance){ + went_too_far = 1; + break; + } + } + + if (!went_too_far){ + int relex_end_i = end_guess_i; + + if (relex_stack.count > 0){ + if (relex_start_i > 0){ + Cpp_Token_Merge merge = cpp_attempt_token_merge(stack->tokens[relex_start_i - 1], + relex_stack.tokens[0]); + if (merge.did_merge){ + --relex_start_i; + relex_stack.tokens[0] = merge.new_token; + } + } + + if (relex_end_i < stack->count){ + Cpp_Token_Merge merge = cpp_attempt_token_merge(relex_stack.tokens[relex_stack.count - 1], + stack->tokens[relex_end_i]); + if (merge.did_merge){ + ++relex_end_i; + relex_stack.tokens[relex_stack.count - 1] = merge.new_token; + } + } + } + + int token_delete_amount = relex_end_i - relex_start_i; + int token_shift_amount = relex_stack.count - token_delete_amount; + + if (token_shift_amount != 0){ + int new_token_count = stack->count + token_shift_amount; + if (new_token_count > stack->max_count){ + int new_max = 2*stack->max_count + 1; + while (new_token_count > new_max){ + new_max = 2*new_max + 1; + } + cpp_resize_token_stack(stack, new_max); + } + + if (relex_end_i < stack->count){ + FCPP_MEM_MOVE(stack->tokens + relex_end_i + token_shift_amount, + stack->tokens + relex_end_i, sizeof(Cpp_Token)*(stack->count - relex_end_i)); + } + + stack->count += token_shift_amount; + } + + FCPP_MEM_COPY(stack->tokens + relex_start_i, relex_stack.tokens, sizeof(Cpp_Token)*relex_stack.count); + cpp_free_token_stack(relex_stack); + } + + else{ + cpp_shift_token_starts(stack, end_token_i, -amount); + cpp_free_token_stack(relex_stack); + } + + return went_too_far; +} +#endif + +#undef _Assert +#undef _TentativeAssert + +#undef FCPP_LEXER_IMPLEMENTATION +#endif // #ifdef FCPP_LEXER_IMPLEMENTATION + +// BOTTOM diff --git a/test/4cpp_lexer_tables.c b/4cpp_lexer_tables.c similarity index 100% rename from test/4cpp_lexer_tables.c rename to 4cpp_lexer_tables.c diff --git a/4ed_app_target.cpp b/4ed_app_target.cpp index 844fc632..1161507a 100644 --- a/4ed_app_target.cpp +++ b/4ed_app_target.cpp @@ -27,14 +27,7 @@ #include "4coder_table.cpp" -#define USE_NEW_LEXER 1 - -#if USE_NEW_LEXER -#include "test/4cpp_new_lexer.h" -#else -#define FCPP_LEXER_IMPLEMENTATION #include "4cpp_lexer.h" -#endif #include "4ed_template.cpp" diff --git a/4ed_metagen.cpp b/4ed_metagen.cpp index 2496e315..9b6d6816 100644 --- a/4ed_metagen.cpp +++ b/4ed_metagen.cpp @@ -22,13 +22,6 @@ #include "4coder_mem.h" -struct Global_Settings{ - int generate_docs; - int generate_string; -}; - -static Global_Settings global_settings; - struct Struct_Field{ char *type; char *name; @@ -2038,7 +2031,7 @@ generate_custom_headers(){ fclose(file); // NOTE(allen): Documentation - if (global_settings.generate_docs){ + { Typedef_Set typedef_set = {0}; Struct_Set struct_set = {0}; Enum_Set flag_set = {0}; @@ -2396,10 +2389,10 @@ generate_custom_headers(){ append_ss(&line, cpp_name); append_ss(&line, args); if (match_ss(ret, make_lit_string("void"))){ - append_ss(&line, make_lit_string("{ (")); + append_ss(&line, make_lit_string("{(")); } else{ - append_ss(&line, make_lit_string("{ return(")); + append_ss(&line, make_lit_string("{return(")); } append_ss(&line, name); append_s_char(&line, '('); @@ -3084,11 +3077,6 @@ generate_custom_headers(){ int main(int argc, char **argv){ char *filename = 0; - memset(&global_settings, 0, sizeof(global_settings)); - - global_settings.generate_docs = true; - global_settings.generate_string = true; - filename = generate_keycode_enum(); filename = generate_style(); filename = generate_custom_headers(); diff --git a/build.c b/build.c index 628bdc51..0ba846e9 100644 --- a/build.c +++ b/build.c @@ -44,13 +44,47 @@ static int error_state = 0; #if defined(IS_WINDOWS) -#define DWORD uint32_t -#define LPTSTR char* +typedef uint32_t DWORD; +typedef int32_t LONG; +typedef int64_t LONGLONG; +typedef char* LPTSTR; +typedef int32_t BOOL; +typedef union _LARGE_INTEGER { + struct { + DWORD LowPart; + LONG HighPart; + }; + struct { + DWORD LowPart; + LONG HighPart; + } u; + LONGLONG QuadPart; +} LARGE_INTEGER, *PLARGE_INTEGER; -DWORD GetCurrentDirectoryA( -_In_ DWORD nBufferLength, -_Out_ LPTSTR lpBuffer -); +DWORD GetCurrentDirectoryA(_In_ DWORD nBufferLength, _Out_ LPTSTR lpBuffer); +BOOL QueryPerformanceCounter(_Out_ LARGE_INTEGER *lpPerformanceCount); +BOOL QueryPerformanceFrequency(_Out_ LARGE_INTEGER *lpFrequency); + +static uint64_t perf_frequency; + +static void +init_time_system(){ + LARGE_INTEGER lint; + if (QueryPerformanceFrequency(&lint)){ + perf_frequency = lint.QuadPart; + } +} + +static uint64_t +get_time(){ + uint64_t time = 0; + LARGE_INTEGER lint; + if (QueryPerformanceCounter(&lint)){ + time = lint.QuadPart; + time = (time * 1000000) / perf_frequency; + } + return(time); +} static int32_t get_current_directory(char *buffer, int32_t max){ @@ -72,6 +106,9 @@ execute(char *dir, char *str){ #error This OS is not supported yet #endif +#define BEGIN_TIME_SECTION() do{ uint64_t start = get_time() +#define END_TIME_SECTION(n) uint64_t total = get_time() - start; printf("%-20s: %.2lu.%.6lu\n", (n), total/1000000, total%1000000); }while(0) + // // 4coder specific // @@ -116,6 +153,7 @@ build_cl(uint32_t flags, char *out_path, char *out_file, char *exports){ win32_slash_fix(out_path); + win32_slash_fix(code_path); char link_options[1024]; if (flags & SHARED_CODE){ @@ -184,13 +222,14 @@ build(uint32_t flags, } static void -buildsuper(char *code_path , char *filename){ +buildsuper(char *code_path, char *out_path, char *filename){ #if defined(IS_CL) - win32_slash_fix(filename); + win32_slash_fix(out_path); + win32_slash_fix(code_path); - systemf("call \"%s\\buildsuper.bat\" %s", - code_path, filename); + systemf("pushd %s & call \"%s\\buildsuper.bat\" %s", + out_path, code_path, filename); #else #error The build rule for this compiler is not ready @@ -200,27 +239,43 @@ buildsuper(char *code_path , char *filename){ #if defined(DEV_BUILD) int main(int argc, char **argv){ + init_time_system(); + char cdir[256]; - { - int32_t n = get_current_directory(cdir, sizeof(cdir)); - assert(n < sizeof(cdir)); - } + BEGIN_TIME_SECTION(); + int32_t n = get_current_directory(cdir, sizeof(cdir)); + assert(n < sizeof(cdir)); + END_TIME_SECTION("current directory"); + +#define META_DIR "../meta" +#define BUILD_DIR "../build" + + BEGIN_TIME_SECTION(); build(OPTS | DEBUG_INFO, cdir, "4ed_metagen.cpp", - "../meta", "metagen", 0); + META_DIR, "metagen", 0); + END_TIME_SECTION("build metagen"); - execute(cdir, "../meta/metagen"); + BEGIN_TIME_SECTION(); + execute(cdir, META_DIR"/metagen"); + END_TIME_SECTION("run metagen"); - //buildsuper(cdir, "../code/4coder_default_bindings.cpp"); - buildsuper(cdir, "../code/internal_4coder_tests.cpp"); - //buildsuper(cdir, "../code/power/4coder_casey.cpp"); - //buildsuper(cdir, "../4vim/4coder_chronal.cpp"); + BEGIN_TIME_SECTION(); + //buildsuper(cdir, BUILD_DIR, "../code/4coder_default_bindings.cpp"); + buildsuper(cdir, BUILD_DIR, "../code/internal_4coder_tests.cpp"); + //buildsuper(cdir, BUILD_DIR, "../code/power/4coder_casey.cpp"); + //buildsuper(cdir, BUILD_DIR, "../4vim/4coder_chronal.cpp"); + END_TIME_SECTION("build custom"); + BEGIN_TIME_SECTION(); build(OPTS | INCLUDES | SHARED_CODE | DEBUG_INFO, cdir, "4ed_app_target.cpp", - "../build", "4ed_app", "/EXPORT:app_get_functions"); + BUILD_DIR, "4ed_app", "/EXPORT:app_get_functions"); + END_TIME_SECTION("build 4ed_app"); + BEGIN_TIME_SECTION(); build(OPTS | INCLUDES | LIBS | ICON | DEBUG_INFO, cdir, "win32_4ed.cpp", - "../build", "4ed", 0); + BUILD_DIR, "4ed", 0); + END_TIME_SECTION("build 4ed"); return(error_state); } @@ -231,4 +286,5 @@ int main(int argc, char **argv){ #endif -// BOTTOM \ No newline at end of file +// BOTTOM + diff --git a/buildsuper.bat b/buildsuper.bat index 35e3daf7..e79d29f3 100644 --- a/buildsuper.bat +++ b/buildsuper.bat @@ -1,8 +1,8 @@ @echo off -REM this is here to prevent the spammation of PATH -IF NOT DEFINED clset (call "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" x64) -SET clset=64 +REM this is not really working... it's slowing down the build time +REM IF NOT DEFINED clset (call "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" x64) +REM SET clset=64 SET SRC=%1 if "%SRC%" == "" SET SRC=4coder_default_bindings.cpp diff --git a/internal_4coder_tests.cpp b/internal_4coder_tests.cpp index 5fa02293..4513b239 100644 --- a/internal_4coder_tests.cpp +++ b/internal_4coder_tests.cpp @@ -17,7 +17,10 @@ Allen Webster #include "4coder_default_include.cpp" #include "4coder_default_building.cpp" -#include +#include +#pragma intrinsic(__rdtsc) + +typedef uint64_t DWORD64; #define TEST_TIME_B(m) DWORD64 time_start = __rdtsc(), time_max = m; (void)(time_start), (void)(time_max) #define TEST_TIME_E() DWORD64 time_total = __rdtsc() - time_start; if (time_total > time_max) {assert(!"failed timing");} diff --git a/lexergen/4cpp_lexer_fsms.h b/lexergen/4cpp_lexer_fsms.h new file mode 100644 index 00000000..698b1f5d --- /dev/null +++ b/lexergen/4cpp_lexer_fsms.h @@ -0,0 +1,138 @@ +/* + * FSMs for 4c++ lexer + * + * 23.03.2016 (dd.mm.yyyy) + + + + + + + + + + + + + + + + + + + + + + + + */ + +// TOP + +struct String_And_Flag{ + char *str; + unsigned int flags; +}; + +enum Lex_State{ + LS_default, + LS_identifier, + LS_pound, + LS_pp, + LS_char, + LS_char_multiline, + LS_char_slashed, + LS_string, + LS_string_multiline, + LS_string_slashed, + LS_number, + LS_number0, + LS_float, + LS_crazy_float0, + LS_crazy_float1, + LS_hex, + LS_comment_pre, + LS_comment, + LS_comment_slashed, + LS_comment_block, + LS_comment_block_ending, + LS_dot, + LS_ellipsis, + LS_less, + LS_less_less, + LS_more, + LS_more_more, + LS_minus, + LS_arrow, + LS_and, + LS_or, + LS_plus, + LS_colon, + LS_star, + LS_modulo, + LS_caret, + LS_eq, + LS_bang, + LS_error_message, + // + LS_count +}; + +enum Lex_Int_State{ + LSINT_default, + LSINT_u, + LSINT_l, + LSINT_L, + LSINT_ul, + LSINT_uL, + LSINT_ll, + LSINT_extra, + // + LSINT_count +}; + +enum Lex_INC_State{ + LSINC_default, + LSINC_quotes, + LSINC_pointy, + LSINC_junk, +}; + +enum Lex_PP_State{ + LSPP_default, + LSPP_include, + LSPP_macro_identifier, + LSPP_identifier, + LSPP_body_if, + LSPP_body, + LSPP_number, + LSPP_error, + LSPP_junk, + // + LSPP_count +}; + +struct Whitespace_FSM{ + unsigned char pp_state; + unsigned char white_done; +}; + +struct Lex_FSM{ + unsigned char state; + union{ + unsigned char int_state; + unsigned char directive_state; + unsigned char sub_machine; + }; + unsigned char emit_token; + unsigned char multi_line; +}; +inline Lex_FSM +zero_lex_fsm(){ + Lex_FSM fsm = {0}; + return(fsm); +} + +// BOTTOM + + diff --git a/lexergen/4cpp_lexer_tables.c b/lexergen/4cpp_lexer_tables.c new file mode 100644 index 00000000..24f15227 --- /dev/null +++ b/lexergen/4cpp_lexer_tables.c @@ -0,0 +1,395 @@ +unsigned short whitespace_fsm_eq_classes[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,18, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int num_whitespace_fsm_eq_classes = 3; + +unsigned char whitespace_fsm_table[] = { + 9,10,11,12,13,14,15,16,17, + 0, 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +unsigned short int_fsm_eq_classes[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24, 0, 0, 0, 0, 0, 0, 0, 0,16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int num_int_fsm_eq_classes = 4; + +unsigned char int_fsm_table[] = { + 8, 9,10,11,12,13,14,15, + 3, 5,10, 6,12, 7,14,15, + 1, 9, 7, 7,12,13, 7,15, + 2, 4, 6,11, 7,13,14,15, +}; + +unsigned char multiline_state_table[] = { + 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +unsigned short main_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_main_fsm_eq_classes = 29; + +unsigned char main_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +42,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_include_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,39,195,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_include_fsm_eq_classes = 6; + +unsigned char pp_include_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 3, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38, + 3,42,42,42, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38, + 1,40, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38, + 2, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38, + 3, 1,41, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38, +}; + +unsigned short pp_macro_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_macro_fsm_eq_classes = 29; + +unsigned char pp_macro_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_identifier_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_identifier_fsm_eq_classes = 29; + +unsigned char pp_identifier_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_body_if_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_body_if_fsm_eq_classes = 29; + +unsigned char pp_body_if_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_body_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_body_fsm_eq_classes = 29; + +unsigned char pp_body_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_number_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_number_fsm_eq_classes = 29; + +unsigned char pp_number_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short pp_error_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_error_fsm_eq_classes = 3; + +unsigned char pp_error_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, +38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38, +77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77,77, +}; + +unsigned short pp_junk_fsm_eq_classes[] = { + 0,39,39,39,39,39,39,39,39,39,78,117,117,117,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,156,195,234,273,312,351,390,273,273,429,468,273,507,546,585,624,663,663,663,663,663,663,663,663,663,702,273,741,780,819,273,273,858,858,858,858,858,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,273,936,273,975,897,39,858,858,858,858,1014,858,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,897,1053,897,897,273,1092,273,273,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39, +}; + +const int num_pp_junk_fsm_eq_classes = 29; + +unsigned char pp_junk_fsm_table[] = { +39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77, + 0,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3,43,44, 5,46,47, 8,49,50,51,52,53,54,55,56,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 0,40,41, 3, 4, 5, 6, 7, 8, 9,49,50,51,52,53,54,55,17,18,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +37,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 7,40,41, 3, 4, 5, 4,46,47, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 2,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +34,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +29,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 4,40,41, 3,43,44, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +33,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,19,17,17,20,20,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +31,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +27,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,14,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +21,40,41, 3, 4, 5, 4, 7, 8, 7,12,12,51,52,53,54,55,17,17,19,19,22,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +16,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,17,17,17,19,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +11, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +10, 1,41, 3, 4, 5, 4, 7, 8, 7,10,10,12,14,14,15,55,17,17,19,19,12,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +32,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +23,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,24,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +36,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +25,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,26,65,28,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +39,40,41, 3, 6, 6, 4, 9, 9, 7,49,50,51,52,53,54,55,18,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +35,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,50,13,52,53,15,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, + 1, 1,41, 3, 4, 5, 4, 7, 8, 7,49,15,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +30,40,41, 3, 4, 5, 4, 7, 8, 7,49,50,51,52,53,54,55,17,17,19,19,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,38, +}; + +unsigned short * get_eq_classes[] = { +main_fsm_eq_classes, +pp_include_fsm_eq_classes, +pp_macro_fsm_eq_classes, +pp_identifier_fsm_eq_classes, +pp_body_if_fsm_eq_classes, +pp_body_fsm_eq_classes, +pp_number_fsm_eq_classes, +pp_error_fsm_eq_classes, +pp_junk_fsm_eq_classes, +}; + +unsigned char * get_table[] = { +main_fsm_table, +pp_include_fsm_table, +pp_macro_fsm_table, +pp_identifier_fsm_table, +pp_body_if_fsm_table, +pp_body_fsm_table, +pp_number_fsm_table, +pp_error_fsm_table, +pp_junk_fsm_table, +}; + +unsigned short pp_directive_eq_classes[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0,119, 0,119,119,119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,238, 0,357,476,595,714,833, 0,952, 0, 0,1071,1190,1309,1428,1547, 0,1666,1785,1904,2023, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,2142, 0,2261,2380,2499,2618,2737, 0,2856, 0, 0,2975,3094,3213,3332,3451, 0,3570,3689,3808,3927, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int num_pp_directive_eq_classes = 34; + +unsigned char pp_directive_table[] = { +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 0,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,67,200,200,70,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,35,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 4,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,41,200,200,200,38,200,202,42,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,81,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,103,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +10,200,200,200,56,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,39,202,200,45,43,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,60,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,82,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,111,200,208,209,200,200,214,200,118,214, +200,200,33,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,44,207,46,206,200,200,200,212,200,200,200,200,203,57,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,83,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,105,210,200,200,211,200,112,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,68,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,86,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 2,200,200,200,200,200,200,200,200,200,200,200,116,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,58,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,84,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,110,104,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +12,200,200,200,200,200,200,200,200,200,102,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,36,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,34,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,69,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,32,200,200,200,200,200,79,200,100,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,40,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,59,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,85,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,117,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,48,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,107,200,211,200,200,208,209,200,200,214,200,200,214, + 6,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,47,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,66,200,200,200,101,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,49,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,106,200,200,200,210,200,108,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,80,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,109,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,50,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 8,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,37,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,62,200,200,65,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,16,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 3,200,200,200,200,200,200,200,200,200,200,200,200,200,22,200,200,200,19,200,202,23,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,73,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,90,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 9,200,200,51,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,20,202,200,26,24,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,55,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,74,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,98,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,115,214,200,200,214, +200,14,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,25,207,27,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,52,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,75,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,92,210,200,200,211,200,99,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,63,200,200,215,200,200,200,200,215,200,200,200,200,204,200,78,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 1,200,200,200,200,200,200,200,200,200,200,113,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,53,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,76,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,97,91,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +11,200,200,200,200,200,200,200,200,89,200,200,200,200,205,200,17,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,15,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,64,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,13,200,200,200,200,200,71,200,87,200,200,200,200,21,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,54,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,77,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,114,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,29,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,94,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 5,200,200,200,200,200,200,200,200,200,200,200,200,200,205,28,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,61,200,200,200,88,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,30,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,93,200,200,200,210,200,95,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,72,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,96,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +200,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,31,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, + 7,200,200,200,200,200,200,200,200,200,200,200,200,200,205,200,200,18,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,205,200,200,200,200,200,202,200,200,200,200,207,200,206,200,200,200,212,200,200,200,200,203,200,200,200,200,203,200,200,200,200,215,200,200,200,200,215,200,200,200,200,204,200,200,213,200,200,200,200,204,200,200,213,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,200,200,200,210,200,200,211,200,200,208,209,200,200,214,200,200,214, +}; + +unsigned char LSDIR_default = 0; + +unsigned char LSDIR_count = 119; + +unsigned char pp_directive_terminal_base = 200; + diff --git a/test/4cpp_new_lexer.h b/lexergen/4cpp_new_lexer.h similarity index 100% rename from test/4cpp_new_lexer.h rename to lexergen/4cpp_new_lexer.h diff --git a/test/fsm_table_generator.cpp b/lexergen/fsm_table_generator.cpp similarity index 100% rename from test/fsm_table_generator.cpp rename to lexergen/fsm_table_generator.cpp