/*
4coder_lex_gen_cpp.cpp - Model definition for a C++ lexer.
*/
// TOP
#define LANG_NAME_LOWER cpp
#define LANG_NAME_CAMEL Cpp
#include "lexer_generator/4coder_lex_gen_main.cpp"
/*
build_language_model - Describes the C++ token model to the lexer generator.

The function takes no parameters and returns nothing; everything it does is
expressed through calls into the lexer generator API (sm_*, smo_*, smh_*).
In order, it:
  1. names punctuation characters,
  2. declares the token kinds that are emitted directly by name,
  3. declares the operator sets (main and preprocessor),
  4. declares the keyword sets (main, preprocessor directives, preprocessor
     keywords),
  5. declares the flags and states of the state machine,
  6. describes the transitions of every state.

The order of the calls matters: each sm_select_* call sets the context used
by the calls that follow it, so statements must not be reordered.
*/
internal void
build_language_model(void){
    u8 utf8[129];
    smh_utf8_fill(utf8);
    
    smh_set_base_character_names();
    smh_typical_tokens();
    
    // CPP Names
    sm_char_name('!', "Not");
    sm_char_name('&', "And");
    sm_char_name('|', "Or");
    sm_char_name('%', "Mod");
    sm_char_name('^', "Xor");
    sm_char_name('?', "Ternary");
    sm_char_name('/', "Div");
    
    // CPP Direct Token Kinds
    sm_select_base_kind(TokenBaseKind_Comment);
    sm_direct_token_kind("BlockComment");
    sm_direct_token_kind("LineComment");
    
    sm_select_base_kind(TokenBaseKind_Whitespace);
    sm_direct_token_kind("Backslash");
    
    sm_select_base_kind(TokenBaseKind_LiteralInteger);
    sm_direct_token_kind("LiteralInteger");
    sm_direct_token_kind("LiteralIntegerU");
    sm_direct_token_kind("LiteralIntegerL");
    sm_direct_token_kind("LiteralIntegerUL");
    sm_direct_token_kind("LiteralIntegerLL");
    sm_direct_token_kind("LiteralIntegerULL");
    sm_direct_token_kind("LiteralIntegerHex");
    sm_direct_token_kind("LiteralIntegerHexU");
    sm_direct_token_kind("LiteralIntegerHexL");
    sm_direct_token_kind("LiteralIntegerHexUL");
    sm_direct_token_kind("LiteralIntegerHexLL");
    sm_direct_token_kind("LiteralIntegerHexULL");
    sm_direct_token_kind("LiteralIntegerOct");
    sm_direct_token_kind("LiteralIntegerOctU");
    sm_direct_token_kind("LiteralIntegerOctL");
    sm_direct_token_kind("LiteralIntegerOctUL");
    sm_direct_token_kind("LiteralIntegerOctLL");
    sm_direct_token_kind("LiteralIntegerOctULL");
    
    sm_select_base_kind(TokenBaseKind_LiteralFloat);
    sm_direct_token_kind("LiteralFloat32");
    sm_direct_token_kind("LiteralFloat64");
    
    sm_select_base_kind(TokenBaseKind_LiteralString);
    sm_direct_token_kind("LiteralString");
    sm_direct_token_kind("LiteralStringWide");
    sm_direct_token_kind("LiteralStringUTF8");
    sm_direct_token_kind("LiteralStringUTF16");
    sm_direct_token_kind("LiteralStringUTF32");
    sm_direct_token_kind("LiteralStringRaw");
    sm_direct_token_kind("LiteralStringWideRaw");
    sm_direct_token_kind("LiteralStringUTF8Raw");
    sm_direct_token_kind("LiteralStringUTF16Raw");
    sm_direct_token_kind("LiteralStringUTF32Raw");
    sm_direct_token_kind("LiteralCharacter");
    sm_direct_token_kind("LiteralCharacterWide");
    sm_direct_token_kind("LiteralCharacterUTF8");
    sm_direct_token_kind("LiteralCharacterUTF16");
    sm_direct_token_kind("LiteralCharacterUTF32");
    sm_direct_token_kind("PPIncludeFile");
    sm_direct_token_kind("PPErrorMessage");
    
    sm_select_base_kind(TokenBaseKind_Keyword);
    sm_direct_token_kind("KeywordGeneric");
    
    // CPP Operators
    Operator_Set *main_ops = sm_begin_op_set();
    
    sm_select_base_kind(TokenBaseKind_ScopeOpen);
    sm_op("{");
    sm_select_base_kind(TokenBaseKind_ScopeClose);
    sm_op("}");
    sm_select_base_kind(TokenBaseKind_ParentheticalOpen);
    sm_op("(");
    sm_op("[");
    sm_select_base_kind(TokenBaseKind_ParentheticalClose);
    sm_op(")");
    sm_op("]");
    sm_select_base_kind(TokenBaseKind_StatementClose);
    sm_op(";");
    sm_op(":");
    sm_select_base_kind(TokenBaseKind_Operator);
    sm_op("...");
    
    sm_op("::");
    
    sm_op("++");
    sm_op("--");
    sm_op(".");
    sm_op("->", "Arrow");
    sm_op("+");
    sm_op("-");
    sm_op("!");
    sm_op("~");
    sm_op("*");
    sm_op("&");
    sm_op(".*");
    sm_op("->*", "ArrowStar");
    sm_op("/");
    sm_op("%");
    
    // '<' and '>' are re-named around each group of operators that follows:
    // "Left"/"Right" before the shifts, "Less"/"Grtr" before the comparisons.
    // NOTE(review): presumably the current character names feed the
    // generated operator token names -- confirm in the generator.
    sm_char_name('<', "Left");
    sm_char_name('>', "Right");
    sm_op("<<");
    sm_op(">>");
    
    sm_op("<=>", "Compare");
    
    sm_char_name('<', "Less");
    sm_char_name('>', "Grtr");
    sm_op("<");
    sm_op("<=");
    sm_op(">");
    sm_op(">=");
    sm_op("==");
    sm_op("!=");
    
    sm_op("^");
    sm_op("|");
    sm_op("&&");
    sm_op("||");
    sm_op("?");
    sm_op("=");
    sm_op("+=");
    sm_op("-=");
    sm_op("*=");
    sm_op("/=");
    sm_op("%=");
    
    sm_char_name('<', "Left");
    sm_char_name('>', "Right");
    sm_op("<<=");
    sm_op(">>=");
    
    sm_select_base_kind(TokenBaseKind_StatementClose);
    sm_op(",");
    
    // CPP Preprocess Operators
    Operator_Set *pp_ops = sm_begin_op_set();
    
    sm_op("#", "PPStringify");
    sm_op("##", "PPConcat");
    
    // CPP Keywords
    Keyword_Set *main_keys = sm_begin_key_set("main_keys");
    
    sm_select_base_kind(TokenBaseKind_Keyword);
    sm_key("Void");
    sm_key("Bool");
    sm_key("Char");
    sm_key("Int");
    sm_key("Float");
    sm_key("Double");
    sm_key("Long");
    sm_key("Short");
    sm_key("Unsigned");
    sm_key("Signed");
    sm_key("Const");
    sm_key("Volatile");
    sm_key("Asm");
    sm_key("Break");
    sm_key("Case");
    sm_key("Catch");
    sm_key("Continue");
    sm_key("Default");
    sm_key("Do");
    sm_key("Else");
    sm_key("For");
    sm_key("Goto");
    sm_key("If");
    sm_key("Return");
    sm_key("Switch");
    sm_key("Try");
    sm_key("While");
    sm_key("StaticAssert", "static_assert");
    sm_key("ConstCast", "const_cast");
    sm_key("DynamicCast", "dynamic_cast");
    sm_key("ReinterpretCast", "reinterpret_cast");
    sm_key("StaticCast", "static_cast");
    sm_key("Class");
    sm_key("Enum");
    sm_key("Struct");
    sm_key("Typedef");
    sm_key("Union");
    sm_key("Template");
    sm_key("Typename");
    sm_key("Friend");
    sm_key("Namespace");
    sm_key("Private");
    sm_key("Protected");
    sm_key("Public");
    sm_key("Using");
    sm_key("Extern");
    sm_key("Export");
    sm_key("Inline");
    sm_key("Static");
    sm_key("Virtual");
    sm_key("AlignAs");
    sm_key("Explicit");
    sm_key("NoExcept");
    sm_key("NullPtr");
    sm_key("Operator");
    sm_key("Register");
    sm_key("This");
    sm_key("ThreadLocal", "thread_local");
    sm_key("SizeOf");
    sm_key("AlignOf");
    sm_key("DeclType");
    sm_key("TypeID");
    sm_key("New");
    sm_key("Delete");
    
    sm_select_base_kind(TokenBaseKind_LiteralInteger);
    sm_key("LiteralTrue", "true");
    sm_key("LiteralFalse", "false");
    
    sm_select_base_kind(TokenBaseKind_Identifier);
    sm_key_fallback("Identifier");
    
    // CPP Preprocess Directives
    Keyword_Set *pp_directive_set = sm_begin_key_set("pp_directives");
    
    sm_select_base_kind(TokenBaseKind_Preprocessor);
    sm_key("PPInclude", "include");
    sm_key("PPVersion", "version");
    sm_key("PPDefine", "define");
    sm_key("PPUndef", "undef");
    sm_key("PPIf", "if");
    sm_key("PPIfDef", "ifdef");
    sm_key("PPIfNDef", "ifndef");
    sm_key("PPElse", "else");
    sm_key("PPElIf", "elif");
    sm_key("PPEndIf", "endif");
    sm_key("PPError", "error");
    sm_key("PPImport", "import");
    sm_key("PPUsing", "using");
    sm_key("PPLine", "line");
    sm_key("PPPragma", "pragma");
    
    sm_select_base_kind(TokenBaseKind_LexError);
    sm_key_fallback("PPUnknown");
    
    // CPP Preprocess Keywords
    Keyword_Set *pp_keys = sm_begin_key_set("pp_keys");
    
    sm_select_base_kind(TokenBaseKind_Keyword);
    sm_key("PPDefined", "defined");
    
    // State Machine
    State *root = sm_begin_state_machine();
    
    // Flags describing the literal currently being lexed.
    Flag *is_hex = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_oct = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_wide = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_utf8 = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_utf16 = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_utf32 = sm_add_flag(FlagResetRule_AutoZero);
    Flag *is_char = sm_add_flag(FlagResetRule_AutoZero);
    
    // Flags describing the preprocessor line being lexed; they are cleared
    // explicitly by the whitespace_end_pp state below.
    Flag *is_pp_body = sm_add_flag(FlagResetRule_KeepState);
    Flag *is_include_body = sm_add_flag(FlagResetRule_KeepState);
    Flag *is_error_body = sm_add_flag(FlagResetRule_KeepState);
    
    sm_flag_bind(is_pp_body, TokenBaseFlag_PreprocessorBody);
    
    // Declares a local State* named N, registered under the same name.
#define AddState(N) State *N = sm_add_state(#N)
    
    AddState(identifier);
    AddState(whitespace);
    AddState(whitespace_end_pp);
    AddState(error_body);
    AddState(backslash);
    
    AddState(operator_or_fnumber_dot);
    AddState(operator_or_comment_slash);
    
    AddState(number);
    AddState(znumber);
    
    AddState(fnumber_decimal);
    AddState(fnumber_exponent);
    AddState(fnumber_exponent_sign);
    AddState(fnumber_exponent_digits);
    
    AddState(number_hex_first);
    AddState(number_hex);
    AddState(number_oct);
    
    AddState(U_number);
    AddState(L_number);
    AddState(UL_number);
    AddState(LU_number);
    AddState(l_number);
    AddState(Ul_number);
    AddState(lU_number);
    AddState(LL_number);
    AddState(ULL_number);
    
    AddState(pp_directive_whitespace);
    AddState(pp_directive_first);
    AddState(pp_directive);
    AddState(pp_directive_emit);
    
    AddState(include_pointy);
    AddState(include_quotes);
    
    AddState(pre_L);
    AddState(pre_u);
    AddState(pre_U);
    AddState(pre_u8);
    AddState(pre_R);
    
    AddState(character);
    AddState(string);
    AddState(string_esc);
    AddState(string_esc_oct2);
    AddState(string_esc_oct1);
    AddState(string_esc_hex);
    AddState(string_esc_universal_8);
    AddState(string_esc_universal_7);
    AddState(string_esc_universal_6);
    AddState(string_esc_universal_5);
    AddState(string_esc_universal_4);
    AddState(string_esc_universal_3);
    AddState(string_esc_universal_2);
    AddState(string_esc_universal_1);
    
    AddState(raw_string);
    AddState(raw_string_get_delim);
    AddState(raw_string_finish_delim);
    AddState(raw_string_find_close);
    AddState(raw_string_try_delim);
    AddState(raw_string_try_quote);
    
    AddState(comment_block);
    AddState(comment_block_try_close);
    AddState(comment_block_newline);
    AddState(comment_line);
    AddState(comment_line_backslashing);
    
    // '.' and '/' get dedicated states (a '.' may start a float, a '/' may
    // start a comment), so the general operator set excludes operators that
    // begin with either of them.
    Operator_Set *main_ops_without_dot_or_slash = smo_copy_op_set(main_ops);
    smo_remove_ops_with_prefix(main_ops_without_dot_or_slash, ".");
    smo_remove_ops_with_prefix(main_ops_without_dot_or_slash, "/");
    
    // Operators that begin with '.', with that first character skipped; used
    // from operator_or_fnumber_dot after the '.' has been read.
    Operator_Set *main_ops_with_dot = smo_copy_op_set(main_ops);
    smo_remove_ops_without_prefix(main_ops_with_dot, ".");
    smo_ops_string_skip(main_ops_with_dot, 1);
    
    ////
    
    sm_select_state(root);
    
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("EOF");
        sm_case_eof(emit);
    }
    
    // 'u', 'L', 'U' and 'R' are missing from this set on purpose: they may
    // begin a prefixed string/character literal and have their own cases.
    sm_case("abcdefghijklmnopqrstvwxyz"
            "ABCDEFGHIJKMNOPQSTVWXYZ"
            "_$",
            identifier);
    sm_case(utf8, identifier);
    sm_case("L", pre_L);
    sm_case("u", pre_u);
    sm_case("U", pre_U);
    sm_case("R", pre_R);
    
    sm_case_flagged(is_error_body, true, " \r\t\f\v", error_body);
    sm_case_flagged(is_error_body, false, " \r\t\f\v", whitespace);
    sm_case("\n", whitespace_end_pp);
    sm_case("\\", backslash);
    
    sm_case(".", operator_or_fnumber_dot);
    sm_case("/", operator_or_comment_slash);
    {
        Character_Set *char_set = smo_new_char_set();
        smo_char_set_union_ops_firsts(char_set, main_ops_without_dot_or_slash);
        // '<' is handled separately because inside an #include body it
        // opens a file name instead of an operator.
        smo_char_set_remove(char_set, ".</");
        char *char_set_array = smo_char_set_get_array(char_set);
        State *operator_state = smo_op_set_lexer_root(main_ops_without_dot_or_slash, root, "LexError");
        sm_case_peek(char_set_array, operator_state);
        sm_case_peek_flagged(is_include_body, false, "<", operator_state);
    }
    
    sm_case_flagged(is_include_body, true, "<", include_pointy);
    sm_case_flagged(is_include_body, true, "\"", include_quotes);
    
    sm_case("123456789", number);
    sm_case("0", znumber);
    
    sm_case_flagged(is_include_body, false, "\"", string);
    sm_case("\'", character);
    // A '#' outside a preprocessor body starts a directive; inside one it is
    // lexed with the preprocessor operator set ("#" / "##").
    sm_case_flagged(is_pp_body, false, "#", pp_directive_whitespace);
    {
        State *operator_state = smo_op_set_lexer_root(pp_ops, root, "LexError");
        sm_case_peek_flagged(is_pp_body, true, "#", operator_state);
    }
    
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_fallback(emit);
    }
    
    ////
    
    sm_select_state(identifier);
    sm_case("abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "_$"
            "0123456789",
            identifier);
    sm_case(utf8, identifier);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_keys(is_pp_body, pp_keys);
        sm_emit_handler_keys(main_keys);
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(whitespace);
    sm_case(" \t\r\f\v", whitespace);
    sm_case("\n", whitespace_end_pp);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("Whitespace");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // Reached on a newline: clears all three preprocessor-line flags.
    sm_select_state(whitespace_end_pp);
    sm_set_flag(is_pp_body, false);
    sm_set_flag(is_include_body, false);
    sm_set_flag(is_error_body, false);
    sm_fallback_peek(whitespace);
    
    ////
    
    sm_select_state(error_body);
    sm_case("\r", error_body);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("PPErrorMessage");
        sm_case_peek("\n", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("PPErrorMessage");
        sm_case_eof_peek(emit);
    }
    sm_fallback(error_body);
    
    ////
    
    sm_select_state(backslash);
    sm_case("\r", backslash);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("Backslash");
        sm_case("\n", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("Backslash");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(operator_or_comment_slash);
    sm_case("*", comment_block);
    sm_case("/", comment_line);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("DivEq");
        sm_case("=", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("Div");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(operator_or_fnumber_dot);
    sm_case("0123456789", fnumber_decimal);
    {
        Character_Set *char_set = smo_new_char_set();
        smo_char_set_union_ops_firsts(char_set, main_ops_with_dot);
        char *char_set_array = smo_char_set_get_array(char_set);
        State *operator_state = smo_op_set_lexer_root(main_ops_with_dot, root, "LexError");
        sm_case_peek(char_set_array, operator_state);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("Dot");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(number);
    sm_case("0123456789", number);
    sm_case(".", fnumber_decimal);
    sm_case("Ee", fnumber_exponent);
    sm_case("Uu", U_number);
    sm_case("L", L_number);
    sm_case("l", l_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralInteger");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // A literal that starts with '0': may turn into hex, octal or a float.
    sm_select_state(znumber);
    sm_case(".", fnumber_decimal);
    sm_case("Ee", fnumber_exponent);
    sm_case("Uu", U_number);
    sm_case("L", L_number);
    sm_case("l", l_number);
    sm_case("Xx", number_hex_first);
    sm_case("01234567", number_oct);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralInteger");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(fnumber_decimal);
    sm_case("0123456789", fnumber_decimal);
    sm_case("Ee", fnumber_exponent);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat32");
        sm_case("Ff", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_case("Ll", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(fnumber_exponent);
    sm_case("+-", fnumber_exponent_sign);
    sm_case("0123456789", fnumber_exponent_digits);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat32");
        sm_case("Ff", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_case("Ll", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(fnumber_exponent_sign);
    sm_case("0123456789", fnumber_exponent_digits);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat32");
        sm_case("Ff", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_case("Ll", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(fnumber_exponent_digits);
    sm_case("0123456789", fnumber_exponent_digits);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat32");
        sm_case("Ff", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_case("Ll", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralFloat64");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // After "0x"/"0X": at least one hex digit is required.
    sm_select_state(number_hex_first);
    sm_set_flag(is_hex, true);
    sm_case("0123456789abcdefABCDEF", number_hex);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(number_hex);
    sm_case("0123456789abcdefABCDEF", number_hex);
    sm_case("Uu", U_number);
    sm_case("L", L_number);
    sm_case("l", l_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralIntegerHex");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(number_oct);
    sm_set_flag(is_oct, true);
    sm_case("01234567", number_oct);
    sm_case("Uu", U_number);
    sm_case("L", L_number);
    sm_case("l", l_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LiteralIntegerOct");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // Integer suffix states. Each one emits the Hex/Oct/decimal variant of
    // its suffix depending on the is_hex / is_oct flags.
    sm_select_state(U_number);
    sm_case("L", UL_number);
    sm_case("l", Ul_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexU");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctU");
        sm_emit_handler_direct("LiteralIntegerU");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(L_number);
    sm_case("L", LL_number);
    sm_case("Uu", LU_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctL");
        sm_emit_handler_direct("LiteralIntegerL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(l_number);
    sm_case("l", LL_number);
    sm_case("Uu", lU_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctL");
        sm_emit_handler_direct("LiteralIntegerL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(LL_number);
    sm_case("Uu", ULL_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexLL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctLL");
        sm_emit_handler_direct("LiteralIntegerLL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(UL_number);
    sm_case("L", ULL_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexUL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctUL");
        sm_emit_handler_direct("LiteralIntegerUL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(Ul_number);
    sm_case("l", ULL_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexUL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctUL");
        sm_emit_handler_direct("LiteralIntegerUL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(LU_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexUL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctUL");
        sm_emit_handler_direct("LiteralIntegerUL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(lU_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexUL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctUL");
        sm_emit_handler_direct("LiteralIntegerUL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(ULL_number);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_hex, "LiteralIntegerHexULL");
        sm_emit_handler_direct(is_oct, "LiteralIntegerOctULL");
        sm_emit_handler_direct("LiteralIntegerULL");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // After a '#' that starts a directive: skip blanks, then read its name.
    sm_select_state(pp_directive_whitespace);
    sm_case(" \t\f\v", pp_directive_whitespace);
    sm_case_peek("abcdefghijklmnopqrstuvwxyz"
                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                 "_"
                 "0123456789",
                 pp_directive_first);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(pp_directive_first);
    sm_delim_mark_first();
    sm_set_flag(is_pp_body, true);
    sm_fallback_peek(pp_directive);
    
    ////
    
    sm_select_state(pp_directive);
    sm_case("abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "_"
            "0123456789",
            pp_directive);
    sm_fallback_peek(pp_directive_emit);
    
    ////
    
    // The directive name lies between the two delimiter marks; it is looked
    // up in pp_directive_set, and "include"/"error" additionally switch on
    // the flag that changes how the rest of the line is lexed.
    sm_select_state(pp_directive_emit);
    sm_delim_mark_one_past_last();
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_check_set_flag("PPInclude", is_include_body, true);
        sm_emit_check_set_flag("PPError", is_error_body, true);
        sm_emit_handler_keys_delim(pp_directive_set);
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(include_pointy);
    // '>' is intentionally absent from this loop set: it terminates the file
    // name and is handled by the PPIncludeFile case right below. Listing it
    // here as well would give '>' two conflicting transitions.
    sm_case("abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "!@#$%^&*()-=_+[]{}\\|;:',./<? "
            "0123456789",
            include_pointy);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("PPIncludeFile");
        sm_case(">", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_fallback_peek(emit);
    }
    
    ////
    
    sm_select_state(include_quotes);
    sm_case("abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "!@#$%^&*()-=_+[]{}\\|;:',./<>? "
            "0123456789",
            include_quotes);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("PPIncludeFile");
        sm_case("\"", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_fallback_peek(emit);
    }
    
    ////
    
    // Literal prefix states: each records the encoding flag and then either
    // enters a string / raw string or falls back to a plain identifier.
    sm_select_state(pre_L);
    sm_set_flag(is_wide, true);
    sm_case("\"", string);
    sm_case("R", pre_R);
    sm_fallback_peek(identifier);
    
    ////
    
    sm_select_state(pre_u);
    sm_set_flag(is_utf16, true);
    sm_case("\"", string);
    sm_case("8", pre_u8);
    sm_case("R", pre_R);
    sm_fallback_peek(identifier);
    
    ////
    
    sm_select_state(pre_U);
    sm_set_flag(is_utf32, true);
    sm_case("\"", string);
    sm_case("R", pre_R);
    sm_fallback_peek(identifier);
    
    ////
    
    sm_select_state(pre_u8);
    sm_set_flag(is_utf8, true);
    sm_case("\"", string);
    sm_case("R", pre_R);
    sm_fallback_peek(identifier);
    
    ////
    
    sm_select_state(pre_R);
    sm_case("\"", raw_string);
    sm_fallback_peek(identifier);
    
    ////
    
    // Character literals share the string states; is_char selects which
    // quote closes the literal and which token kind is emitted.
    sm_select_state(character);
    sm_set_flag(is_char, true);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_wide, "LiteralStringWide");
        sm_emit_handler_direct(is_utf8 , "LiteralStringUTF8");
        sm_emit_handler_direct(is_utf16, "LiteralStringUTF16");
        sm_emit_handler_direct(is_utf32, "LiteralStringUTF32");
        sm_emit_handler_direct("LiteralString");
        sm_case_flagged(is_char, false, "\"", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_wide, "LiteralCharacterWide");
        sm_emit_handler_direct(is_utf8 , "LiteralCharacterUTF8");
        sm_emit_handler_direct(is_utf16, "LiteralCharacterUTF16");
        sm_emit_handler_direct(is_utf32, "LiteralCharacterUTF32");
        sm_emit_handler_direct("LiteralCharacter");
        sm_case_flagged(is_char, true, "\'", emit);
    }
    sm_case("\\", string_esc);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_peek("\n", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_eof_peek(emit);
    }
    sm_case_flagged(is_char, true, "\"", string);
    sm_case_flagged(is_char, false, "\'", string);
    sm_fallback(string);
    
    ////
    
    sm_select_state(string_esc);
    sm_case("\n'\"?\\abfnrtv", string);
    sm_case("01234567", string_esc_oct2);
    sm_case("x", string_esc_hex);
    sm_case("u", string_esc_universal_4);
    sm_case("U", string_esc_universal_8);
    // NOTE(review): "\n" is also listed in the first case of this state, so
    // which of the two "\n" transitions applies depends on how the generator
    // resolves duplicate cases -- confirm.
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_peek("\n", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_eof_peek(emit);
    }
    sm_fallback(string);
    
    ////
    
    sm_select_state(string_esc_oct2);
    sm_case("01234567", string_esc_oct1);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_oct1);
    sm_case("01234567", string);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_hex);
    sm_case("0123456789abcdefABCDEF", string_esc_hex);
    sm_fallback_peek(string);
    
    ////
    
    // Universal character names: the state suffix counts the hex digits that
    // may still follow ("\U" starts at 8, "\u" starts at 4).
    sm_select_state(string_esc_universal_8);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_7);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_7);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_6);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_6);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_5);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_5);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_4);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_4);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_3);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_3);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_2);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_2);
    sm_case("0123456789abcdefABCDEF", string_esc_universal_1);
    sm_fallback_peek(string);
    
    ////
    
    sm_select_state(string_esc_universal_1);
    sm_case("0123456789abcdefABCDEF", string);
    sm_fallback_peek(string);
    
    ////
    
    // Raw strings: the text between the opening quote and '(' is marked as
    // the delimiter, then the body is scanned for ')' + delimiter + '"'.
    sm_select_state(raw_string);
    sm_delim_mark_first();
    sm_fallback_peek(raw_string_get_delim);
    
    ////
    
    sm_select_state(raw_string_get_delim);
    sm_case_peek("(", raw_string_finish_delim);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case(" \\)", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_eof_peek(emit);
    }
    sm_fallback(raw_string_get_delim);
    
    ////
    
    sm_select_state(raw_string_finish_delim);
    sm_delim_mark_one_past_last();
    sm_fallback_peek(raw_string_find_close);
    
    ////
    
    sm_select_state(raw_string_find_close);
    sm_case(")", raw_string_try_delim);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LexError");
        sm_case_eof_peek(emit);
    }
    sm_fallback(raw_string_find_close);
    
    ////
    
    sm_select_state(raw_string_try_delim);
    sm_match_delim(raw_string_try_quote, raw_string_find_close);
    
    ////
    
    sm_select_state(raw_string_try_quote);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct(is_wide, "LiteralStringWideRaw");
        sm_emit_handler_direct(is_utf8 , "LiteralStringUTF8Raw");
        sm_emit_handler_direct(is_utf16, "LiteralStringUTF16Raw");
        sm_emit_handler_direct(is_utf32, "LiteralStringUTF32Raw");
        sm_emit_handler_direct("LiteralStringRaw");
        sm_case("\"", emit);
    }
    sm_fallback_peek(raw_string_find_close);
    
    ////
    
    sm_select_state(comment_block);
    sm_case("*", comment_block_try_close);
    sm_case("\n", comment_block_newline);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("BlockComment");
        sm_case_eof_peek(emit);
    }
    sm_fallback(comment_block);
    
    ////
    
    sm_select_state(comment_block_try_close);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("BlockComment");
        sm_case("/", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("BlockComment");
        sm_case_eof_peek(emit);
    }
    sm_case("*", comment_block_try_close);
    sm_fallback(comment_block);
    
    ////
    
    // A newline inside a block comment clears the pp-body and include-body
    // flags (is_error_body is not touched here).
    sm_select_state(comment_block_newline);
    sm_set_flag(is_pp_body, false);
    sm_set_flag(is_include_body, false);
    sm_fallback_peek(comment_block);
    
    ////
    
    sm_select_state(comment_line);
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LineComment");
        sm_case_peek("\n", emit);
    }
    {
        Emit_Rule *emit = sm_emit_rule();
        sm_emit_handler_direct("LineComment");
        sm_case_eof_peek(emit);
    }
    sm_case("\\", comment_line_backslashing);
    sm_fallback(comment_line);
    
    ////
    
    // After a backslash in a line comment: "\r" is skipped and the next
    // character (including "\n") is consumed as part of the comment.
    sm_select_state(comment_line_backslashing);
    sm_case("\r", comment_line_backslashing);
    sm_fallback(comment_line);
}
// BOTTOM