Add typescript support and document how to add a language

2025-07-13 14:37:25 -07:00 · 2025-07-13 14:37:25 -07:00 · 9ecf49d278
parent a5f13529c3
commit 9ecf49d278
12 changed files with 283874 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -4,6 +4,8 @@ Welcome to the 4coder community repository.

 # Building

+TODO - these are outdated
+
 ## Windows
 1. Setup the MSVC toolchain in your environment, this can be done with the `code/custom/bin/setup_cl_x64.bat` script
 2. call the `package.bat` script from the code directory
@ -78,3 +80,23 @@ You need to compile one of those file and run it from the `code` directory.
 There is also `code\4ed_api_check.cpp` to verify the generated file but it's not clear at the moment what to check against.

 - `code\4ed_generate_keycodes.cpp` is also a bit appart as it generates `code\custom\generated\4coder_event_codes.h` which are keyboard key codes and some event hook ids.
+
+# Adding a Language
+
+## Adding the Parser/Scanner to the tree-sitter static library
+
+1. Create a folder named for your language in `non-source/foreign/tree-sitter/lang`.
+2. Copy your grammar's `parser.c` and its scanner (`scanner.cc`, or `scanner.c` if it is written in C) into that folder.
+3. Add `build_tree_sitter_language "<your_folder_name>" "<Readable Language Name>"` to `build-libs.sh::build_tree_sitter()`
+4. Run build-libs.sh and ensure your language built properly.
+
+If there was an error try the following:
+- If your error looks like a missing type, it's probably because your scanner/parser was written for a different version of tree-sitter than the one that is linked automatically. Most parsers ship their own `tree_sitter` directory next to `parser.c`, containing some of `parser.h`, `array.h`, and `alloc.h`. Copy this directory into your language's directory, and change the includes that reference it in your parser/scanner to be local, i.e. change `#include "tree_sitter/parser.h"` to `#include "./tree_sitter/parser.h"`.
+
+## Adding the Language to 4coder
+
+1. In `code/custom/languages` create `tree_sitter_<your_language_identifier>.h` by copying `tree_sitter_language_base.h`
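+2. Include your new header near the top of `code/custom/4coder_tree_sitter.cpp`, next to the other `languages/tree_sitter_*.h` includes, and add `tree_sitter_register_<your_language_identifier>(app);` to `register_all_languages()`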
+3. Handle the TODO comments in your new language file
+
+At this point, if there are no compilation errors, you should have syntax highlighting, virtual whitespace support, and basic goto definition support in your new language.
--- a/build_new/scripts/build-libs.sh
+++ b/build_new/scripts/build-libs.sh
@ -25,6 +25,10 @@ build_tree_sitter_language() {
  SCANNER_SRC="${CUSTOM_ROOT}/lang/$LANG_DIR/scanner.cc"
  SCANNER_OUT="$TEMP_OUT_DIR/${LANG_DIR}_scanner.o"

+  if [ ! -f "$SCANNER_SRC" ]; then  # no scanner.cc: fall back to the grammar's C scanner
+    SCANNER_SRC="${CUSTOM_ROOT}/lang/$LANG_DIR/scanner.c"
+  fi
+
  print_step "Building tree-sitter $LANG_NAME Language Lib ($LANG_DIR)"
  BUILT_ANYTHING=0

@ -79,6 +83,7 @@ build_tree_sitter() {
  build_tree_sitter_language "cpp" "C++"
  build_tree_sitter_language "jai" "Jai"
  build_tree_sitter_language "bash" "Bash"
+  build_tree_sitter_language "ts" "Typescript"

  # Link tree-sitter lib and parser obj files into a static library to link into main custom dll
  print_step "Linking tree-sitter static library"
--- a/code/custom/4coder_tree_sitter.cpp
+++ b/code/custom/4coder_tree_sitter.cpp
@ -6,6 +6,16 @@
 #include "languages/tree_sitter_cpp.h"
 #include "languages/tree_sitter_jai.h"
 #include "languages/tree_sitter_bash.h"
+#include "languages/tree_sitter_ts.h"
+
+// Registers every tree-sitter language whose header is included above.
+// Called once from tree_sitter_init(). To add a language, add its
+// tree_sitter_register_<lang>(app) call to this list.
+function void
+register_all_languages(Application_Links* app)
+{
+  tree_sitter_register_cpp(app);
+  tree_sitter_register_jai(app);
+  tree_sitter_register_bash(app);
+  tree_sitter_register_ts(app);
+}

 ////////////////////////////////////////////////////////////////////
 // Language Management
@ -110,9 +120,7 @@ tree_sitter_init(Application_Links* app)

  tree_sitter_languages.arena = make_arena_system(KB(16));

-  tree_sitter_register_cpp(app);
-  tree_sitter_register_jai(app);
-  tree_sitter_register_bash(app);
+  register_all_languages(app);
 }

 function void
@ -426,6 +434,8 @@ tree_sitter_code_index_update_single_buffer(Application_Links* app, Buffer_ID bu
  return parse_state;
 }

+// Compile-time switch read by tree_sitter_code_index_update_tick().
+// When true, each successfully parsed buffer is queued in buffered_code_indices.
+// When false, its index is published immediately with code_index_set_file()
+// while holding the code index lock.
+#define BUFFER_CODE_INDEX_UPDATES false
+
 function void
 tree_sitter_code_index_update_tick(Application_Links* app)
 {
@ -441,8 +451,14 @@ tree_sitter_code_index_update_tick(Application_Links* app)
    parse_state = tree_sitter_code_index_update_single_buffer(app, modified_node->buffer);
    if (parse_state.ok)
    {
+      #if BUFFER_CODE_INDEX_UPDATES
      buffered_code_indices[buffered_code_indices_count] = parse_state;
      buffered_code_indices_count += 1;
+      #else
+      code_index_lock();
+      code_index_set_file(parse_state.buffer_id, parse_state.index_arena, parse_state.index);
+      code_index_unlock();
+      #endif
    }

    if (buffered_code_indices_count >= BUFFERED_CODE_INDICES_CAP)
--- a/code/custom/languages/tree_sitter_language_base.h
+++ b/code/custom/languages/tree_sitter_language_base.h
@ -0,0 +1,73 @@
+/*
+
+tree_sitter_language_base.h
+
+This file is a template from which you can set up new languages for syntax highlighting,
+go-to-definition, and virtual whitespace in 4coder.
+
+BEFORE YOU START: go read the Adding a Language instructions in README.md
+
+1. Find and replace "NEWLANG"/"newlang" with <your language identifier> (e.g. "RUST"/"rust"),
+    and rename the TREE_SITTER_LANGUAGE_BASE_H include guard so it is unique to your new file
+2. Go through each TODO in this file and complete it
+3. Include this file from "code/custom/4coder_tree_sitter.cpp"
+4. Add "tree_sitter_register_newlang(app);" to 4coder_tree_sitter.cpp::register_all_languages()
+5. Compile and run
+
+If you are confused about what to do, go look at tree_sitter_cpp.h as a working example.
+
+*/
+
+#ifndef TREE_SITTER_LANGUAGE_BASE_H
+#define TREE_SITTER_LANGUAGE_BASE_H
+
+String_Const_u8 TS_NEWLANG_EXTENSIONS[] = {
+  // TODO(PS): fill out this array with the file extensions you want to be treated
+  // as this language. tree_sitter_register_newlang() registers the language once per entry.
+  SCu8("ext1"),
+  SCu8("ext2"),
+};
+
+// Tags query source. tree_sitter_register_newlang() passes it to
+// tree_sitter_query_new() for the Tree_Sitter_Language_Query_Tags slot.
+String_Const_u8 TS_NEWLANG_TAGS_QUERY_SCM = string_u8_litexpr(R"DONE(
+
+; TODO - paste your grammars tags query here
+
+; Important - if you want virtual indentation, leave these tag queries here
+(_ "{" @scope_begin "}" @scope_end )
+(_ "(" @scope_begin ")" @scope_end )
+(_ "[" @scope_begin "]" @scope_end )
+)DONE");
+
+// Highlights query source. tree_sitter_register_newlang() passes it to
+// tree_sitter_query_new() for the Tree_Sitter_Language_Query_Highlights slot.
+String_Const_u8 TS_NEWLANG_HIGHLIGHT_QUERY_SCM = string_u8_litexpr(R"DONE(
+
+; TODO - paste your grammars highlights query here
+
+)DONE");
+
+// Declaration of the grammar's language entry point; tree_sitter_register_newlang()
+// calls it to obtain the TSLanguage. The name must match the function your parser
+// actually exports, which can differ from your language identifier (the TypeScript
+// header, for example, declares tree_sitter_typescript()).
+// NOTE(PS): depending on how you built your scanner, it might not need to be inside an extern "C" block
+extern "C" {
+  TSLanguage* tree_sitter_newlang();
+}
+
+// Creates the highlight and tags queries for this language and registers the
+// language once for every extension in TS_NEWLANG_EXTENSIONS.
+void
+tree_sitter_register_newlang(Application_Links* app)
+{
+  TSLanguage* lang = tree_sitter_newlang();
+
+  // TODO(PS): set this to zero if your language can not make use of virtual indentation (like python)
+  Tree_Sitter_Language_Flags lang_flags = Tree_Sitter_Language_Can_Receive_Virtual_Indent;
+
+  // Highlights first, then tags: same creation order as the other slots are filled.
+  Tree_Sitter_Language_Queries lang_queries = {};
+  lang_queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, lang, TS_NEWLANG_HIGHLIGHT_QUERY_SCM);
+  lang_queries.ptr[Tree_Sitter_Language_Query_Tags]       = tree_sitter_query_new(app, lang, TS_NEWLANG_TAGS_QUERY_SCM);
+
+  for (int ext_index = 0; ext_index < ArrayCount(TS_NEWLANG_EXTENSIONS); ext_index += 1)
+  {
+    tree_sitter_register_language(TS_NEWLANG_EXTENSIONS[ext_index], lang, lang_queries, lang_flags);
+  }
+}
+
+#endif //TREE_SITTER_LANGUAGE_BASE_H
--- a/code/custom/languages/tree_sitter_ts.h
+++ b/code/custom/languages/tree_sitter_ts.h
@ -0,0 +1,311 @@
+/* date = July 13th 2025 11:38 am */
+
+#ifndef TREE_SITTER_TS_H
+#define TREE_SITTER_TS_H
+
+String_Const_u8 TS_TS_TAGS_QUERY = string_u8_litexpr(R"DONE(
+
+(function_declaration
+  name: (identifier) @name) @definition.function
+
+(function_signature
+  name: (identifier) @name) @definition.function
+
+(variable_declarator
+  name: (identifier) @name
+  value: (arrow_function)) @definition.function
+
+(method_signature
+  name: (property_identifier) @name) @definition.method
+
+(abstract_method_signature
+  name: (property_identifier) @name) @definition.method
+
+(abstract_class_declaration
+  name: (type_identifier) @name) @definition.class
+
+(module
+  name: (identifier) @name) @definition.module
+
+(interface_declaration
+  name: (type_identifier) @name) @definition.type
+
+(type_alias_declaration
+  name: (type_identifier) @name) @definition.type
+
+(type_annotation
+  (type_identifier) @name) @reference.type
+
+(new_expression
+  constructor: (identifier) @name) @reference.class
+
+(_ "{" @scope_begin "}" @scope_end )
+(_ "(" @scope_begin ")" @scope_end )
+(_ "[" @scope_begin "]" @scope_end )
+
+)DONE");
+
+String_Const_u8 TS_TS_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
+;;;;;;;;;;;;;;;;; JAVASCRIPT ;;;;;;;;;;;;;;;;;
+; Variables
+;----------
+
+(identifier) @variable
+
+; Properties
+;-----------
+
+(property_identifier) @property
+
+; Function and method definitions
+;--------------------------------
+
+(function_expression
+  name: (identifier) @function)
+(function_declaration
+  name: (identifier) @function)
+(method_definition
+  name: (property_identifier) @function.method)
+
+(pair
+  key: (property_identifier) @function.method
+  value: [(function_expression) (arrow_function)])
+
+(assignment_expression
+  left: (member_expression
+    property: (property_identifier) @function.method)
+  right: [(function_expression) (arrow_function)])
+
+(variable_declarator
+  name: (identifier) @function
+  value: [(function_expression) (arrow_function)])
+
+(assignment_expression
+  left: (identifier) @function
+  right: [(function_expression) (arrow_function)])
+
+; Function and method calls
+;--------------------------
+
+(call_expression
+  function: (identifier) @function)
+
+(call_expression
+  function: (member_expression
+    property: (property_identifier) @function.method))
+
+; Special identifiers
+;--------------------
+
+((identifier) @constructor
+ (#match? @constructor "^[A-Z]"))
+
+([
+    (identifier)
+    (shorthand_property_identifier)
+    (shorthand_property_identifier_pattern)
+ ] @constant
+ (#match? @constant "^[A-Z_][A-Z\\d_]+$"))
+
+((identifier) @variable.builtin
+ (#match? @variable.builtin "^(arguments|module|console|window|document)$")
+ (#is-not? local))
+
+((identifier) @function.builtin
+ (#eq? @function.builtin "require")
+ (#is-not? local))
+
+; Literals
+;---------
+
+(this) @variable.builtin
+(super) @variable.builtin
+
+[
+  (true)
+  (false)
+  (null)
+  (undefined)
+] @constant.builtin
+
+(comment) @comment
+
+[
+  (string)
+  (template_string)
+] @string
+
+(regex) @string.special
+(number) @number
+
+; Tokens
+;-------
+
+[
+  ";"
+  (optional_chain)
+  "."
+  ","
+] @punctuation.delimiter
+
+[
+  "-"
+  "--"
+  "-="
+  "+"
+  "++"
+  "+="
+  "*"
+  "*="
+  "**"
+  "**="
+  "/"
+  "/="
+  "%"
+  "%="
+  "<"
+  "<="
+  "<<"
+  "<<="
+  "="
+  "=="
+  "==="
+  "!"
+  "!="
+  "!=="
+  "=>"
+  ">"
+  ">="
+  ">>"
+  ">>="
+  ">>>"
+  ">>>="
+  "~"
+  "^"
+  "&"
+  "|"
+  "^="
+  "&="
+  "|="
+  "&&"
+  "||"
+  "??"
+  "&&="
+  "||="
+  "??="
+] @operator
+
+[
+  "("
+  ")"
+  "["
+  "]"
+  "{"
+  "}"
+]  @punctuation.bracket
+
+(template_substitution
+  "${" @punctuation.special
+  "}" @punctuation.special) @embedded
+
+[
+  "as"
+  "async"
+  "await"
+  "break"
+  "case"
+  "catch"
+  "class"
+  "const"
+  "continue"
+  "debugger"
+  "default"
+  "delete"
+  "do"
+  "else"
+  "export"
+  "extends"
+  "finally"
+  "for"
+  "from"
+  "function"
+  "get"
+  "if"
+  "import"
+  "in"
+  "instanceof"
+  "let"
+  "new"
+  "of"
+  "return"
+  "set"
+  "static"
+  "switch"
+  "target"
+  "throw"
+  "try"
+  "typeof"
+  "var"
+  "void"
+  "while"
+  "with"
+  "yield"
+] @keyword
+
+;;;;;;;;;;;;;;;;; TYPESCRIPT ;;;;;;;;;;;;;;;;;
+
+; Types
+
+(type_identifier) @type
+(predefined_type) @type.builtin
+
+((identifier) @type
+ (#match? @type "^[A-Z]"))
+
+(type_arguments
+  "<" @punctuation.bracket
+  ">" @punctuation.bracket)
+
+; Variables
+
+(required_parameter (identifier) @variable.parameter)
+(optional_parameter (identifier) @variable.parameter)
+
+; Keywords
+
+[ "abstract"
+  "declare"
+  "enum"
+  "export"
+  "implements"
+  "interface"
+  "keyof"
+  "namespace"
+  "private"
+  "protected"
+  "public"
+  "type"
+  "readonly"
+  "override"
+  "satisfies"
+] @keyword
+)DONE");
+
+// Language entry point for the TypeScript grammar, declared with C linkage.
+// tree_sitter_register_ts() calls it to obtain the TSLanguage.
+// NOTE(review): presumably defined by lang/ts/parser.c, whose contents are not shown here — confirm.
+extern "C" {
+  TSLanguage* tree_sitter_typescript();
+}
+
+// Registers the TypeScript grammar, together with its highlight and tags
+// queries, for files with the "ts" extension.
+void
+tree_sitter_register_ts (Application_Links* app)
+{
+  TSLanguage* ts_language = tree_sitter_typescript();
+
+  // TypeScript is brace-scoped, so it can take virtual indentation.
+  Tree_Sitter_Language_Flags ts_flags = Tree_Sitter_Language_Can_Receive_Virtual_Indent;
+
+  Tree_Sitter_Language_Queries ts_queries = {};
+  ts_queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, ts_language, TS_TS_HIGHLIGHT_QUERY);
+  ts_queries.ptr[Tree_Sitter_Language_Query_Tags]       = tree_sitter_query_new(app, ts_language, TS_TS_TAGS_QUERY);
+
+  tree_sitter_register_language(SCu8("ts"), ts_language, ts_queries, ts_flags);
+}
+
+#endif //TREE_SITTER_TS_H
--- a/non-source/foreign/tree-sitter/lang/ts/parser.c
+++ b/non-source/foreign/tree-sitter/lang/ts/parser.c
--- a/non-source/foreign/tree-sitter/lang/ts/scanner.c
+++ b/non-source/foreign/tree-sitter/lang/ts/scanner.c
@ -0,0 +1,13 @@
+#include "scanner.h"
+
+// External-scanner entry points for the TypeScript grammar.
+// No scanner state is kept: create returns NULL, serialize reports 0 bytes,
+// destroy and deserialize do nothing, and scan forwards to
+// external_scanner_scan() from scanner.h.
+
+void *tree_sitter_typescript_external_scanner_create() {
+    return NULL;
+}
+
+void tree_sitter_typescript_external_scanner_destroy(void *payload) {
+}
+
+unsigned tree_sitter_typescript_external_scanner_serialize(void *payload, char *buffer) {
+    return 0;
+}
+
+void tree_sitter_typescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+}
+
+bool tree_sitter_typescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+    bool matched = external_scanner_scan(payload, lexer, valid_symbols);
+    return matched;
+}
--- a/non-source/foreign/tree-sitter/lang/ts/scanner.h
+++ b/non-source/foreign/tree-sitter/lang/ts/scanner.h
@ -0,0 +1,347 @@
+#include "./tree_sitter/parser.h"
+
+#include <wctype.h>
+
+// Token kinds this external scanner deals with. The values are used as indices
+// into the valid_symbols array and are written to lexer->result_symbol.
+// NOTE(review): the order presumably has to match the grammar's externals
+// list — confirm before reordering or inserting entries.
+enum TokenType {
+    AUTOMATIC_SEMICOLON,
+    TEMPLATE_CHARS,
+    TERNARY_QMARK,
+    HTML_COMMENT,
+    LOGICAL_OR,
+    ESCAPE_SEQUENCE,
+    REGEX_PATTERN,
+    JSX_TEXT,
+    FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON,
+    ERROR_RECOVERY,
+};
+
+// Consumes the lookahead character with the lexer's skip flag off, i.e. as part of the token.
+static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+
+// Consumes the lookahead character with the lexer's skip flag on, i.e. as text left out of the token.
+static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+
+// Scans a TEMPLATE_CHARS token: the run of template-string text up to, but not
+// including, the next backtick, `${`, or backslash.
+// Returns true only when at least one character was scanned before the stop
+// character; returns false for an empty run or when end of input is reached.
+static bool scan_template_chars(TSLexer *lexer) {
+    lexer->result_symbol = TEMPLATE_CHARS;
+    // has_content is false on the first pass and true on every later pass.
+    for (bool has_content = false;; has_content = true) {
+        // Mark the token end before examining the next character, so that a `$`
+        // consumed below stays outside the token if it turns out to begin `${`.
+        lexer->mark_end(lexer);
+        switch (lexer->lookahead) {
+            case '`':
+                return has_content;
+            case '\0':
+                return false;
+            case '$':
+                advance(lexer);
+                if (lexer->lookahead == '{') {
+                    return has_content;
+                }
+                // A lone `$` is ordinary text; keep scanning.
+                break;
+            case '\\':
+                return has_content;
+            default:
+                advance(lexer);
+        }
+    }
+}
+
+// Skips over any mix of whitespace, `//` line comments and `/* */` block comments.
+// Returns true once the lookahead is a character that is neither whitespace nor
+// a '/'. Returns false when a '/' is found that does not start a comment; that
+// '/' has already been skipped at that point.
+// *scanned_comment is set to true after a `//` comment has been skipped.
+// NOTE(review): the block-comment branch does not set *scanned_comment — confirm
+// that this asymmetry is intended.
+static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
+    for (;;) {
+        while (iswspace(lexer->lookahead)) {
+            skip(lexer);
+        }
+
+        if (lexer->lookahead == '/') {
+            skip(lexer);
+
+            if (lexer->lookahead == '/') {
+                // Line comment: skip up to, but not including, the newline (or end of input).
+                skip(lexer);
+                while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
+                    skip(lexer);
+                }
+                *scanned_comment = true;
+            } else if (lexer->lookahead == '*') {
+                // Block comment: skip through the closing `*/`, or to end of input
+                // if the comment is never closed.
+                skip(lexer);
+                while (lexer->lookahead != 0) {
+                    if (lexer->lookahead == '*') {
+                        skip(lexer);
+                        if (lexer->lookahead == '/') {
+                            skip(lexer);
+                            break;
+                        }
+                    } else {
+                        skip(lexer);
+                    }
+                }
+            } else {
+                return false;
+            }
+        } else {
+            return true;
+        }
+    }
+}
+
+// Decides whether an AUTOMATIC_SEMICOLON token should be produced at the
+// current position. Returns true to emit it, false otherwise.
+// The token end is marked before anything is consumed and every later character
+// is consumed with skip(), so the characters examined here are lookahead only.
+// *scanned_comment is forwarded to scan_whitespace_and_comments().
+static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols, bool *scanned_comment) {
+    lexer->result_symbol = AUTOMATIC_SEMICOLON;
+    lexer->mark_end(lexer);
+
+    // Phase 1: look at the rest of the current line. End of input or a `}`
+    // allows a semicolon; any other non-whitespace character rules it out;
+    // a newline moves on to phase 2.
+    for (;;) {
+        if (lexer->lookahead == 0) {
+            return true;
+        }
+        if (lexer->lookahead == '}') {
+            // Automatic semicolon insertion breaks detection of object patterns
+            // in a typed context:
+            //   type F = ({a}: {a: number}) => number;
+            // Therefore, disable automatic semicolons when followed by typing
+            do {
+                skip(lexer);
+            } while (iswspace(lexer->lookahead));
+            if (lexer->lookahead == ':') {
+                return valid_symbols[LOGICAL_OR]; // Don't return false if we're in a ternary by checking if || is valid
+            }
+            return true;
+        }
+        if (!iswspace(lexer->lookahead)) {
+            return false;
+        }
+        if (lexer->lookahead == '\n') {
+            break;
+        }
+        skip(lexer);
+    }
+
+    // Step over the newline that ended phase 1.
+    skip(lexer);
+
+    // Phase 2: skip blank space and comments, then decide from the first
+    // character of the next piece of code.
+    if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
+        return false;
+    }
+
+    switch (lexer->lookahead) {
+        // These characters never get a semicolon inserted before them.
+        case '`':
+        case ',':
+        case '.':
+        case ';':
+        case '*':
+        case '%':
+        case '>':
+        case '<':
+        case '=':
+        case '?':
+        case '^':
+        case '|':
+        case '&':
+        case '/':
+        case ':':
+            return false;
+
+        // No semicolon before `{` while a function-signature semicolon is valid.
+        case '{':
+            if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) {
+                return false;
+            }
+            break;
+
+            // Don't insert a semicolon before a '[' or '(', unless we're parsing
+            // a type. Detect whether we're parsing a type or an expression using
+            // the validity of a binary operator token.
+        case '(':
+        case '[':
+            if (valid_symbols[LOGICAL_OR]) {
+                return false;
+            }
+            break;
+
+            // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
+        case '+':
+            skip(lexer);
+            return lexer->lookahead == '+';
+        case '-':
+            skip(lexer);
+            return lexer->lookahead == '-';
+
+            // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
+        case '!':
+            skip(lexer);
+            return lexer->lookahead != '=';
+
+            // Don't insert a semicolon before `in` or `instanceof`, but do insert one
+            // before an identifier.
+        case 'i':
+            skip(lexer);
+
+            if (lexer->lookahead != 'n') {
+                return true;
+            }
+            skip(lexer);
+
+            // `in` followed by a non-letter is taken to be the keyword. Only
+            // iswalpha() is consulted, so a digit or `_` after `in` also lands here.
+            if (!iswalpha(lexer->lookahead)) {
+                return false;
+            }
+
+            // Match the remaining letters of `instanceof`; any mismatch means
+            // this is some other identifier.
+            for (unsigned i = 0; i < 8; i++) {
+                if (lexer->lookahead != "stanceof"[i]) {
+                    return true;
+                }
+                skip(lexer);
+            }
+
+            // `instanceof` followed by a non-letter is the keyword.
+            if (!iswalpha(lexer->lookahead)) {
+                return false;
+            }
+            break;
+    }
+
+    return true;
+}
+
+// Recognises the `?` of a ternary expression as a TERNARY_QMARK token.
+// Leading whitespace is skipped. The token ends right after the `?`; whatever
+// is read past that point only serves to rule out `??`, `?.`, and the `?`
+// of TypeScript optional parameters.
+static bool scan_ternary_qmark(TSLexer *lexer) {
+    while (iswspace(lexer->lookahead)) {
+        skip(lexer);
+    }
+
+    if (lexer->lookahead != '?') {
+        return false;
+    }
+    advance(lexer);
+
+    /* `??` and `?.` belong to other operators (nullish coalescing, optional chaining). */
+    if (lexer->lookahead == '?' || lexer->lookahead == '.') {
+        return false;
+    }
+
+    lexer->mark_end(lexer);
+    lexer->result_symbol = TERNARY_QMARK;
+
+    /* Look beyond the marked end, across any whitespace, at what follows the `?`. */
+    while (iswspace(lexer->lookahead)) {
+        advance(lexer);
+    }
+
+    switch (lexer->lookahead) {
+        /* TypeScript optional arguments contain the ?: sequence, possibly
+           with whitespace; `?)` and `?,` are rejected the same way. */
+        case ':':
+        case ')':
+        case ',':
+            return false;
+
+        /* `? .5` starts a number literal; `? .` followed by anything else is rejected. */
+        case '.':
+            advance(lexer);
+            return iswdigit(lexer->lookahead) != 0;
+
+        default:
+            return true;
+    }
+}
+
+// Scans an HTML_COMMENT token. After skipping whitespace (including U+2028 and
+// U+2029), the input must start with either `<!--` or `-->`; the token then
+// runs to the end of the line (newline, U+2028, U+2029, or end of input).
+// Returns false, possibly after consuming part of a marker, when neither
+// marker matches in full.
+static bool scan_closing_comment(TSLexer *lexer) {
+    while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
+        skip(lexer);
+    }
+
+    const char *comment_start = "<!--";
+    const char *comment_end = "-->";
+
+    // Match the marker one character at a time, bailing out on the first mismatch.
+    if (lexer->lookahead == '<') {
+        for (unsigned i = 0; i < 4; i++) {
+            if (lexer->lookahead != comment_start[i]) {
+                return false;
+            }
+            advance(lexer);
+        }
+    } else if (lexer->lookahead == '-') {
+        for (unsigned i = 0; i < 3; i++) {
+            if (lexer->lookahead != comment_end[i]) {
+                return false;
+            }
+            advance(lexer);
+        }
+    } else {
+        return false;
+    }
+
+    // Consume the rest of the line; the line terminator itself is left unconsumed.
+    while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
+           lexer->lookahead != 0x2029) {
+        advance(lexer);
+    }
+
+    lexer->result_symbol = HTML_COMMENT;
+    lexer->mark_end(lexer);
+
+    return true;
+}
+
+// Consumes JSX text up to the next `<`, `>`, `{`, `}`, `&`, or end of input and
+// tags it as JSX_TEXT.
+// Returns true only if the run contained real text. A newline, and any
+// whitespace directly following a newline (i.e. indentation), does not count;
+// every other character does.
+static bool scan_jsx_text(TSLexer *lexer) {
+    // Set once any counting character has been seen; never cleared.
+    bool found_text = false;
+    // True while we are on a newline or in the whitespace run right after one.
+    bool in_line_leading_space = false;
+
+    for (;;) {
+        switch (lexer->lookahead) {
+            case 0:
+            case '<':
+            case '>':
+            case '{':
+            case '}':
+            case '&':
+                lexer->result_symbol = JSX_TEXT;
+                return found_text;
+
+            case '\n':
+                in_line_leading_space = true;
+                break;
+
+            default:
+                // Whitespace that continues a post-newline run is ignored;
+                // anything else ends that run and counts as text.
+                if (!(in_line_leading_space && iswspace(lexer->lookahead))) {
+                    in_line_leading_space = false;
+                    found_text = true;
+                }
+                break;
+        }
+
+        advance(lexer);
+    }
+}
+
+// Scanner dispatch: picks which external token to try based on the entries of
+// valid_symbols, in a fixed priority order (template chars, JSX text,
+// automatic semicolon, ternary `?`, HTML comment). `payload` is not used.
+static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+    if (valid_symbols[TEMPLATE_CHARS]) {
+        // Template chars and an automatic semicolon both being valid is treated
+        // as "do not scan".
+        // NOTE(review): presumably this is the error-recovery state — confirm.
+        return !valid_symbols[AUTOMATIC_SEMICOLON] && scan_template_chars(lexer);
+    }
+
+    if (valid_symbols[JSX_TEXT]) {
+        if (scan_jsx_text(lexer)) {
+            return true;
+        }
+    }
+
+    const bool semicolon_allowed =
+        valid_symbols[AUTOMATIC_SEMICOLON] || valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON];
+    if (semicolon_allowed) {
+        bool saw_comment = false;
+        if (scan_automatic_semicolon(lexer, valid_symbols, &saw_comment)) {
+            return true;
+        }
+        // No semicolon: if the scan stopped on a `?` without having crossed a
+        // comment, give the ternary scanner a chance.
+        const bool try_ternary = !saw_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?';
+        return try_ternary && scan_ternary_qmark(lexer);
+    }
+
+    if (valid_symbols[TERNARY_QMARK]) {
+        return scan_ternary_qmark(lexer);
+    }
+
+    const bool expression_token_valid =
+        valid_symbols[LOGICAL_OR] || valid_symbols[ESCAPE_SEQUENCE] || valid_symbols[REGEX_PATTERN];
+    if (valid_symbols[HTML_COMMENT] && !expression_token_valid) {
+        return scan_closing_comment(lexer);
+    }
+
+    return false;
+}
--- a/non-source/foreign/tree-sitter/lang/ts/tree_sitter/alloc.h
+++ b/non-source/foreign/tree-sitter/lang/ts/tree_sitter/alloc.h
@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Allow clients to override allocation functions
+//
+// With TREE_SITTER_REUSE_ALLOCATOR defined, ts_malloc/ts_calloc/ts_realloc/ts_free
+// resolve to the ts_current_* function pointers, which are only declared here
+// and must be defined elsewhere. Without it they resolve to the C library
+// allocator. Every macro is guarded by #ifndef, so a definition made before
+// this header is included is kept.
+#ifdef TREE_SITTER_REUSE_ALLOCATOR
+
+extern void *(*ts_current_malloc)(size_t size);
+extern void *(*ts_current_calloc)(size_t count, size_t size);
+extern void *(*ts_current_realloc)(void *ptr, size_t size);
+extern void (*ts_current_free)(void *ptr);
+
+#ifndef ts_malloc
+#define ts_malloc  ts_current_malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  ts_current_calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc ts_current_realloc
+#endif
+#ifndef ts_free
+#define ts_free    ts_current_free
+#endif
+
+#else
+
+// Default: plain C library allocation.
+#ifndef ts_malloc
+#define ts_malloc  malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc realloc
+#endif
+#ifndef ts_free
+#define ts_free    free
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ALLOC_H_
--- a/non-source/foreign/tree-sitter/lang/ts/tree_sitter/array.h
+++ b/non-source/foreign/tree-sitter/lang/ts/tree_sitter/array.h
@ -0,0 +1,290 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./alloc.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+/// Declare an anonymous growable-array struct with elements of type `T`:
+/// a `contents` pointer, the number of elements in use (`size`), and the
+/// number of elements allocated (`capacity`).
+#define Array(T)       \
+  struct {             \
+    T *contents;       \
+    uint32_t size;     \
+    uint32_t capacity; \
+  }
+
+/// Initialize an array.
+#define array_init(self) \
+  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+/// Create an empty array.
+#define array_new() \
+  { NULL, 0, 0 }
+
+/// Get a pointer to the element at a given `index` in the array.
+#define array_get(self, _index) \
+  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
+
+/// Get a pointer to the first element in the array.
+#define array_front(self) array_get(self, 0)
+
+/// Get a pointer to the last element in the array.
+#define array_back(self) array_get(self, (self)->size - 1)
+
+/// Clear the array, setting its size to zero. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_clear(self) ((self)->size = 0)
+
+/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
+/// less than the array's current capacity, this function has no effect.
+#define array_reserve(self, new_capacity) \
+  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)
+
+/// Free the array's `contents` buffer and reset the array to the empty state.
+/// Memory owned by the individual elements (e.g. pointers stored in the array)
+/// is not freed.
+#define array_delete(self) _array__delete((Array *)(self))
+
+/// Push a new `element` onto the end of the array.
+#define array_push(self, element)                            \
+  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
+   (self)->contents[(self)->size++] = (element))
+
+/// Increase the array's size by `count` elements.
+/// New elements are zero-initialized.
+#define array_grow_by(self, count) \
+  do { \
+    if ((count) == 0) break; \
+    _array__grow((Array *)(self), count, array_elem_size(self)); \
+    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
+    (self)->size += (count); \
+  } while (0)
+
+/// Append all elements from one array to the end of another.
+#define array_push_all(self, other)                                       \
+  array_extend((self), (other)->size, (other)->contents)
+
+/// Append `count` elements to the end of the array, reading their values from the
+/// `contents` pointer.
+#define array_extend(self, count, contents)                    \
+  _array__splice(                                               \
+    (Array *)(self), array_elem_size(self), (self)->size, \
+    0, count,  contents                                        \
+  )
+
+/// Remove `old_count` elements from the array starting at the given `index`. At
+/// the same index, insert `new_count` new elements, reading their values from the
+/// `new_contents` pointer.
+#define array_splice(self, _index, old_count, new_count, new_contents)  \
+  _array__splice(                                                       \
+    (Array *)(self), array_elem_size(self), _index,                \
+    old_count, new_count, new_contents                                 \
+  )
+
+/// Insert one `element` into the array at the given `index`.
+#define array_insert(self, _index, element) \
+  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
+
+/// Remove one element from the array at the given `index`.
+#define array_erase(self, _index) \
+  _array__erase((Array *)(self), array_elem_size(self), _index)
+
+/// Pop the last element off the array, returning the element by value.
+/// The array must not be empty: the size is decremented without any check.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+/// Assign the contents of one array to another, reallocating if necessary.
+#define array_assign(self, other) \
+  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
+
+/// Swap one array with another
+#define array_swap(self, other) \
+  _array__swap((Array *)(self), (Array *)(other))
+
+/// Get the size of the array contents
+#define array_elem_size(self) (sizeof *(self)->contents)
+
+/// Search a sorted array for a given `needle` value, using the given `compare`
+/// callback to determine the order.
+///
+/// If an existing element is found to be equal to `needle`, then the `index`
+/// out-parameter is set to the existing value's index, and the `exists`
+/// out-parameter is set to true. Otherwise, `index` is set to an index where
+/// `needle` should be inserted in order to preserve the sorting, and `exists`
+/// is set to false.
+#define array_search_sorted_with(self, compare, needle, _index, _exists) \
+  _array__search_sorted(self, 0, compare, , needle, _index, _exists)
+
+/// Search a sorted array for a given `needle` value, using integer comparisons
+/// of a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_with`.
+#define array_search_sorted_by(self, field, needle, _index, _exists) \
+  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
+
+/// Insert a given `value` into a sorted array, using the given `compare`
+/// callback to determine the order.
+#define array_insert_sorted_with(self, compare, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+/// Insert a given `value` into a sorted array, using integer comparisons of
+/// a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_by`.
+#define array_insert_sorted_by(self, field, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+// Private
+
+typedef Array(void) Array;
+
+/// Implementation behind `array_delete`; use that macro instead.
+/// Frees the contents buffer, if there is one, and resets the array to empty.
+static inline void _array__delete(Array *self) {
+  if (!self->contents) {
+    return;
+  }
+  ts_free(self->contents);
+  self->capacity = 0;
+  self->size = 0;
+  self->contents = NULL;
+}
+
+/// Remove the element at `index`, shifting every later element down one slot.
+/// Internal helper: callers should use `array_erase` instead.
+static inline void _array__erase(Array *self, size_t element_size,
+                                uint32_t index) {
+  assert(index < self->size);
+  char *slot = (char *)self->contents + index * element_size;
+  uint32_t trailing = self->size - index - 1;
+  // Close the gap by sliding the tail over the erased slot.
+  memmove(slot, slot + element_size, trailing * element_size);
+  self->size--;
+}
+
+/// Grow the buffer so it can hold at least `new_capacity` elements.
+/// Does nothing when the current capacity is already sufficient.
+/// Internal helper: callers should use `array_reserve` instead.
+static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
+  if (new_capacity <= self->capacity) return;
+
+  // Reallocate an existing buffer, otherwise make the first allocation.
+  self->contents = self->contents
+    ? ts_realloc(self->contents, new_capacity * element_size)
+    : ts_malloc(new_capacity * element_size);
+  self->capacity = new_capacity;
+}
+
+/// Make `self` hold a copy of the elements of `other`.
+///
+/// Reserves room for `other->size` elements, sets `self->size` to match and
+/// copies the element bytes. `element_size` is the size in bytes of one element.
+/// Internal helper: callers should use `array_assign` instead.
+static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
+  _array__reserve(self, element_size, other->size);
+  self->size = other->size;
+  // When both arrays are empty, `contents` may be NULL on either side, and
+  // passing a null pointer to memcpy is undefined even for a zero length.
+  if (self->size > 0) {
+    memcpy(self->contents, other->contents, self->size * element_size);
+  }
+}
+
+/// Exchange the complete state of two arrays by swapping the structs by value.
+/// Internal helper: callers should use `array_swap` instead.
+static inline void _array__swap(Array *self, Array *other) {
+  Array held = *self;
+  *self = *other;
+  *other = held;
+}
+
+/// Ensure there is capacity for `count` more elements; `size` is not modified.
+/// When growth is needed the capacity is doubled, with a floor of 8 elements
+/// and never less than the required size.
+/// Internal helper: callers should use `array_push` or `array_grow_by` instead.
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
+  uint32_t required = self->size + count;
+  if (required <= self->capacity) return;
+
+  uint32_t target = self->capacity * 2;
+  if (target < 8) target = 8;
+  if (target < required) target = required;
+  _array__reserve(self, element_size, target);
+}
+
+/// Replace `old_count` elements starting at `index` with `new_count` elements.
+/// The new slots are copied from `elements`, or zero-filled when `elements`
+/// is NULL. Elements after the replaced range are moved to follow the new ones.
+/// Internal helper: callers should use `array_splice` instead.
+static inline void _array__splice(Array *self, size_t element_size,
+                                 uint32_t index, uint32_t old_count,
+                                 uint32_t new_count, const void *elements) {
+  uint32_t removed_end = index + old_count;
+  uint32_t inserted_end = index + new_count;
+  uint32_t resulting_size = self->size + new_count - old_count;
+  assert(removed_end <= self->size);
+
+  _array__reserve(self, element_size, resulting_size);
+
+  char *base = (char *)self->contents;
+
+  // Relocate the tail that follows the replaced range, if there is one.
+  if (self->size > removed_end) {
+    uint32_t tail_count = self->size - removed_end;
+    memmove(base + inserted_end * element_size,
+            base + removed_end * element_size,
+            tail_count * element_size);
+  }
+
+  // Fill the opened slots from the caller's data, or with zeroes.
+  if (new_count > 0) {
+    char *gap = base + index * element_size;
+    if (elements) {
+      memcpy(gap, elements, new_count * element_size);
+    } else {
+      memset(gap, 0, new_count * element_size);
+    }
+  }
+
+  self->size = resulting_size;
+}
+
+/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
+/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
+///
+/// Parameters:
+/// - `start`: first index considered; `*(_index)` is initialised to it.
+/// - `compare`: invoked as `compare(&element suffix, needle)`; a result `<= 0`
+///   moves the lower bound up, `0` on the final probe means "found".
+/// - `suffix`: pasted after each element expression (empty, or a `.field`).
+/// - `_index`, `_exists`: pointers that receive the position and found flag.
+///
+/// When the searched range is empty the macro leaves `*(_index)` at `start`
+/// and `*(_exists)` false. It declares locals named `size`, `comparison`,
+/// `half_size` and `mid_index` inside its own `do { } while (0)` scope.
+#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
+  do { \
+    *(_index) = start; \
+    *(_exists) = false; \
+    uint32_t size = (self)->size - *(_index); \
+    if (size == 0) break; \
+    int comparison; \
+    while (size > 1) { \
+      uint32_t half_size = size / 2; \
+      uint32_t mid_index = *(_index) + half_size; \
+      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
+      if (comparison <= 0) *(_index) = mid_index; \
+      size -= half_size; \
+    } \
+    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
+    if (comparison == 0) *(_exists) = true; \
+    else if (comparison < 0) *(_index) += 1; \
+  } while (0)
+
+/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
+/// parameter by reference in order to work with the generic sorting function above.
+///
+/// NOTE(review): both operands are cast to `int` before being subtracted, so
+/// keys outside the `int` range, or pairs whose difference exceeds `INT_MAX`,
+/// would not be ordered correctly. Confirm callers only use small keys.
+#define _compare_int(a, b) ((int)*(a) - (int)(b))
+
+#ifdef _MSC_VER
+#pragma warning(default : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_ARRAY_H_
--- a/non-source/foreign/tree-sitter/lang/ts/tree_sitter/parser.h
+++ b/non-source/foreign/tree-sitter/lang/ts/tree_sitter/parser.h
@ -0,0 +1,266 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Reserved symbol values: the error symbol is -1 converted to TSSymbol, and
+// the end symbol is 0.
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+// NOTE(review): presumably the size of the buffer handed to an external
+// scanner's `serialize` callback. Confirm against the runtime.
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+// Fallback declarations, skipped when TREE_SITTER_API_H_ is already defined
+// (presumably by the public API header, which would declare these itself).
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSStateId;
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+// Element type of `TSLanguage::field_map_entries`: pairs a field id with a
+// child position.
+// NOTE(review): `inherited` presumably marks fields coming from a nested
+// child rather than the node itself. Confirm against the runtime.
+typedef struct {
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+// Element type of `TSLanguage::field_map_slices`.
+// NOTE(review): presumably an (offset, count) window into
+// `field_map_entries`. Confirm against the runtime.
+typedef struct {
+  uint16_t index;
+  uint16_t length;
+} TSFieldMapSlice;
+
+// Per-symbol boolean flags; element type of `TSLanguage::symbol_metadata`.
+typedef struct {
+  bool visible;
+  bool named;
+  bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+// Lexer interface passed to generated lex functions (`lex_fn`,
+// `keyword_lex_fn`) and to the external scanner's `scan` callback.
+// The lexer macros in this header read `lookahead`, write `result_symbol`,
+// and call `advance` and `mark_end`; the other callbacks are not used here.
+struct TSLexer {
+  // Current input character; START_LEXER copies it into a local.
+  int32_t lookahead;
+  // Token symbol recorded by ACCEPT_TOKEN.
+  TSSymbol result_symbol;
+  // Called by the lexer macros as `advance(lexer, skip)`.
+  void (*advance)(TSLexer *, bool);
+  // Called by ACCEPT_TOKEN after the result symbol is stored.
+  void (*mark_end)(TSLexer *);
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+  // printf-style variadic logging hook.
+  void (*log)(const TSLexer *, const char *, ...);
+};
+
+// Tag values stored in the `type` member of a `TSParseAction`; the parse
+// table macros below (SHIFT, REDUCE, RECOVER, ACCEPT_INPUT) write them.
+typedef enum {
+  TSParseActionTypeShift,
+  TSParseActionTypeReduce,
+  TSParseActionTypeAccept,
+  TSParseActionTypeRecover,
+} TSParseActionType;
+
+// One parse action. Both struct variants begin with a `uint8_t type` member,
+// mirroring the standalone `type` member, so the tag sits at the same
+// position whichever variant was initialised.
+typedef union {
+  // Layout written by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA.
+  struct {
+    uint8_t type;
+    TSStateId state;
+    bool extra;
+    bool repetition;
+  } shift;
+  // Layout written by REDUCE.
+  struct {
+    uint8_t type;
+    uint8_t child_count;
+    TSSymbol symbol;
+    int16_t dynamic_precedence;
+    uint16_t production_id;
+  } reduce;
+  // Tag-only view, written by RECOVER and ACCEPT_INPUT.
+  uint8_t type;
+} TSParseAction;
+
+// Element type of `TSLanguage::lex_modes`: a pair of lex state numbers.
+// NOTE(review): presumably one entry per parse state, selecting the internal
+// and external lexer states to use. Confirm against the runtime.
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+// Element type of `TSLanguage::parse_actions`: either a parse action or an
+// `entry` header.
+// NOTE(review): presumably each header's `count` gives the number of action
+// elements that follow it. Confirm against the runtime.
+typedef union {
+  TSParseAction action;
+  struct {
+    uint8_t count;
+    bool reusable;
+  } entry;
+} TSParseActionEntry;
+
+// A range of lookahead values, inclusive on both ends: `set_contains` treats
+// a value `c` as inside when `start <= c && c <= end`.
+typedef struct {
+  int32_t start;
+  int32_t end;
+} TSCharacterRange;
+
+// Static description of one grammar: table sizes, pointers to the parse and
+// lex tables, and the lexing / external-scanner entry points.
+// NOTE(review): the member order is presumably part of the layout shared with
+// the tree-sitter runtime. Do not reorder without confirming.
+struct TSLanguage {
+  // Version number and table dimensions.
+  uint32_t version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  uint32_t state_count;
+  uint32_t large_state_count;
+  uint32_t production_id_count;
+  uint32_t field_count;
+  uint16_t max_alias_sequence_length;
+  // Parse tables and the actions they refer to.
+  const uint16_t *parse_table;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSParseActionEntry *parse_actions;
+  // Symbol and field naming / metadata tables.
+  const char * const *symbol_names;
+  const char * const *field_names;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const TSSymbolMetadata *symbol_metadata;
+  const TSSymbol *public_symbol_map;
+  const uint16_t *alias_map;
+  const TSSymbol *alias_sequences;
+  // Lexing: per-state modes plus the main and keyword lex functions.
+  const TSLexMode *lex_modes;
+  bool (*lex_fn)(TSLexer *, TSStateId);
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+  TSSymbol keyword_capture_token;
+  // Callbacks and tables for a hand-written external scanner. The `void *`
+  // returned by `create` is the payload passed to the other callbacks.
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  const TSStateId *primary_state_ids;
+};
+
+/// Report whether `lookahead` lies inside any of the `len` ranges in `ranges`.
+///
+/// Uses a binary search, so `ranges` must be sorted in ascending order and
+/// must not overlap. Every range is inclusive on both ends.
+///
+/// @param ranges    Pointer to the first range; only read when `len > 0`.
+/// @param len       Number of ranges in `ranges`.
+/// @param lookahead Character value to look up.
+/// @return true when some range contains `lookahead`; false otherwise,
+///         including when the set is empty.
+static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
+  // An empty set contains nothing. Without this guard the final probe below
+  // would read ranges[0] past the end of a zero-length array.
+  if (len == 0) return false;
+
+  uint32_t index = 0;
+  uint32_t size = len;
+  while (size > 1) {
+    uint32_t half_size = size / 2;
+    uint32_t mid_index = index + half_size;
+    TSCharacterRange *range = &ranges[mid_index];
+    if (lookahead >= range->start && lookahead <= range->end) {
+      return true;
+    } else if (lookahead > range->end) {
+      // The match, if any, lies at or after the midpoint.
+      index = mid_index;
+    }
+    size -= half_size;
+  }
+  // One candidate remains; test it directly.
+  TSCharacterRange *range = &ranges[index];
+  return (lookahead >= range->start && lookahead <= range->end);
+}
+
+/*
+ *  Lexer Macros
+ */
+
+// Marks the declaration that follows as possibly unused: a one-shot
+// suppression of warning 4101 on MSVC, `__attribute__((unused))` elsewhere.
+#ifdef _MSC_VER
+#define UNUSED __pragma(warning(suppress : 4101))
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+// Prologue of a lex function. Expects a `TSLexer *lexer` in scope.
+// Declares the locals `result`, `skip`, `eof` and `lookahead`, then sets up
+// two labels: `next_state` calls `lexer->advance(lexer, skip)` and falls
+// through to `start`, which clears `skip` and reloads `lookahead` from the
+// lexer. The initial `goto start` means no advance happens on entry.
+#define START_LEXER()           \
+  bool result = false;          \
+  bool skip = false;            \
+  UNUSED                        \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state:                   \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false;                 \
+  lookahead = lexer->lookahead;
+
+// Switch to `state_value` and jump to the `next_state` label set up by
+// START_LEXER, which advances the lexer. Expects a `state` variable in scope.
+#define ADVANCE(state_value) \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
+  }
+
+// Table-driven form of ADVANCE. The arguments are a flat list of
+// (character, next state) pairs, stored as `uint16_t`. For the first pair
+// whose character equals `lookahead`, `state` is set to the paired value and
+// control jumps to `next_state`. If no pair matches, execution falls through
+// to whatever follows the macro. Characters above 0xFFFF cannot be listed,
+// because the table element type is 16 bits wide.
+#define ADVANCE_MAP(...)                                              \
+  {                                                                   \
+    static const uint16_t map[] = { __VA_ARGS__ };                    \
+    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
+      if (map[i] == lookahead) {                                      \
+        state = map[i + 1];                                           \
+        goto next_state;                                              \
+      }                                                               \
+    }                                                                 \
+  }
+
+// Like ADVANCE, but sets `skip` first so that the `lexer->advance` call at
+// `next_state` is made with its second argument true.
+#define SKIP(state_value) \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+// Record `symbol_value` as the recognised token: sets `result`, stores the
+// symbol in the lexer and marks the token end at the current position.
+// Expands to three separate statements that are not wrapped in a block, so
+// it must not be used as the body of an unbraced `if`/`else`.
+#define ACCEPT_TOKEN(symbol_value)     \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
+
+// Leave the lex function, returning whether a token was accepted.
+#define END_STATE() return result;
+
+/*
+ *  Parse Table Macros
+ */
+
+// Rebase a state id by subtracting LARGE_STATE_COUNT, which must be defined
+// wherever this macro is expanded.
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+// Identity macro: expands to its argument unchanged.
+#define STATE(id) id
+
+// Identity macro: expands to its argument unchanged.
+#define ACTIONS(id) id
+
+// Brace initialisers for shift actions. The doubled braces presumably match
+// `TSParseActionEntry` (outer) and its `TSParseAction action` member (inner).
+// Members not named are zero-initialised by the designated initialiser.
+
+// Shift to `state_value`.
+#define SHIFT(state_value)            \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value)          \
+    }                                 \
+  }}
+
+// Shift to `state_value` with the `repetition` flag set.
+#define SHIFT_REPEAT(state_value)     \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value),         \
+      .repetition = true              \
+    }                                 \
+  }}
+
+// Shift with the `extra` flag set and no explicit target state.
+#define SHIFT_EXTRA()                 \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .extra = true                   \
+    }                                 \
+  }}
+
+// Brace initialiser for a reduce action: fills the `reduce` variant of
+// `TSParseAction` with the symbol, child count, dynamic precedence and
+// production id. The designators are listed in a different order than the
+// struct members (`symbol` before `child_count`), which C permits.
+#define REDUCE(symbol_name, children, precedence, prod_id) \
+  {{                                                       \
+    .reduce = {                                            \
+      .type = TSParseActionTypeReduce,                     \
+      .symbol = symbol_name,                               \
+      .child_count = children,                             \
+      .dynamic_precedence = precedence,                    \
+      .production_id = prod_id                             \
+    },                                                     \
+  }}
+
+// Brace initialiser for a recover action: only the `type` tag is set.
+#define RECOVER()                    \
+  {{                                 \
+    .type = TSParseActionTypeRecover \
+  }}
+
+// Brace initialiser for an accept action: only the `type` tag is set.
+#define ACCEPT_INPUT()              \
+  {{                                \
+    .type = TSParseActionTypeAccept \
+  }}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_
--- a/non-source/test_data/sample_files/sample.ts
+++ b/non-source/test_data/sample_files/sample.ts
@ -0,0 +1,37 @@
+
+import { Foo } from "@test"
+
+// Object type alias with a single numeric field.
+type Type = {
+  foo: number
+}
+
+// CommonJS-style `require` call assigned to a constant.
+const bar = require("somelib")
+
+// Annotated string and number literals; only the second ends with a semicolon.
+const lit_str: string = "Hello"
+let lit_num: number = 5.314;
+
+/**
+ * Sample async arrow function with a multi-line parameter list.
+ *
+ * @param arg1 - Value to echo back.
+ * @returns A promise that resolves to `arg1`.
+ */
+const arrow_proc = async (
+  arg1: number
+): Promise<number> => {
+  // `number` only names a type and cannot be used as a value; return the argument.
+  return arg1
+}
+
+/**
+ * Sample plain function declaration.
+ *
+ * @param arg - Accepted but not used.
+ * @returns The fixed string "Foobar".
+ */
+function normal_function(arg: string) {
+  const reply = "Foobar"
+  return reply
+}
+
+/**
+ * Sample class with a typed field, a constructor that makes several kinds of
+ * calls, and an empty method.
+ */
+class MyClass
+{
+  bar: number;
+
+  constructor() {
+    // `bar` has no initializer at its declaration, so assign it here.
+    this.bar = 0;
+
+    console.log("QQQ!!!")
+    Foo.some_proc()
+    // `normal_function` declares one required string parameter.
+    normal_function("QQQ!!!");
+
+    // A `const` declaration must have an initializer.
+    const myT: Type = { foo: this.bar };
+  }
+
+  /** Empty method. */
+  proc() {
+  }
+}