Jai syntax highlighting

This commit is contained in:
Peter Slattery 2025-07-10 12:10:17 -07:00
parent 79695eca2c
commit 086ac34c4d
13 changed files with 1486505 additions and 404397 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
.DS_Store
build/
build_new/temp
current_dist*/

View File

@ -44,9 +44,10 @@ build_tree_sitter() {
clang $CLANG_OPTS "${INCLUDES[@]}" "${CUSTOM_ROOT}/lang/cpp/scanner.cc" -o $TEMP_OUT_DIR/cpp_scanner.o
print_success "Complete"
# Lang: C
print_step "Building tree-sitter C Language Lib"
clang $CLANG_OPTS "${INCLUDES[@]}" "${CUSTOM_ROOT}/lang/c/parser.c" -o $TEMP_OUT_DIR/c_parser.o
# Lang: Jai
print_step "Building tree-sitter Jai Language Lib"
clang $CLANG_OPTS "${INCLUDES[@]}" "${CUSTOM_ROOT}/lang/jai/parser.c" -o $TEMP_OUT_DIR/jai_parser.o
clang $CLANG_OPTS "${INCLUDES[@]}" "${CUSTOM_ROOT}/lang/jai/scanner.cc" -o $TEMP_OUT_DIR/jai_scanner.o
print_success "Complete"
# Link tree-sitter lib and parser obj files into a static library to link into main custom dll

View File

@ -3,35 +3,350 @@
// TEMP until I implement more generic language stuff
/////////////////////////////////////////////
/////////////////////////////////////////////
// C
/////////////////////////////////////////////
// C++
TSQuery* tree_sitter_cpp_index_query;
String_Const_u8 TS_CPP_INDEX_QUERY = string_u8_litexpr("(_ \"{\" @Start \"}\" @End ) @ScopeNest\n");
String_Const_u8 TS_CPP_HIGHLIGHT_QUERY = string_u8_litexpr("(call_expression function: ["
" (identifier) @defcolor_function"
" (field_expression field: (field_identifier) @defcolor_function)])"
"(function_declarator"
" declarator: [(identifier) (field_identifier)] @defcolor_function)"
String_Const_u8 TS_CPP_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
(call_expression function: [
(identifier) @defcolor_function
(field_expression field: (field_identifier) @defcolor_function)])
"(preproc_def"
" name: (identifier) @defcolor_macro)"
"(preproc_function_def"
" name: (identifier) @defcolor_macro)"
(function_declarator
declarator: [(identifier) (field_identifier)] @defcolor_function)
"(type_identifier) @defcolor_type"
"(call_expression"
" function: (parenthesized_expression"
" (identifier) @defcolor_type))"
(preproc_def
name: (identifier) @defcolor_macro)
"[(primitive_type) (type_qualifier) (storage_class_specifier)"
" (break_statement) (continue_statement) \"union\" \"return\" \"do\""
" \"while\" \"for\" \"if\" \"class\" \"struct\" \"enum\" \"sizeof\""
" \"else\" \"switch\" \"case\"] @defcolor_keyword"
(preproc_function_def
name: (identifier) @defcolor_macro)
"[(number_literal) (string_literal)] @defcolor_str_constant"
"[(preproc_directive) \"#define\" \"#if\" \"#elif\" \"#else\" \"#endif\""
" \"#include\"] @defcolor_preproc"
"[\"{\" \"}\" \";\" \":\" \",\"] @defcolor_text_default"
"(comment) @defcolor_comment");
(type_identifier) @defcolor_type
(call_expression
function: (parenthesized_expression
(identifier) @defcolor_type))
[(primitive_type) (type_qualifier) (storage_class_specifier)
(break_statement) (continue_statement) "union" "return" "do"
"while" "for" "if" "class" "struct" "enum" "sizeof"
"else" "switch" "case"] @defcolor_keyword
[(number_literal) (string_literal) (raw_string_literal)] @defcolor_str_constant
[(preproc_directive) "#define" "#if" "#elif" "#else" "#endif"
"#include"] @defcolor_preproc
["{" "}" ";" ":" ","] @defcolor_text_default
(comment) @defcolor_comment
)DONE");
/////////////////////////////////////////////
// Jai
// Tree-sitter highlight query for Jai source. tree_sitter_init compiles this text with
// ts_query_new against tree_sitter_jai() and registers the result under "jai".
// Every capture is named after a defcolor_* identifier.
// NOTE(review): presumably the highlighter resolves each capture name to the theme color
// of the same name -- confirm against the code that consumes the query.
// The text inside the raw string is the query itself; edit it as query source, not as C++.
String_Const_u8 TS_JAI_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
; keywords
[
"if"
"else"
"break"
"continue"
"return"
"struct"
"enum"
"for"
"defer"
"cast"
"xx"
"ifx"
"null"
] @defcolor_keyword
; # preceeded
[
(compiler_directive)
(import)
(char_string)
] @defcolor_macro
(import (identifier) @defcolor_type)
; Identifiers
(struct_declaration
name: (identifier) @defcolor_type
)
(struct_literal
type: (identifier) @defcolor_type
)
(enum_declaration
name: (identifier) @defcolor_type
)
(enum_declaration "{" (identifier) @defcolor_type)
(variable_declaration
type: (types) @defcolor_type
)
(procedure_declaration
name: (identifier) @defcolor_function
)
(call_expression
function: (identifier) @defcolor_function
)
(procedure
result: (procedure_returns) @defcolor_type
)
; Constants & Literals
[
(string)
(string_directive)
] @defcolor_str_constant
(escape_sequence) @defcolor_special_character
(integer) @defcolor_int_constant
(float) @defcolor_float_constant
(boolean) @defcolor_bool_constant
(array_literal
type: (identifier) @defcolor_type
)
; Comments
(note) @defcolor_comment
(block_comment) @defcolor_comment
(block_comment_text) @defcolor_comment
)DONE");
// NOTE(PS): source: https://github.com/St0wy/tree-sitter-jai/blob/main/queries/highlights.scm
// Alternate Jai highlight query adapted from the file linked above. tree_sitter_init
// builds the Jai query from TS_JAI_HIGHLIGHT_QUERY, not from this string.
// NOTE(review): check the following before passing this text to ts_query_new:
//   - the #any-of? predicate under "Types" names @type.builtin and the #not-has-parent?
//     predicate under "Constants" names @text_default; neither capture is defined
//     anywhere in this query.
//   - @defcolor_str_constnat (used by a predicate under "Constants" and as the capture
//     on (string) under "Literals") looks like a misspelling of @defcolor_str_constant.
//   - presumably #lua-match?, #not-has-parent? and #set! are editor-specific predicates
//     that the highlighter here does not evaluate -- confirm.
String_Const_u8 TS_JAI_HIGHLIGHT_QUERY_ = string_u8_litexpr(R"DONE(
[
(compiler_directive)
(import)
] @defcolor_macro
; Keywords
; TODO : complete this list
[
"struct"
"enum"
"defer"
"cast"
"xx"
"return"
] @defcolor_keyword
; Conditionals
[
"if"
"else"
"case"
"break"
] @defcolor_keyword
((if_expression
[
"then"
"ifx"
"else"
] @defcolor_keyword)
(#set! "priority" 105))
; Repeats
[
"for"
"while"
"continue"
] @defcolor_keyword
; Variables
(identifier) @defcolor_text_default
; Namespaces
(import (identifier) @defcolor_text_default)
; Parameters
(parameter (identifier) @defcolor_text_default ":" "="? (identifier)? @defcolor_str_constant)
(default_parameter (identifier) @defcolor_text_default ":=")
(call_expression argument: (identifier) @defcolor_text_default "=")
; Functions
(procedure_declaration (identifier) @defcolor_function)
(procedure_declaration (identifier) @defcolor_function (procedure (block)))
(call_expression function: (identifier) @defcolor_function)
; Types
(type (identifier) @defcolor_type)
((type (identifier) @defcolor_type)
(#any-of? @type.builtin
"bool"
"int" "s8" "s16" "s32" "s64"
"u8" "u16" "u32" "u64"
"string"))
(struct_declaration (identifier) @defcolor_type "::")
(enum_declaration (identifier) @defcolor_type "::")
;(union_declaration (identifier) @defcolor_type "::")
(const_declaration (identifier) @defcolor_type "::" [(array_type) (pointer_type)])
(struct . (identifier) @defcolor_type)
;(field_type . (identifier) @namespace "." (identifier) @defcolor_type)
;(bit_set_type (identifier) @defcolor_type ";")
;(procedure_type (parameters (parameter (identifier) @defcolor_type)))
;(polymorphic_parameters (identifier) @defcolor_type)
((identifier) @defcolor_type
(#lua-match? @defcolor_type "^[A-Z][a-zA-Z0-9]*$")
(#not-has-parent? @defcolor_type parameter procedure_declaration call_expression))
; Fields
(member_expression "." (identifier) @defcolor_text_default)
;(struct_type "{" (identifier) @defcolor_text_default)
(struct_field (identifier) @defcolor_text_default "="?)
(field (identifier) @defcolor_text_default)
; Constants
((identifier) @defcolor_text_default
(#lua-match? @defcolor_str_constnat "^_*[A-Z][A-Z0-9_]*$")
(#not-has-parent? @text_default type parameter))
(member_expression . "." (identifier) @defcolor_text_default)
(enum_declaration "{" (identifier) @defcolor_text_default)
; Literals
(number) @defcolor_int_constant
(float) @defcolor_float_constant
(string) @defcolor_str_constnat
;(character) @defcolor_str_constnat
(escape_sequence) @defcolor_str_constant
(boolean) @defcolor_bool_constant
[
(uninitialized)
(null)
] @defcolor_text_default
((identifier) @defcolor_text_default
(#any-of? @defcolor_text_default "context"))
; Operators
[
":="
"="
"+"
"-"
"*"
"/"
"%"
"%%"
">"
">="
"<"
"<="
"=="
"!="
"~="
"|"
"~"
"&"
"&~"
"<<"
">>"
"||"
"&&"
"!"
".."
"+="
"-="
"*="
"/="
"%="
"&="
"|="
"^="
"<<="
">>="
"||="
"&&="
"&~="
;"..="
;"..<"
;"?"
] @defcolor_operator
; Punctuation
[ "{" "}" ] @defcolor_text_default
[ "(" ")" ] @defcolor_text_default
[ "[" "]" ] @defcolor_text_default
[
"::"
"->"
"."
","
":"
";"
] @defcolor_text_default
; Comments
[
(comment)
(block_comment)
] @defcolor_comment
; Errors
(ERROR) @defcolor_comment_pop
)DONE");
////////////////////////////////////////////////////////////////////
// Language Management
@ -123,15 +438,36 @@ tree_sitter_init(Application_Links* app)
String_Const_u8 highlight_query_str = TS_CPP_HIGHLIGHT_QUERY;
TSQuery* highlight_query = ts_query_new(
language,
(const char *)TS_CPP_HIGHLIGHT_QUERY.str,
(u32)TS_CPP_HIGHLIGHT_QUERY.size,
(const char *)highlight_query_str.str,
(u32)highlight_query_str.size,
&error_offset, &query_error
);
tree_sitter_register_language(SCu8("c"), language, highlight_query);
tree_sitter_register_language(SCu8("cpp"), language, highlight_query);
tree_sitter_register_language(SCu8("h"), language, highlight_query);
tree_sitter_register_language(SCu8("hpp"), language, highlight_query);
tree_sitter_register_language(SCu8("cc"), language, highlight_query);
}
{ // Register Jai
TSLanguage* language = tree_sitter_jai();
String_Const_u8 highlight_query_str = TS_JAI_HIGHLIGHT_QUERY;
TSQuery* highlight_query = ts_query_new(
language,
(const char *)highlight_query_str.str,
(u32)highlight_query_str.size,
&error_offset, &query_error
);
if (!highlight_query)
{
print_message(app, SCu8("Error creating jai highlight query\n"));
printf("%.*s\n", (int)Min(highlight_query_str.size-error_offset, 100), highlight_query_str.str + error_offset);
}
else
{
tree_sitter_register_language(SCu8("jai"), language, highlight_query);
}
}
}
function void
@ -146,15 +482,15 @@ function void
tree_sitter_end_buffer(Application_Links* app, Buffer_ID buffer_id)
{
Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id);
Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
if (!tree_data) return;
Async_Task *tree_sitter_parse_task = scope_attachment(app, buffer_scope, buffer_tree_sitter_parse_task_id, Async_Task);
if (async_task_is_running_or_pending(&global_async_system, *tree_sitter_parse_task))
if (tree_sitter_parse_task && async_task_is_running_or_pending(&global_async_system, *tree_sitter_parse_task))
{
async_task_cancel(app, &global_async_system, *tree_sitter_parse_task);
}
Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
if (!tree_data || !tree_data->tree) return;
system_mutex_acquire(tree_data->tree_mutex);
ts_tree_delete(tree_data->tree);
system_mutex_release(tree_data->tree_mutex);

View File

@ -26,7 +26,7 @@ global Tree_Sitter_Languages tree_sitter_languages;
extern "C" {
TSLanguage *tree_sitter_cpp();
TSLanguage *tree_sitter_c();
TSLanguage *tree_sitter_jai();
}
CUSTOM_ID(attachment, buffer_tree_sitter_data_id);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,119 +1,152 @@
#include <tree_sitter/parser.h>
#include <string>
#include <cwctype>
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
namespace {
#include <assert.h>
#include <string.h>
#include <wctype.h>
using std::wstring;
using std::iswspace;
enum TokenType { RAW_STRING_DELIMITER, RAW_STRING_CONTENT };
enum TokenType {
RAW_STRING_LITERAL,
};
/// The spec limits delimiters to 16 chars
#define MAX_DELIMITER_LENGTH 16
struct Scanner {
bool scan(TSLexer *lexer, const bool *valid_symbols) {
while (iswspace(lexer->lookahead)) {
lexer->advance(lexer, true);
}
typedef struct {
uint8_t delimiter_length;
wchar_t delimiter[MAX_DELIMITER_LENGTH];
} Scanner;
lexer->result_symbol = RAW_STRING_LITERAL;
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
// Raw string literals can start with: R, LR, uR, UR, u8R
// Consume 'R'
if (lexer->lookahead == 'L' || lexer->lookahead == 'U') {
lexer->advance(lexer, false);
if (lexer->lookahead != 'R') {
return false;
}
} else if (lexer->lookahead == 'u') {
lexer->advance(lexer, false);
if (lexer->lookahead == '8') {
lexer->advance(lexer, false);
if (lexer->lookahead != 'R') {
return false;
static inline void reset(Scanner *scanner) {
scanner->delimiter_length = 0;
memset(scanner->delimiter, 0, sizeof scanner->delimiter);
}
/// Scan the raw string delimiter in R"delimiter(content)delimiter"
static bool scan_raw_string_delimiter(Scanner *scanner, TSLexer *lexer) {
if (scanner->delimiter_length > 0) {
// Closing delimiter: must exactly match the opening delimiter.
// We already checked this when scanning content, but this is how we
// know when to stop. We can't stop at ", because R"""hello""" is valid.
for (int i = 0; i < scanner->delimiter_length; ++i) {
if (lexer->lookahead != scanner->delimiter[i]) {
return false;
}
advance(lexer);
}
} else if (lexer->lookahead != 'R') {
return false;
}
} else if (lexer->lookahead != 'R') {
return false;
}
lexer->advance(lexer, false);
// Consume '"'
if (lexer->lookahead != '"') return false;
lexer->advance(lexer, false);
// Consume '(', delimiter
wstring delimiter;
for (;;) {
if (lexer->lookahead == 0 || lexer->lookahead == '\\' || iswspace(lexer->lookahead)) {
return false;
}
if (lexer->lookahead == '(') {
lexer->advance(lexer, false);
break;
}
delimiter += lexer->lookahead;
lexer->advance(lexer, false);
reset(scanner);
return true;
}
// Consume content, delimiter, ')', '"'
int delimiter_index = -1;
// Opening delimiter: record the d-char-sequence up to (.
// d-char is any basic character except parens, backslashes, and spaces.
for (;;) {
if (lexer->lookahead == 0) return false;
if (scanner->delimiter_length >= MAX_DELIMITER_LENGTH || lexer->eof(lexer) || lexer->lookahead == '\\' ||
iswspace(lexer->lookahead)) {
return false;
}
if (lexer->lookahead == '(') {
// Rather than create a token for an empty delimiter, we fail and
// let the grammar fall back to a delimiter-less rule.
return scanner->delimiter_length > 0;
}
scanner->delimiter[scanner->delimiter_length++] = lexer->lookahead;
advance(lexer);
}
}
if (delimiter_index >= 0) {
if (static_cast<unsigned>(delimiter_index) == delimiter.size()) {
if (lexer->lookahead == '"') {
lexer->advance(lexer, false);
/// Scan the raw string content in R"delimiter(content)delimiter"
static bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
// The progress made through the delimiter since the last ')'.
// The delimiter may not contain ')' so a single counter suffices.
for (int delimiter_index = -1;;) {
// If we hit EOF, consider the content to terminate there.
// This forms an incomplete raw_string_literal, and models the code
// well.
if (lexer->eof(lexer)) {
lexer->mark_end(lexer);
return true;
} else {
delimiter_index = -1;
}
} else {
if (lexer->lookahead == delimiter[delimiter_index]) {
delimiter_index++;
} else {
delimiter_index = -1;
}
}
}
if (delimiter_index == -1 && lexer->lookahead == ')') {
delimiter_index = 0;
}
if (delimiter_index >= 0) {
if (delimiter_index == scanner->delimiter_length) {
if (lexer->lookahead == '"') {
return true;
}
delimiter_index = -1;
} else {
if (lexer->lookahead == scanner->delimiter[delimiter_index]) {
delimiter_index += 1;
} else {
delimiter_index = -1;
}
}
}
lexer->advance(lexer, false);
if (delimiter_index == -1 && lexer->lookahead == ')') {
// The content doesn't include the )delimiter" part.
// We must still scan through it, but exclude it from the token.
lexer->mark_end(lexer);
delimiter_index = 0;
}
advance(lexer);
}
}
};
}
extern "C" {
void *tree_sitter_cpp_external_scanner_create() {
return new Scanner();
Scanner *scanner = (Scanner *)ts_calloc(1, sizeof(Scanner));
memset(scanner, 0, sizeof(Scanner));
return scanner;
}
bool tree_sitter_cpp_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
bool tree_sitter_cpp_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[RAW_STRING_DELIMITER] && valid_symbols[RAW_STRING_CONTENT]) {
// we're in error recovery
return false;
}
// No skipping leading whitespace: raw-string grammar is space-sensitive.
if (valid_symbols[RAW_STRING_DELIMITER]) {
lexer->result_symbol = RAW_STRING_DELIMITER;
return scan_raw_string_delimiter(scanner, lexer);
}
if (valid_symbols[RAW_STRING_CONTENT]) {
lexer->result_symbol = RAW_STRING_CONTENT;
return scan_raw_string_content(scanner, lexer);
}
return false;
}
unsigned tree_sitter_cpp_external_scanner_serialize(void *payload, char *buffer) {
return 0;
static_assert(MAX_DELIMITER_LENGTH * sizeof(wchar_t) < TREE_SITTER_SERIALIZATION_BUFFER_SIZE,
"Serialized delimiter is too long!");
Scanner *scanner = (Scanner *)payload;
size_t size = scanner->delimiter_length * sizeof(wchar_t);
memcpy(buffer, scanner->delimiter, size);
return (unsigned)size;
}
void tree_sitter_cpp_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
assert(length % sizeof(wchar_t) == 0 && "Can't decode serialized delimiter!");
Scanner *scanner = (Scanner *)payload;
scanner->delimiter_length = length / sizeof(wchar_t);
if (length > 0) {
memcpy(&scanner->delimiter[0], buffer, length);
}
}
void tree_sitter_cpp_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
Scanner *scanner = (Scanner *)payload;
ts_free(scanner);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,283 @@
#include <tree_sitter/alloc.h>
#include <tree_sitter/array.h>
#include <tree_sitter/parser.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
typedef enum {
// IDENTIFIER,
HEREDOC_START,
HEREDOC_END,
ERROR_SENTINEL,
} TokenType;
// typedef Array(char) String;
typedef Array(int32_t) String32;
// Compare two code-point strings: true when they have the same length and the same
// elements in the same order.
static inline bool string_eq(String32 *self, String32 *other) {
    const uint32_t length = self->size;
    if (length != other->size) {
        return false;
    }
    // Both strings are empty here; answer before memcmp so it is never handed the
    // `contents` pointer of an empty array.
    if (length == 0) {
        return true;
    }
    const size_t byte_count = length * sizeof(self->contents[0]);
    return memcmp(self->contents, other->contents, byte_count) == 0;
}
typedef struct {
Array(String32) heredocs;
} Scanner;
typedef enum { ERROR, END } ScanContentResult;
// Free the storage of one heredoc word and leave it as an empty array.
static inline void reset_heredoc(String32 *heredoc) { array_delete(heredoc); }
// Consume the lookahead character as part of the current token.
// Does nothing once the lexer reports end of input.
static inline void advance(TSLexer *lexer) {
    if (lexer->eof(lexer)) {
        return;
    }
    lexer->advance(lexer, false);
}
// Step over the lookahead character with the lexer's `skip` flag set to true.
// Does nothing once the lexer reports end of input.
static inline void skip(TSLexer *lexer) {
    if (lexer->eof(lexer)) {
        return;
    }
    lexer->advance(lexer, true);
}
// True when `c` may begin an identifier: an underscore, any code point at or above
// 0x80, or a character iswalpha() accepts.
static inline bool starts_identifier(int32_t c) {
    if (c == '_' || c >= 0x80) {
        return true;
    }
    return iswalpha(c) != 0;
}
// True when `c` may appear inside an identifier: an underscore, any code point at or
// above 0x80, or a character iswalnum() accepts.
static inline bool continues_identifier(int32_t c) {
    if (c == '_' || c >= 0x80) {
        return true;
    }
    return iswalnum(c) != 0;
}
// Consume the run of identifier characters at the lexer's current position and return
// it as a newly built String32. The result is empty when the lookahead is not an
// identifier character. The caller owns the returned array and must array_delete it.
static String32 scan_heredoc_word(TSLexer *lexer) {
    String32 word = array_new();
    for (;;) {
        const int32_t c = lexer->lookahead;
        if (!continues_identifier(c)) {
            break;
        }
        array_push(&word, c);
        advance(lexer);
    }
    return word;
}
/*
static int check_for_keyword(String ident) {
switch (ident.size) {
case 2:
if (strncmp(ident.contents, "if", 2) == 0) return 1;
if (strncmp(ident.contents, "xx", 2) == 0) return 1;
return 0;
case 3:
if (strncmp(ident.contents, "ifx", 3) == 0) return 1;
if (strncmp(ident.contents, "for", 3) == 0) return 1;
return 0;
case 4:
if (strncmp(ident.contents, "then", 4) == 0) return 1;
if (strncmp(ident.contents, "else", 4) == 0) return 1;
if (strncmp(ident.contents, "null", 4) == 0) return 1;
if (strncmp(ident.contents, "case", 4) == 0) return 1;
if (strncmp(ident.contents, "enum", 4) == 0) return 1;
if (strncmp(ident.contents, "true", 4) == 0) return 1;
if (strncmp(ident.contents, "cast", 4) == 0) return 1;
return 0;
case 5:
if (strncmp(ident.contents, "while", 5) == 0) return 1;
if (strncmp(ident.contents, "break", 5) == 0) return 1;
if (strncmp(ident.contents, "using", 5) == 0) return 1;
if (strncmp(ident.contents, "defer", 5) == 0) return 1;
if (strncmp(ident.contents, "false", 5) == 0) return 1;
if (strncmp(ident.contents, "union", 5) == 0) return 1;
return 0;
case 6:
if (strncmp(ident.contents, "return", 6) == 0) return 1;
if (strncmp(ident.contents, "struct", 6) == 0) return 1;
if (strncmp(ident.contents, "remove", 6) == 0) return 1;
if (strncmp(ident.contents, "inline", 6) == 0) return 1;
return 0;
case 7:
if (strncmp(ident.contents, "size_of", 7) == 0) return 1;
if (strncmp(ident.contents, "type_of", 7) == 0) return 1;
if (strncmp(ident.contents, "code_of", 7) == 0) return 1;
if (strncmp(ident.contents, "context", 7) == 0) return 1;
return 0;
case 8:
if (strncmp(ident.contents, "continue", 8) == 0) return 1;
if (strncmp(ident.contents, "operator", 8) == 0) return 1;
return 0;
case 9:
if (strncmp(ident.contents, "type_info", 9) == 0) return 1;
if (strncmp(ident.contents, "no_inline", 9) == 0) return 1;
if (strncmp(ident.contents, "interface", 9) == 0) return 1;
return 0;
case 10:
if (strncmp(ident.contents, "enum_flags", 10) == 0) return 1;
return 0;
case 11:
if (strncmp(ident.contents, "is_constant", 11) == 0) return 1;
return 0;
case 12:
if (strncmp(ident.contents, "push_context", 12) == 0) return 1;
return 0;
case 14:
if (strncmp(ident.contents, "initializer_of", 14) == 0) return 1;
return 0;
default:
return 0;
}
return 0;
}
*/
extern "C" {
bool tree_sitter_jai_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
const bool is_error_recovery = valid_symbols[ERROR_SENTINEL];
if (is_error_recovery)
return false;
lexer->mark_end(lexer);
/*
if (valid_symbols[IDENTIFIER]) {
lexer->result_symbol = IDENTIFIER;
while (iswspace(lexer->lookahead))
skip(lexer);
if (starts_identifier(lexer->lookahead)) {
String ident = (String)array_new();
array_push(&ident, lexer->lookahead);
advance(lexer);
while (1) {
if (continues_identifier(lexer->lookahead)) {
array_push(&ident, lexer->lookahead);
advance(lexer);
continue;
} else if (lexer->lookahead == '\\') {
advance(lexer);
while (iswspace(lexer->lookahead)) {
advance(lexer);
}
continue;
}
break;
}
bool keyword = check_for_keyword(ident);
array_delete(&ident);
lexer->mark_end(lexer);
return !keyword;
}
return false;
}
*/
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[HEREDOC_END]) {
lexer->result_symbol = HEREDOC_END;
if (scanner->heredocs.size == 0)
return false;
String32 heredoc = *array_back(&scanner->heredocs);
while (iswspace(lexer->lookahead))
skip(lexer);
String32 word = scan_heredoc_word(lexer);
if (!string_eq(&word, &heredoc)) {
array_delete(&word);
return false;
}
array_delete(&word);
lexer->mark_end(lexer);
array_delete(&array_pop(&scanner->heredocs));
return true;
}
if (valid_symbols[HEREDOC_START]) {
lexer->result_symbol = HEREDOC_START;
String32 heredoc = array_new();
while (iswspace(lexer->lookahead))
skip(lexer);
heredoc = scan_heredoc_word(lexer);
if (heredoc.size == 0) {
array_delete(&heredoc);
return false;
}
lexer->mark_end(lexer);
array_push(&scanner->heredocs, heredoc);
return true;
}
return false;
}
void *tree_sitter_jai_external_scanner_create() {
Scanner *scanner = (Scanner*)ts_calloc(1, sizeof(Scanner));
array_init(&scanner->heredocs);
return scanner;
}
unsigned tree_sitter_jai_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
unsigned size = 0;
buffer[size++] = (char)scanner->heredocs.size;
for (unsigned j = 0; j < scanner->heredocs.size; j++) {
String32 *heredoc = &scanner->heredocs.contents[j];
unsigned word_size = heredoc->size * sizeof(heredoc->contents[0]);
if (size + 5 + word_size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
return 0;
memcpy(&buffer[size], &heredoc->size, sizeof(int32_t));
size += sizeof(int32_t);
if (heredoc->size > 0) {
memcpy(&buffer[size], heredoc->contents, word_size);
size += word_size;
}
}
return size;
}
void tree_sitter_jai_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
unsigned size = 0;
for (uint32_t i = 0; i < scanner->heredocs.size; i++)
reset_heredoc(array_get(&scanner->heredocs, i));
if (length == 0)
return;
uint8_t open_heredoc_count = buffer[size++];
for (unsigned i = 0; i < open_heredoc_count; i++) {
String32 *heredoc = NULL;
if (i < scanner->heredocs.size) {
heredoc = array_get(&scanner->heredocs, i);
} else {
String32 new_heredoc = array_new();
array_push(&scanner->heredocs, new_heredoc);
heredoc = array_back(&scanner->heredocs);
}
memcpy(&heredoc->size, &buffer[size], sizeof(int32_t));
size += sizeof(int32_t);
unsigned word_size = heredoc->size * sizeof(heredoc->contents[0]);
if (word_size > 0) {
array_reserve(heredoc, heredoc->size);
memcpy(heredoc->contents, &buffer[size], word_size);
size += word_size;
}
}
assert(size == length);
}
// Free everything the scanner owns: each stored heredoc word, the stack that holds
// them, and finally the Scanner allocation itself.
void tree_sitter_jai_external_scanner_destroy(void *payload) {
    Scanner *scanner = (Scanner *)payload;

    uint32_t remaining = scanner->heredocs.size;
    while (remaining > 0) {
        remaining--;
        array_delete(&scanner->heredocs.contents[remaining]);
    }

    array_delete(&scanner->heredocs);
    ts_free(scanner);
}
}

View File

@ -0,0 +1,41 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC extern void *(*ts_current_malloc)(size_t size);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size);
TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size);
TS_PUBLIC extern void (*ts_current_free)(void *ptr);
// Allow clients to override allocation functions
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@ -0,0 +1,291 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include "./ts_assert.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// Implementation behind the `array_delete` macro; use the macro instead.
/// Frees the array's storage and resets it to the empty state. An array with no
/// storage is left untouched.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;
  ts_free(self->contents);
  self->contents = NULL;
  self->size = 0;
  self->capacity = 0;
}
/// Implementation behind the `array_erase` macro; use the macro instead.
/// Removes the element at `index` by shifting every later element down one slot.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  ts_assert(index < self->size);
  char *slot = (char *)self->contents + index * element_size;
  uint32_t trailing = self->size - index - 1;
  memmove(slot, slot + element_size, trailing * element_size);
  self->size--;
}
/// Implementation behind the `array_reserve` macro; use the macro instead.
/// Ensures room for at least `new_capacity` elements; never shrinks the array.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;
  size_t byte_count = new_capacity * element_size;
  // Existing storage is resized; an array with no storage gets a fresh allocation.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}
/// Make `self` hold a copy of `other`'s elements: grow `self`'s storage if it is
/// too small, adopt `other`'s size, and copy the element bytes across.
/// `element_size` is the size in bytes of one element of both arrays.
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty, either array may still have NULL `contents`, and
  // handing a null pointer to memcpy is undefined even for a zero-byte copy.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
/// Exchange the complete state (storage pointer, size and capacity) of two
/// arrays. No elements are copied.
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
  Array held = *self;
  *self = *other;
  *other = held;
}
/// Make room for `count` more elements beyond the current size without
/// changing the size itself. When the storage is too small, the capacity is
/// doubled, with a floor of 8 elements, and raised further if doubling is still
/// not enough.
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) return;

  uint32_t target = self->capacity * 2;
  if (target < 8) target = 8;
  if (target < required) target = required;
  _array__reserve(self, element_size, target);
}
/// Replace the `old_count` elements starting at `index` with `new_count`
/// elements. The new elements are copied from `elements`, or zero-filled when
/// `elements` is NULL. Elements after the replaced range are shifted to their
/// new position and the size is adjusted by `new_count - old_count`.
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t resulting_size = self->size + new_count - old_count;
  uint32_t removed_end = index + old_count;
  uint32_t inserted_end = index + new_count;
  ts_assert(removed_end <= self->size);

  _array__reserve(self, element_size, resulting_size);

  char *base = (char *)self->contents;

  // Relocate the tail first so the gap at `index` has exactly `new_count` slots.
  if (removed_end < self->size) {
    memmove(base + inserted_end * element_size,
            base + removed_end * element_size,
            (self->size - removed_end) * element_size);
  }

  if (new_count > 0) {
    char *gap = base + index * element_size;
    if (elements) {
      memcpy(gap, elements, new_count * element_size);
    } else {
      memset(gap, 0, new_count * element_size);
    }
  }

  self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from position `start` up to `self->size`.
/// `compare` is invoked as `compare(&element suffix, needle)`, where `suffix` is
/// either empty or a field accessor such as `.key`. Its result is interpreted
/// as negative, zero or positive when the element orders before, equal to, or
/// after the needle.
///
/// On completion `*_exists` is true if an equal element was found. `*_index` is
/// then the position of that element; otherwise it is the position at which the
/// needle would have to be inserted to keep the array sorted. An empty search
/// range leaves `*_index == start` and `*_exists == false`.
///
/// The body declares locals named `size`, `half_size`, `mid_index` and
/// `comparison`, so argument expressions must not refer to caller variables
/// with those names.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro above:
/// `a` is a pointer that gets dereferenced, `b` is the needle value itself. Both
/// are converted to `int` and the result is their difference, so only its sign
/// is meaningful.
/// NOTE(review): the subtraction of two `int`s can overflow when the operands are
/// far apart (and wider fields are truncated by the cast) — confirm that keys
/// compared this way stay within a small range.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@ -13,12 +13,16 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
// Three-part version number (major, minor, patch), one byte per component.
// Stored in the `metadata` member of `TSLanguage`.
// NOTE(review): the surrounding `#ifndef TREE_SITTER_API_H_` presumably exists
// because the public API header declares the same type — confirm.
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif
typedef struct {
@ -27,10 +31,11 @@ typedef struct {
bool inherited;
} TSFieldMapEntry;
// Used to index the field and supertype maps.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
} TSMapSlice;
typedef struct {
bool visible;
@ -48,6 +53,7 @@ struct TSLexer {
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
typedef enum {
@ -79,6 +85,12 @@ typedef struct {
uint16_t external_lex_state;
} TSLexMode;
// Per-entry lexing configuration; `TSLanguage.lex_modes` points at an array of
// these. Compared with `TSLexMode` it carries an additional
// `reserved_word_set_id` next to the internal and external lex state.
// NOTE(review): presumably there is one entry per parse state and the id selects
// a set within `TSLanguage.reserved_words` — confirm against the runtime.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
typedef union {
TSParseAction action;
struct {
@ -87,8 +99,13 @@ typedef union {
} entry;
} TSParseActionEntry;
// A range of character values with inclusive bounds: `set_contains` treats a
// lookahead value `c` as a member when `start <= c && c <= end`.
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -104,13 +121,13 @@ struct TSLanguage {
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
const TSLexerMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
@ -124,15 +141,48 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
// Report whether `lookahead` falls inside any of the `len` inclusive ranges in
// `ranges`.
//
// The lookup is a binary search, so it assumes `ranges` is sorted in ascending
// order with no overlapping entries. An empty set (`len == 0`) contains nothing.
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  // Without this guard the final probe below would read `ranges[0]` of an
  // empty table.
  if (len == 0) return false;

  uint32_t index = 0;
  uint32_t size = len - index;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // The match, if any, is at or after the midpoint.
      index = mid_index;
    }
    size -= half_size;
  }
  // One candidate range is left; test it directly.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
// Placed in front of a declaration that may legitimately go unused (see the
// `eof` local in START_LEXER) so the compiler does not warn about it.
// With MSVC this expands to a pragma suppressing warning 4101; every other
// compiler gets the GNU-style `unused` attribute.
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
@ -148,6 +198,17 @@ struct TSLanguage {
goto next_state; \
}
// Table-driven state transition. The arguments form a flat list of
// (lookahead value, next state) pairs, stored as `uint16_t`. The list is scanned
// linearly; on the first pair whose value equals `lookahead`, `state` is set to
// the paired state and control jumps to the `next_state` label. If no pair
// matches, execution continues with whatever follows the macro.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \
{ \
skip = true; \
@ -166,7 +227,7 @@ struct TSLanguage {
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
@ -176,7 +237,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}
@ -184,7 +245,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}
@ -197,14 +258,15 @@ struct TSLanguage {
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
#define RECOVER() \
@ -221,4 +283,4 @@ struct TSLanguage {
}
#endif
#endif // TREE_SITTER_PARSER_H_
#endif // TREE_SITTER_PARSER_H_

View File

@ -0,0 +1,49 @@
#import "Basic";
Basic :: #import "Basic";
// Struct declaration with a single `int` field; instantiated twice in `main`.
Options :: struct
{
foo: int;
}
// Enum declaration with two members.
// NOTE(review): `Inavlid` looks like a misspelling of `Invalid` — confirm before
// renaming, since the member name is part of the declaration's interface.
Enum :: enum
{
Test;
Inavlid;
}
// Sample procedure that strings together declarations, literals, control flow
// and calls.
// NOTE(review): `bar`, `cnd`, `cnd2`, `arr` and `proc_call` are not declared in
// the visible part of this file, so this is presumably a syntax-highlighting
// sample rather than a program meant to compile — confirm.
main :: () -> void
{
// Struct values: default-initialised declaration and a struct literal.
options1: Options;
options2 := Options.{
foo = bar
};
// Arrays: a dynamic-array declaration and an array literal.
array_lit1: [..]string;
array_lit2 := string.["hello", "there\n"];
// One declaration per literal kind.
lit_str := "string";
lit_int := 5555;
lit_float := 6.28;
// NOTE(review): the right-hand side is the identifier `float`, not a boolean
// literal; presumably `true` or `false` was intended — confirm.
lit_bool := float;
lit_char := #char "c";
// Plain conditional with an empty body.
if cnd {
}
// `if ... ==` switch form with a literal case, an enum-member case and a
// bare `case;`.
if cnd2 == {
case 5;
case .Bar;
case;
}
// Loops: a named iterator over a range, and iteration over a collection.
for i: 0..5 { continue; break; }
for arr {}
// Calls: unqualified, and through the `Basic` import alias.
proc_call();
Basic.print();
return;
}