From 324a78ae1e0832d1662ba20811fcaf5a0c1508ed Mon Sep 17 00:00:00 2001 From: Peter Slattery Date: Fri, 11 Jul 2025 17:18:11 -0700 Subject: [PATCH] Implemented function and type queries. Implemented tree_sitter_list_all_* commands --- code/custom/4coder_tree_sitter.cpp | 420 ++++++++++++++++-- code/custom/4coder_tree_sitter.h | 16 +- non-source/test_data/sample_files/sample.cpp | 53 +++ .../test_data/sample_files/test_jai.jai | 4 + 4 files changed, 461 insertions(+), 32 deletions(-) create mode 100644 non-source/test_data/sample_files/sample.cpp diff --git a/code/custom/4coder_tree_sitter.cpp b/code/custom/4coder_tree_sitter.cpp index f83818a8..967ac9ba 100644 --- a/code/custom/4coder_tree_sitter.cpp +++ b/code/custom/4coder_tree_sitter.cpp @@ -12,6 +12,22 @@ TSQuery* tree_sitter_cpp_index_query; String_Const_u8 TS_CPP_INDEX_QUERY = string_u8_litexpr("(_ \"{\" @Start \"}\" @End ) @ScopeNest\n"); +String_Const_u8 TS_CPP_FUNCTION_QUERY = string_u8_litexpr(R"DONE( + (function_declarator) @function_identifier +)DONE"); + +String_Const_u8 TS_CPP_TYPE_QUERY = string_u8_litexpr(R"DONE( + (struct_specifier + name: (type_identifier) @prefixStruct + ) + (enum_specifier + name: (type_identifier) @prefixEnum + ) + (class_specifier + name: (type_identifier) @prefixClass + ) +)DONE"); + String_Const_u8 TS_CPP_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE( (call_expression function: [ (identifier) @defcolor_function @@ -50,6 +66,30 @@ String_Const_u8 TS_CPP_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE( ///////////////////////////////////////////// // Jai +String_Const_u8 TS_JAI_FUNCTION_QUERY = string_u8_litexpr(R"DONE( + +(procedure_declaration + name: (identifier) @print1 + (procedure + (named_parameters) @print2 + (procedure_returns) @print + ) +) + +)DONE"); + +String_Const_u8 TS_JAI_TYPE_QUERY = string_u8_litexpr(R"DONE( + +(struct_declaration + name: (identifier) @prefixStruct +) + +(enum_declaration + name: (identifier) @prefixEnum +) + +)DONE"); + String_Const_u8 TS_JAI_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE( ; keywords @@ -352,8 +392,28 @@ String_Const_u8 TS_JAI_HIGHLIGHT_QUERY_ = string_u8_litexpr(R"DONE( // Language Management //////////////////////////////////////////////////////////////////// +function TSQuery* +tree_sitter_query_new(Application_Links* app, TSLanguage* language, String_Const_u8 query_string) +{ + u32 error_offset; + TSQueryError query_error; + + TSQuery* result = ts_query_new( + language, (const char *)query_string.str, (u32)query_string.size, + &error_offset, &query_error + ); + + if (!result) + { + print_message(app, SCu8("Error creating query\n")); + printf("%.*s\n", (int)Min(query_string.size-error_offset, 100), query_string.str + error_offset); + } + + return result; +} + function void -tree_sitter_register_language(String_Const_u8 ext, TSLanguage* language, TSQuery* highlight_query) +tree_sitter_register_language(String_Const_u8 ext, TSLanguage* language, Tree_Sitter_Language_Queries queries) { Tree_Sitter_Language_Definition* lang = 0; u64 hash = table_hash_u8(ext.str, ext.size); @@ -375,7 +435,7 @@ tree_sitter_register_language(String_Const_u8 ext, TSLanguage* language, TSQuery lang->extension_hash = hash; lang->extension = push_string_copy(&tree_sitter_languages.arena, ext); lang->language = language; - lang->highlight_query = highlight_query; + lang->queries = queries; } } @@ -436,37 +496,25 @@ tree_sitter_init(Application_Links* app) { // Register CPP TSLanguage* language = tree_sitter_cpp(); String_Const_u8 highlight_query_str = TS_CPP_HIGHLIGHT_QUERY; - TSQuery* highlight_query = ts_query_new( - language, - (const char *)highlight_query_str.str, - (u32)highlight_query_str.size, - &error_offset, &query_error - ); - tree_sitter_register_language(SCu8("c"), language, highlight_query); - tree_sitter_register_language(SCu8("cpp"), language, highlight_query); - tree_sitter_register_language(SCu8("h"), language, highlight_query); - tree_sitter_register_language(SCu8("hpp"), language, highlight_query); - tree_sitter_register_language(SCu8("cc"), language, highlight_query); + + Tree_Sitter_Language_Queries queries = {}; + queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, language, TS_CPP_HIGHLIGHT_QUERY); + queries.ptr[Tree_Sitter_Language_Query_Functions] = tree_sitter_query_new(app, language, TS_CPP_FUNCTION_QUERY); + queries.ptr[Tree_Sitter_Language_Query_Types] = tree_sitter_query_new(app, language, TS_CPP_TYPE_QUERY); + tree_sitter_register_language(SCu8("c"), language, queries); + tree_sitter_register_language(SCu8("cpp"), language, queries); + tree_sitter_register_language(SCu8("h"), language, queries); + tree_sitter_register_language(SCu8("hpp"), language, queries); + tree_sitter_register_language(SCu8("cc"), language, queries); } { // Register Jai TSLanguage* language = tree_sitter_jai(); - String_Const_u8 highlight_query_str = TS_JAI_HIGHLIGHT_QUERY; - TSQuery* highlight_query = ts_query_new( - language, - (const char *)highlight_query_str.str, - (u32)highlight_query_str.size, - &error_offset, &query_error - ); - if (!highlight_query) - { - print_message(app, SCu8("Error creating jai highlight query\n")); - printf("%.*s\n", (int)Min(highlight_query_str.size-error_offset, 100), highlight_query_str.str + error_offset); - } - else - { - tree_sitter_register_language(SCu8("jai"), language, highlight_query); - } + Tree_Sitter_Language_Queries queries = {}; + queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, language, TS_JAI_HIGHLIGHT_QUERY); + queries.ptr[Tree_Sitter_Language_Query_Functions] = tree_sitter_query_new(app, language, TS_JAI_FUNCTION_QUERY); + queries.ptr[Tree_Sitter_Language_Query_Types] = tree_sitter_query_new(app, language, TS_JAI_TYPE_QUERY); + tree_sitter_register_language(SCu8("jai"), language, queries); } } @@ -720,7 +768,7 @@ function void draw_tree_sitter_node_colors(Application_Links* app, Text_Layout_ID text_layout_id, Buffer_ID buffer_id) { Tree_Sitter_Language_Definition* lang = tree_sitter_language_for_buffer(app, buffer_id); - TSQuery* query = lang->highlight_query; + TSQuery* query = lang->queries.ptr[Tree_Sitter_Language_Query_Highlights]; Range_i64 visible_range = text_layout_get_visible_range(app, text_layout_id); Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id); @@ -759,6 +807,316 @@ draw_tree_sitter_node_colors(Application_Links* app, Text_Layout_ID text_layout_ ts_tree_delete(tree); } +//////////////////////////////////////////////////////////////////// +// Queries +//////////////////////////////////////////////////////////////////// + +struct Tree_Sitter_Query_Cursor +{ + Buffer_ID buffer_id; + TSQuery* query; + TSQueryCursor* query_cursor; + TSTree* tree; + TSNode first_node; + bool ok; +}; + +function Tree_Sitter_Query_Cursor +tree_sitter_query_init(Application_Links* app, Buffer_ID buffer_id, TSQuery* query) +{ + Tree_Sitter_Query_Cursor result = {}; + result.buffer_id = buffer_id; + result.query = query; + result.ok = false; + + Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id); + Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data); + result.tree = tree_sitter_buffer_get_tree_copy(tree_data); + result.first_node = ts_tree_root_node(result.tree); + + result.ok = ( + result.query != 0 && + result.tree != 0 + ); + return result; +} + +function bool +tree_sitter_query_continue(Tree_Sitter_Query_Cursor* cursor, TSQueryMatch* match, u32* capture_index) +{ + if (cursor->ok) + { + if (!cursor->query_cursor) + { + cursor->query_cursor = ts_query_cursor_new(); + ts_query_cursor_exec(cursor->query_cursor, cursor->query, cursor->first_node); + } + + cursor->ok = ts_query_cursor_next_capture(cursor->query_cursor, match, capture_index); + } + return cursor->ok; +} + +function void +tree_sitter_query_end(Tree_Sitter_Query_Cursor* cursor) +{ + if (cursor->query_cursor) ts_query_cursor_delete(cursor->query_cursor); + ts_tree_delete(cursor->tree); +} + +//////////////////////////////////////////////////////////////////// +// Lists +//////////////////////////////////////////////////////////////////// + +function String_Const_u8 +convert_to_single_line_in_place(String_Const_u8 str) +{ + String_Const_u8 result = str; + int dst = 0; + for (int src = 0; src < str.size; src++) + { + if (str.str[src] == '\n') + { + while (src < str.size && character_is_whitespace(str.str[src])) { src += 1; } + if (src >= str.size) break; + } + result.str[dst] = str.str[src]; + dst += 1; + } + result.size = dst; + return result; +} + +function void +print_position( + Application_Links* app, + Buffer_Insertion* out, + Buffer_ID buffer, + String_Const_u8 buffer_name, + Range_i64 buffer_range, + String_Const_u8 prefix, + Arena* arena, + bool newlines_to_spaces +){ + i64 line_number = get_line_number_from_pos(app, buffer, buffer_range.start); + insertf(out, "%.*s:%lld: ", string_expand(buffer_name), line_number); + + if (prefix.size > 0) insertf(out, "%.*s ", string_expand(prefix)); + + Temp_Memory token_temp = begin_temp(arena); + String_Const_u8 line = push_buffer_range(app, arena, buffer, buffer_range); + if (newlines_to_spaces) line = convert_to_single_line_in_place(line); + insert_string(out, line); + end_temp(token_temp); + + insertc(out, '\n'); +} + +function void +tree_sitter_list_all_query_results( + Application_Links *app, + Buffer_ID optional_target_buffer, + Tree_Sitter_Language_Query_Kind query_kind +){ + String_Const_u8 decls_name = string_u8_litexpr("*decls*"); + Buffer_ID decls_buffer = get_buffer_by_name(app, decls_name, Access_Always); + if (!buffer_exists(app, decls_buffer)) + { + decls_buffer = create_buffer(app, decls_name, BufferCreate_AlwaysNew); + buffer_set_setting(app, decls_buffer, BufferSetting_Unimportant, true); + buffer_set_setting(app, decls_buffer, BufferSetting_ReadOnly, true); + } + else + { + clear_buffer(app, decls_buffer); + buffer_send_end_signal(app, decls_buffer); + } + + Scratch_Block scratch(app); + + Cursor insertion_cursor = make_cursor(push_array(scratch, u8, KB(256)), KB(256)); + Buffer_Insertion out = begin_buffer_insertion_at_buffered(app, decls_buffer, 0, &insertion_cursor); + + for (Buffer_ID buffer_it = get_buffer_next(app, 0, Access_Always); + buffer_it != 0; + buffer_it = get_buffer_next(app, buffer_it, Access_Always)) + { + Buffer_ID buffer = buffer_it; + if (optional_target_buffer != 0) buffer = optional_target_buffer; + + String_Const_u8 buffer_name = push_buffer_unique_name(app, scratch, buffer); + + Token_Array array = get_token_array_from_buffer(app, buffer); + if (array.tokens != 0) + { + Tree_Sitter_Language_Definition* lang = tree_sitter_language_for_buffer(app, buffer); + if (!lang) continue; + TSQuery* ts_query = lang->queries.ptr[query_kind]; + Tree_Sitter_Query_Cursor query = tree_sitter_query_init(app, buffer, ts_query); + + i64 last_query_match_id = -1; + i64 last_query_match_printed = -1; + i64 last_query_line_number = 0; + Range_i64 last_query_range = {}; + String_Const_u8 last_query_prefix = {}; + + TSQueryMatch query_match; + u32 capture_index; + bool reached_end = false; + while (tree_sitter_query_continue(&query, &query_match, &capture_index)) + { + TSQueryCapture capture = query_match.captures[capture_index]; + + if (last_query_match_id != query_match.id) + { + if (last_query_match_id >= 0) + { + print_position( + app, &out, buffer, buffer_name, last_query_range, last_query_prefix, scratch, true + ); + last_query_match_printed = last_query_match_id; + } + last_query_range.start = (i64)ts_node_start_byte(capture.node); + last_query_range.end = last_query_range.start; + last_query_prefix = {}; + } + + last_query_match_id = query_match.id; + last_query_range.end = Max((i64)ts_node_end_byte(capture.node), last_query_range.end); + + String_Const_u8 name; + u32 name_length; + name.str = (u8*)ts_query_capture_name_for_id(ts_query, capture.index, &name_length); + name.size = (u64)name_length; + + String_Const_u8 prefix_identifier = SCu8("prefix"); + u64 prefix_loc = string_find_first(name, prefix_identifier); + if (prefix_loc < name.size) + { + last_query_prefix = name; + last_query_prefix.str += prefix_loc + prefix_identifier.size; + last_query_prefix.size -= prefix_loc + prefix_identifier.size; + } + } + + if (last_query_match_printed != last_query_match_id) + { + print_position( + app, &out, buffer, buffer_name, last_query_range, last_query_prefix, scratch, true + ); + } + tree_sitter_query_end(&query); + + if (optional_target_buffer != 0) break; + } + } + + end_buffer_insertion(&out); + + View_ID view = get_active_view(app, Access_Always); + view_set_buffer(app, view, decls_buffer, 0); + + lock_jump_buffer(app, decls_name); +} + +CUSTOM_COMMAND_SIG(tree_sitter_list_all_functions_current_buffer) +CUSTOM_DOC("Creates a jump list of lines of the current buffer that appear to define or declare functions. Uses tree sitter") +{ + View_ID view = get_active_view(app, Access_ReadVisible); + Buffer_ID buffer = view_get_buffer(app, view, Access_ReadVisible); + if (buffer != 0) tree_sitter_list_all_query_results(app, buffer, Tree_Sitter_Language_Query_Functions); +} + +CUSTOM_UI_COMMAND_SIG(tree_sitter_list_all_functions_current_buffer_lister) +CUSTOM_DOC("Creates a lister of locations that look like function definitions and declarations in the buffer. Uses tree sitter") +{ + Heap *heap = &global_heap; + View_ID view = get_active_view(app, Access_ReadVisible); + Buffer_ID buffer = view_get_buffer(app, view, Access_ReadVisible); + if (buffer != 0) + { + tree_sitter_list_all_query_results(app, buffer, Tree_Sitter_Language_Query_Functions); + view = get_active_view(app, Access_Always); + buffer = view_get_buffer(app, view, Access_Always); + Marker_List *list = get_or_make_list_for_buffer(app, heap, buffer); + if (list != 0) + { + Jump_Lister_Result jump = get_jump_index_from_user(app, list, "Function:"); + jump_to_jump_lister_result(app, view, list, &jump); + } + } +} + +CUSTOM_COMMAND_SIG(tree_sitter_list_all_functions_all_buffers) +CUSTOM_DOC("Creates a jump list of lines from all buffers that appear to define or declare functions. Uses tree sitter") +{ + tree_sitter_list_all_query_results(app, 0, Tree_Sitter_Language_Query_Functions); +} + +CUSTOM_UI_COMMAND_SIG(tree_sitter_list_all_functions_all_buffers_lister) +CUSTOM_DOC("Creates a lister of locations that look like function definitions and declarations all buffers. Uses tree sitter") +{ + Heap *heap = &global_heap; + tree_sitter_list_all_query_results(app, 0, Tree_Sitter_Language_Query_Functions); + View_ID view = get_active_view(app, Access_Always); + Buffer_ID buffer = view_get_buffer(app, view, Access_Always); + Marker_List *list = get_or_make_list_for_buffer(app, heap, buffer); + if (list != 0) + { + Jump_Lister_Result jump = get_jump_index_from_user(app, list, "Function:"); + jump_to_jump_lister_result(app, view, list, &jump); + } +} + +CUSTOM_COMMAND_SIG(tree_sitter_list_all_types_current_buffer) +CUSTOM_DOC("Creates a jump list of lines of the current buffer that appear to define or declare types. Uses tree sitter") +{ + View_ID view = get_active_view(app, Access_ReadVisible); + Buffer_ID buffer = view_get_buffer(app, view, Access_ReadVisible); + if (buffer != 0) tree_sitter_list_all_query_results(app, buffer, Tree_Sitter_Language_Query_Types); +} + +CUSTOM_UI_COMMAND_SIG(tree_sitter_list_all_types_current_buffer_lister) +CUSTOM_DOC("Creates a lister of locations that look like function definitions and declarations in the buffer. Uses tree sitter") +{ + Heap *heap = &global_heap; + View_ID view = get_active_view(app, Access_ReadVisible); + Buffer_ID buffer = view_get_buffer(app, view, Access_ReadVisible); + if (buffer != 0) + { + tree_sitter_list_all_query_results(app, buffer, Tree_Sitter_Language_Query_Types); + view = get_active_view(app, Access_Always); + buffer = view_get_buffer(app, view, Access_Always); + Marker_List *list = get_or_make_list_for_buffer(app, heap, buffer); + if (list != 0) + { + Jump_Lister_Result jump = get_jump_index_from_user(app, list, "Type:"); + jump_to_jump_lister_result(app, view, list, &jump); + } + } +} + +CUSTOM_COMMAND_SIG(tree_sitter_list_all_types_all_buffers) +CUSTOM_DOC("Creates a jump list of lines from all buffers that appear to define or declare types. Uses tree sitter") +{ + tree_sitter_list_all_query_results(app, 0, Tree_Sitter_Language_Query_Types); +} + +CUSTOM_UI_COMMAND_SIG(tree_sitter_list_all_types_all_buffers_lister) +CUSTOM_DOC("Creates a lister of locations that look like type definitions and declarations all buffers. Uses tree sitter") +{ + Heap *heap = &global_heap; + tree_sitter_list_all_query_results(app, 0, Tree_Sitter_Language_Query_Types); + View_ID view = get_active_view(app, Access_Always); + Buffer_ID buffer = view_get_buffer(app, view, Access_Always); + Marker_List *list = get_or_make_list_for_buffer(app, heap, buffer); + if (list != 0) + { + Jump_Lister_Result jump = get_jump_index_from_user(app, list, "Type:"); + jump_to_jump_lister_result(app, view, list, &jump); + } +} + //////////////////////////////////////////////////////////////////// // DEBUG //////////////////////////////////////////////////////////////////// @@ -810,4 +1168,4 @@ CUSTOM_DOC("Write the current buffer's tree sitter tree to *tree*") TSNode root = ts_tree_root_node(tree_data->tree); write_tree_sitter_tree_to_buffer__inner(app, scratch, out_buffer, root); } -} \ No newline at end of file +} diff --git a/code/custom/4coder_tree_sitter.h b/code/custom/4coder_tree_sitter.h index 948abea0..6918aa08 100644 --- a/code/custom/4coder_tree_sitter.h +++ b/code/custom/4coder_tree_sitter.h @@ -5,13 +5,27 @@ #include +enum Tree_Sitter_Language_Query_Kind +{ + Tree_Sitter_Language_Query_Highlights, + Tree_Sitter_Language_Query_Functions, + Tree_Sitter_Language_Query_Types, + + Tree_Sitter_Language_Query_Count, +}; + +struct Tree_Sitter_Language_Queries +{ + TSQuery* ptr[Tree_Sitter_Language_Query_Count]; +}; + struct Tree_Sitter_Language_Definition { String_Const_u8 extension; u64 extension_hash; TSLanguage* language; - TSQuery* highlight_query; + Tree_Sitter_Language_Queries queries; Tree_Sitter_Language_Definition* next; }; diff --git a/non-source/test_data/sample_files/sample.cpp b/non-source/test_data/sample_files/sample.cpp new file mode 100644 index 00000000..2ae7495c --- /dev/null +++ b/non-source/test_data/sample_files/sample.cpp @@ -0,0 +1,53 @@ + +void Foo::foo( + int a, + float b, +); +void Foo::bar(); + +void func() {} + +void func2(); + +void func2() {} + +struct Foo1 { + +}; + +typedef struct Foo2 Bar1; + +typedef struct Foo3 +{ + +} Bar2; + +typedef enum Enum +{ + +} EnumName; + +enum Enum +{ + +} + +typedef enum Enum { + Foo; +} Enum; + +class Foo +{ + Foo(); + ~Foo(); + + public: + void foo(); + + private: + void bar(); + +} + +Foo::Foo() {} +Foo::~Foo() {} \ No newline at end of file diff --git a/non-source/test_data/sample_files/test_jai.jai b/non-source/test_data/sample_files/test_jai.jai index 72e2bd71..212e6e94 100644 --- a/non-source/test_data/sample_files/test_jai.jai +++ b/non-source/test_data/sample_files/test_jai.jai @@ -19,6 +19,10 @@ main :: () -> void foo = bar }; + foo :: (foo: int) { + + } + array_lit1: [..]string; array_lit2 := string.["hello", "there\n"];