Initial tree sitter usage:

- identify buffer language
- custom_begin_buffer sets up necessary tree sitter state, and kicks off a parse task
- custom_end_buffer cleans up tree sitter data and kills async parse tasks
- tree_sitter_parse_async/__inner uses the tree sitter API to get a tree of the buffer's code and stores it on the buffer's managed scope
- tree_sitter_write_tree prints tree to a special *tree* buffer
- use new build scripts in 4coder project
This commit is contained in:
Peter Slattery 2025-07-10 07:08:27 -07:00
parent 1a97b41257
commit 7caaed736b
7 changed files with 460 additions and 4 deletions

View File

@ -0,0 +1,192 @@
///////////////////////////////////////////////////////////////////////////
// Begin Buffer
///////////////////////////////////////////////////////////////////////////
// Outcome of identify_file_language: the detected language plus whether the
// buffer's file extension appears in the "treat_as_code" config list.
struct File_Language_Result
{
// Detected language. Stays zeroed (File_Language_None) when the buffer has no file name.
File_Language_Kind kind;
// True when the file extension matched an entry of the "treat_as_code" config string.
bool treat_as_code;
};
// Maps a bare file extension (e.g. "md", "cpp") to a language kind and prints
// a "Language Detected as ..." message for it. Returns File_Language_Unknown,
// without printing anything, when the extension is not recognized.
function File_Language_Kind
identify_file_language__from_extension(Application_Links* app, String_Const_u8 ext)
{
    if (string_match(ext, string_u8_litexpr("md")))
    {
        print_message(app, SCu8("Language Detected as Markdown\n"));
        return File_Language_Markdown;
    }
    if (string_match(ext, string_u8_litexpr("c")))
    {
        // NOTE(review): the message says C but the kind is File_Language_CPP.
        // tree_sitter_begin_buffer only has a grammar case for File_Language_CPP,
        // so this may be deliberate - confirm before switching to File_Language_C.
        print_message(app, SCu8("Language Detected as C\n"));
        return File_Language_CPP;
    }
    if (string_match(ext, string_u8_litexpr("cpp")) ||
        string_match(ext, string_u8_litexpr("h")) ||
        string_match(ext, string_u8_litexpr("hpp")) ||
        string_match(ext, string_u8_litexpr("cc")))
    {
        print_message(app, SCu8("Language Detected as Cpp\n"));
        return File_Language_CPP;
    }
    if (string_match(ext, string_u8_litexpr("m")))
    {
        print_message(app, SCu8("Language Detected as ObjectiveC\n"));
        return File_Language_ObjectiveC;
    }
    if (string_match(ext, string_u8_litexpr("hlsl")))
    {
        print_message(app, SCu8("Language Detected as HLSL\n"));
        return File_Language_HLSL;
    }
    if (string_match(ext, string_u8_litexpr("glsl")))
    {
        print_message(app, SCu8("Language Detected as GLSL\n"));
        return File_Language_GLSL;
    }
    if (string_match(ext, string_u8_litexpr("jai")))
    {
        print_message(app, SCu8("Language Detected as Jai\n"));
        return File_Language_Jai;
    }
    if (string_match(ext, string_u8_litexpr("cs")))
    {
        print_message(app, SCu8("Language Detected as C#\n"));
        return File_Language_CSharp;
    }
    if (string_match(ext, string_u8_litexpr("swift")))
    {
        print_message(app, SCu8("Language Detected as Swift\n"));
        return File_Language_Swift;
    }
    if (string_match(ext, string_u8_litexpr("go")))
    {
        print_message(app, SCu8("Language Detected as Go\n"));
        return File_Language_Go;
    }
    if (string_match(ext, string_u8_litexpr("rs")))
    {
        print_message(app, SCu8("Language Detected as Rust\n"));
        return File_Language_Rust;
    }
    if (string_match(ext, string_u8_litexpr("js")))
    {
        print_message(app, SCu8("Language Detected as Javascript\n"));
        return File_Language_Javascript;
    }
    if (string_match(ext, string_u8_litexpr("ts")))
    {
        print_message(app, SCu8("Language Detected as Typescript\n"));
        return File_Language_Typescript;
    }
    if (string_match(ext, string_u8_litexpr("json")))
    {
        print_message(app, SCu8("Language Detected as JSON\n"));
        return File_Language_JSON;
    }
    if (string_match(ext, string_u8_litexpr("odin")))
    {
        print_message(app, SCu8("Language Detected as Odin\n"));
        return File_Language_Odin;
    }
    if (string_match(ext, string_u8_litexpr("zig")))
    {
        print_message(app, SCu8("Language Detected as Zig\n"));
        return File_Language_Zig;
    }
    return File_Language_Unknown;
}

// Determines the language of a buffer from its file extension.
//
// - No file name: the result is all zero (File_Language_None, not code).
// - Otherwise the kind starts as File_Language_Text. If the extension is listed
//   in the "treat_as_code" config string, treat_as_code is set and the kind
//   becomes the recognized language, or File_Language_Unknown if the extension
//   is not one this function knows about.
function File_Language_Result
identify_file_language(Application_Links* app, Buffer_ID buffer_id)
{
    Scratch_Block scratch(app);
    File_Language_Result result;
    block_zero_struct(&result);
    
    String_Const_u8 file_name = push_buffer_file_name(app, scratch, buffer_id);
    if (file_name.size > 0)
    {
        String_Const_u8 ext = string_file_extension(file_name);
        result.kind = File_Language_Text;
        
        String_Const_u8 code_ext_line = def_get_config_string(scratch, vars_save_string_lit("treat_as_code"));
        String_Const_u8_Array code_exts = parse_extension_line_to_extension_list(app, scratch, code_ext_line);
        for (i32 ext_index = 0; ext_index < code_exts.count; ext_index += 1)
        {
            if (!string_match(ext, code_exts.strings[ext_index])) continue;
            
            result.treat_as_code = true;
            result.kind = identify_file_language__from_extension(app, ext);
            // Once a concrete language is known there is nothing left to find.
            if (result.kind != File_Language_Unknown) break;
        }
    }
    return result;
}
// Begin-buffer hook. Detects the buffer's language, prepares tree sitter state
// for code buffers, stores the key map, line ending and wrapping settings on
// the buffer's managed scope, kicks off the async parse and lex tasks, and
// picks the buffer layout function.
BUFFER_HOOK_SIG(custom_begin_buffer){
    ProfileScope(app, "begin buffer");
    Scratch_Block scratch(app);
    
    File_Language_Result lang = identify_file_language(app, buffer_id);
    bool is_code = lang.treat_as_code;
    
    // Only code buffers get tree sitter state; the return value says whether
    // a grammar was found and a parse task should be started.
    bool start_parse = false;
    if (is_code){
        start_parse = tree_sitter_begin_buffer(app, buffer_id, lang.kind);
    }
    
    // Key map: code buffers use "keys_code", everything else "keys_file".
    String_ID file_map_id = vars_save_string_lit("keys_file");
    String_ID code_map_id = vars_save_string_lit("keys_code");
    Command_Map_ID chosen_map_id = code_map_id;
    if (!is_code){
        chosen_map_id = file_map_id;
    }
    
    Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
    Command_Map_ID *map_slot = scope_attachment(app, scope, buffer_map_id, Command_Map_ID);
    *map_slot = chosen_map_id;
    
    Line_Ending_Kind guessed_eol = guess_line_ending_kind_from_buffer(app, buffer_id);
    Line_Ending_Kind *eol_slot = scope_attachment(app, scope, buffer_eol_setting, Line_Ending_Kind);
    *eol_slot = guessed_eol;
    
    // NOTE(allen): Decide buffer settings
    b32 wrap_lines = true;
    // TODO(PS): @Remove - consider removing the lexer for now? later, replace in favor of tree-sitter
    b32 use_lexer = is_code;
    if (is_code){
        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_code_wrapping"));
    }
    
    if (start_parse){
        Async_Task *parse_task_slot = scope_attachment(app, scope, buffer_tree_sitter_parse_task_id, Async_Task);
        *parse_task_slot = async_task_no_dep(&global_async_system, tree_sitter_parse_async, make_data_struct(&buffer_id));
    }
    
    // Buffers named like *this* take their wrapping from the output setting.
    String_Const_u8 base_name = push_buffer_base_name(app, scratch, buffer_id);
    b32 is_star_buffer = (base_name.size > 0 &&
                          base_name.str[0] == '*' &&
                          base_name.str[base_name.size - 1] == '*');
    if (is_star_buffer){
        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_output_wrapping"));
    }
    
    if (use_lexer){
        ProfileBlock(app, "begin buffer kick off lexer");
        Async_Task *lex_task_slot = scope_attachment(app, scope, buffer_lex_task, Async_Task);
        *lex_task_slot = async_task_no_dep(&global_async_system, do_full_lex_async, make_data_struct(&buffer_id));
    }
    
    b32 *wrap_slot = scope_attachment(app, scope, buffer_wrap_lines, b32);
    *wrap_slot = wrap_lines;
    
    if (use_lexer){
        buffer_set_layout(app, buffer_id, layout_virt_indent_index_generic);
    }
    else if (is_code){
        buffer_set_layout(app, buffer_id, layout_virt_indent_literal_generic);
    }
    else{
        buffer_set_layout(app, buffer_id, layout_generic);
    }
    
    // no meaning for return
    return(0);
}
///////////////////////////////////////////////////////////////////////////
// End Buffer
///////////////////////////////////////////////////////////////////////////
// End-buffer hook: drops the buffer's marker list (if it has one), tears down
// its tree sitter state, then runs the default end-buffer handling.
BUFFER_HOOK_SIG(custom_end_buffer){
    Marker_List *markers = get_marker_list_for_buffer(buffer_id);
    if (markers != 0){
        delete_marker_list(markers);
    }
    
    tree_sitter_end_buffer(app, buffer_id);
    default_end_buffer(app, buffer_id);
    
    // no meaning for return
    return(0);
}

View File

@ -527,8 +527,12 @@ custom_layer_init(Application_Links *app){
set_all_default_hooks(app);
modal_init(3, tctx);
set_custom_hook(app, HookID_BeginBuffer, custom_begin_buffer);
set_custom_hook(app, HookID_EndBuffer, custom_end_buffer);
custom_keyboard_bindings();
#if 0
mapping_init(tctx, &framework_mapping);
String_ID global_map_id = vars_save_string_lit("keys_global");
@ -542,6 +546,7 @@ custom_layer_init(Application_Links *app){
setup_essential_mapping(&framework_mapping, global_map_id, file_map_id, code_map_id);
#endif
tree_sitter_init(app);
}
#endif //FCODER_DEFAULT_BINDINGS

View File

@ -208,7 +208,9 @@ reload_clean_buffers_on_filesystem_change(Application_Links *app, Frame_Info fra
function void
default_tick(Application_Links *app, Frame_Info frame_info){
code_index_update_tick(app);
if (use_tree_sitter_code_indexing) { tree_sitter_code_index_update_tick(app); }
else { code_index_update_tick(app); }
if (tick_all_fade_ranges(app, frame_info.animation_dt)){
animate_in_n_milliseconds(app, 0);

View File

@ -65,6 +65,7 @@
#include "4coder_search_list.h"
#include "4coder_modal.h"
#include "4coder_qol.h"
#include "4coder_tree_sitter.h"
////////////////////////////////
@ -143,10 +144,12 @@
#include "4coder_search_list.cpp"
#include "4coder_modal.cpp"
#include "4coder_yeet.cpp"
#include "4coder_tree_sitter.cpp"
#include "4coder_examples.cpp"
#include "4coder_default_hooks.cpp"
#include "4coder_custom_hooks.cpp"
#include "4coder_qol.cpp"

View File

@ -0,0 +1,204 @@
// Creates the "*tree*" debug buffer that tree_sitter_write_tree prints into and
// marks it unimportant and read-only.
//
// Returns true when the buffer was created. The original fell off the end of
// this bool function without a return statement, which is undefined behaviour.
// NOTE(review): assumes create_buffer yields a zero Buffer_ID on failure - confirm.
function bool
tree_sitter_init(Application_Links* app)
{
    Buffer_ID tree_buffer = create_buffer(app,
                                          string_u8_litexpr("*tree*"),
                                          BufferCreate_NeverAttachToFile | BufferCreate_AlwaysNew);
    buffer_set_setting(app, tree_buffer, BufferSetting_Unimportant, true);
    buffer_set_setting(app, tree_buffer, BufferSetting_ReadOnly, true);
    return (tree_buffer != 0);
}
// Prepares the tree sitter attachment on a buffer's managed scope.
//
// Picks the grammar for `kind` (only File_Language_CPP has one here) and, when
// a grammar exists, creates the mutex that guards the buffer's tree.
//
// Returns true when a grammar was selected, i.e. when the caller should start
// a parse task; false when there is no grammar for `kind` or the attachment
// could not be obtained.
function bool
tree_sitter_begin_buffer(Application_Links* app, Buffer_ID buffer_id, File_Language_Kind kind)
{
    Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id);
    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
    // Same guard tree_sitter_end_buffer applies before touching the attachment.
    if (tree_data == 0) return false;
    
    switch (kind)
    {
        case File_Language_CPP:
        {
            tree_data->language = tree_sitter_cpp();
        } break;
        
        default:
        {
            tree_data->language = 0;
        } break;
    }
    
    bool has_language = (tree_data->language != 0);
    if (has_language)
    {
        tree_data->tree_mutex = system_mutex_make();
    }
    return has_language;
}
// Tears down a buffer's tree sitter state: cancels a running or pending parse
// task, deletes the stored tree and frees the tree mutex.
//
// Does nothing when the buffer has no attachment or no language, which is the
// state tree_sitter_begin_buffer leaves for buffers without a grammar.
function void
tree_sitter_end_buffer(Application_Links* app, Buffer_ID buffer_id)
{
    Managed_Scope buffer_scope = buffer_get_managed_scope(app, buffer_id);
    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, buffer_scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
    if (!tree_data || !tree_data->language) return;
    
    // Stop the parser first so it cannot publish a new tree during teardown.
    Async_Task *parse_task = scope_attachment(app, buffer_scope, buffer_tree_sitter_parse_task_id, Async_Task);
    if (parse_task != 0 &&
        async_task_is_running_or_pending(&global_async_system, *parse_task))
    {
        async_task_cancel(app, &global_async_system, *parse_task);
    }
    
    system_mutex_acquire(tree_data->tree_mutex);
    ts_tree_delete(tree_data->tree);
    // Do not leave a dangling pointer behind for later readers of the attachment.
    tree_data->tree = 0;
    system_mutex_release(tree_data->tree_mutex);
    
    system_mutex_free(tree_data->tree_mutex);
    // With the language cleared, the guard above turns a repeated call into a
    // no-op instead of a second delete/free.
    tree_data->language = 0;
}
// Returns a copy (ts_tree_copy) of the buffer's current tree, or 0 when the
// buffer has no tree yet. The caller owns the copy and must ts_tree_delete it.
//
// NOTE: tree_mutex is not taken here; the acquire/release pair was disabled in
// the original. The caller in tree_sitter_parse_async__inner invokes this while
// holding the global frame mutex.
function TSTree*
tree_sitter_buffer_get_tree_copy(Buffer_Tree_Sitter_Data* tree_data)
{
    if (tree_data->tree == 0)
    {
        return 0;
    }
    return ts_tree_copy(tree_data->tree);
}
// Body of the async parse task: parses the whole text of `buffer_id` with the
// buffer's tree sitter language and publishes the resulting tree on the
// buffer's Buffer_Tree_Sitter_Data attachment.
//
// The buffer text, the language and a copy of the previous tree are captured
// while holding the global frame mutex; the parse itself runs without it. If
// the task is canceled, nothing is published.
//
// Fixes over the previous version:
//  - tree_mutex is released after the swap (it was acquired a second time),
//  - a tree produced by a parse that then turns out to be canceled is deleted
//    instead of leaked,
//  - a missing attachment or language no longer falls through into the parse
//    loop, which could only end by cancellation in that case,
//  - the assert message has its missing spaces.
function void
tree_sitter_parse_async__inner(Async_Context* actx, Buffer_ID buffer_id)
{
    Application_Links *app = actx->app;
    Arena arena = make_arena_system(KB(16));
    TSParser *parser = ts_parser_new();
    // With a timeout set, ts_parser_parse_string may return 0 before it is done;
    // the loop below uses those returns to poll for cancellation.
    ts_parser_set_timeout_micros(parser, 5000);
    
    TSTree *old_tree = 0;
    bool lang_set = false;
    
    acquire_global_frame_mutex(app);
    String_Const_u8 src = push_whole_buffer(app, &arena, buffer_id);
    Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
    Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
    if (tree_data != 0)
    {
        old_tree = tree_sitter_buffer_get_tree_copy(tree_data);
        lang_set = ts_parser_set_language(parser, tree_data->language);
    }
    release_global_frame_mutex(app);
    
    if (!lang_set)
    {
        AssertMessageAlways("Failed to set the language for the parser. "
                            "This probably means a language wasn't set "
                            "in the BeginBuffer hook.\n");
    }
    else
    {
        // Iterate until we get a tree or we find that we should cancel the parse
        TSTree *new_tree = 0;
        b32 canceled = false;
        for (;;)
        {
            new_tree = ts_parser_parse_string(parser, old_tree, (char *)src.str, (u32)src.size);
            if (async_check_canceled(actx))
            {
                canceled = true;
                break;
            }
            if (new_tree) break;
        }
        
        if (canceled)
        {
            // The last parse call may still have produced a tree; nobody will
            // take ownership of it, so release it here.
            ts_tree_delete(new_tree);
        }
        else
        {
            TSTree* old_buffer_tree = 0;
            acquire_global_frame_mutex(app);
            {
                // NOTE(jack): Copy the old pointer to delete it outside the mutex.
                system_mutex_acquire(tree_data->tree_mutex);
                old_buffer_tree = tree_data->tree;
                tree_data->tree = new_tree;
                system_mutex_release(tree_data->tree_mutex);
                
                print_message(app, SCu8("Finished Parse\n"));
                
                // TODO(PS): Just put the code index update call here
                // NOTE(jack): This feels kinda hacky, this is here to trigger
                // the code index update tick. The buffer is also marked by the
                // async lexer so we will update the index too frequently. We
                // should probably change the lexer to not mark as modified.
                // TODO(jack): Should we instead trigger another async task here to
                // update the code index once this is done?
                buffer_mark_as_modified(buffer_id);
                
                // Force a frame refresh by requesting another frame
                animate_in_n_milliseconds(app, 0);
            }
            release_global_frame_mutex(app);
            ts_tree_delete(old_buffer_tree);
        }
    }
    
    ts_parser_delete(parser);
    ts_tree_delete(old_tree);
    linalloc_clear(&arena);
}
// Async task entry point. `data` must hold exactly one Buffer_ID (as packed by
// make_data_struct(&buffer_id) in the begin-buffer hook); any other payload
// size is ignored.
function void
tree_sitter_parse_async(Async_Context* actx, String_Const_u8 data)
{
    if (data.size == sizeof(Buffer_ID))
    {
        Buffer_ID *buffer_id_ptr = (Buffer_ID*)data.str;
        tree_sitter_parse_async__inner(actx, *buffer_id_ptr);
    }
}
// Per-tick hook for tree sitter based code indexing.
// Placeholder: there is no implementation yet, so calling this does nothing.
function void
tree_sitter_code_index_update_tick(Application_Links* app)
{
    (void)app;
}
////////////////////////////////////////////////////////////////////
// DEBUG
////////////////////////////////////////////////////////////////////
// Formerly the source of indentation characters for the tree printer below.
// Kept so any other user of the symbol still links; the printer now pads with
// a "%*s" field width instead, which is not limited by this string's length.
char* prefix_buffer = " ";

// Appends one line describing `cur_node` to the end of `buffer_id`, then
// recurses into its named children.
//
// Line format: "<indent><field>: <node type> [row, col] - [row, col]\n" with
// two spaces of indent per `level`.
//
// app       - application links used for the buffer writes
// arena     - arena the formatted lines are pushed onto
// buffer_id - buffer the text is appended to
// cur_node  - node to print
// level     - nesting depth of cur_node (0 for the root)
// field     - field name of cur_node within its parent, "" when it has none
function void
write_tree_sitter_tree_to_buffer__inner(Application_Links *app, Arena *arena, Buffer_ID buffer_id,
                                        TSNode cur_node, i32 level = 0, const char *field="")
{
    TSPoint start = ts_node_start_point(cur_node);
    TSPoint end = ts_node_end_point(cur_node);
    
    // + 1 on ts positions because the first line/column are zero in treesitter,
    // but 4coder displays as 1 indexed in the filebar.
    // "%*s" with an empty string emits exactly level*2 spaces; rows and columns
    // are unsigned 32 bit values, hence %u.
    String_Const_u8 string = push_stringf(arena, "%*s%s: %s [%u, %u] - [%u, %u]\n",
                                          level*2, "", field, ts_node_type(cur_node),
                                          start.row + 1, start.column + 1,
                                          end.row + 1, end.column + 1);
    buffer_replace_range(app, buffer_id, Ii64(buffer_get_size(app, buffer_id)), string);
    
    u32 child_count = ts_node_child_count(cur_node);
    for (u32 i = 0; i < child_count; ++i)
    {
        TSNode child = ts_node_child(cur_node, i);
        // Only named children are printed.
        if (!ts_node_is_named(child)) continue;
        
        const char *child_field = ts_node_field_name_for_child(cur_node, i);
        if (!child_field) child_field = "";
        write_tree_sitter_tree_to_buffer__inner(app, arena, buffer_id, child, level + 1, child_field);
    }
}
CUSTOM_COMMAND_SIG(tree_sitter_write_tree)
CUSTOM_DOC("Write the current buffer's tree sitter tree to *tree*")
{
    // Prints the tree of the active view's buffer into the "*tree*" buffer.
    // Nothing is written when the buffer has no tree sitter attachment or no
    // parsed tree yet.
    // NOTE(review): output is appended at the end of "*tree*"; earlier dumps are
    // not cleared - confirm whether that is intended.
    Scratch_Block scratch(app);
    Buffer_ID out_buffer = get_buffer_by_name(app, string_u8_litexpr("*tree*"), Access_Always);
    
    View_ID view = get_active_view(app, Access_Always);
    Buffer_ID buffer = view_get_buffer(app, view, Access_Visible);
    Managed_Scope scope = buffer_get_managed_scope(app, buffer);
    Buffer_Tree_Sitter_Data *tree_data = scope_attachment(app, scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
    
    // Guard the attachment pointer the same way tree_sitter_end_buffer does.
    if (tree_data != 0 && tree_data->tree != 0)
    {
        TSNode root = ts_tree_root_node(tree_data->tree);
        write_tree_sitter_tree_to_buffer__inner(app, scratch, out_buffer, root);
    }
}

View File

@ -0,0 +1,50 @@
/* date = July 8th 2025 10:13 am */
#ifndef FCODER_TREE_SITTER_H
#define FCODER_TREE_SITTER_H
#include <tree_sitter/api.h>
// Languages that identify_file_language can report for a buffer.
enum File_Language_Kind
{
// Zero value; what a zeroed File_Language_Result holds (buffer without a file name).
File_Language_None,
// The extension is treated as code but did not match any language below.
File_Language_Unknown,
// Default for a buffer with a file name whose extension is not treated as code.
File_Language_Text,
File_Language_Markdown,
// NOTE(review): identify_file_language currently reports File_Language_CPP for ".c" files.
File_Language_C,
File_Language_CPP,
File_Language_ObjectiveC,
File_Language_HLSL,
File_Language_GLSL,
File_Language_Jai,
File_Language_CSharp,
File_Language_Swift,
File_Language_Go,
File_Language_Rust,
File_Language_Javascript,
File_Language_Typescript,
File_Language_JSON,
File_Language_Odin,
File_Language_Zig,
};
// Grammar entry points, declared with C linkage; each returns the TSLanguage
// handed to ts_parser_set_language.
// NOTE(review): presumably provided by the linked tree sitter grammar libraries;
// only tree_sitter_cpp is called by the code in this layer so far.
extern "C" {
TSLanguage *tree_sitter_cpp();
TSLanguage *tree_sitter_c();
}
// Managed scope attachment ids used on each buffer's scope:
// the Buffer_Tree_Sitter_Data for the buffer ...
CUSTOM_ID(attachment, buffer_tree_sitter_data_id);
// ... and the Async_Task handle of its tree sitter parse task.
CUSTOM_ID(attachment, buffer_tree_sitter_parse_task_id);
// Per-buffer tree sitter state, stored as a managed scope attachment under
// buffer_tree_sitter_data_id.
struct Buffer_Tree_Sitter_Data
{
// Grammar chosen in tree_sitter_begin_buffer; 0 when the buffer's language has no grammar.
TSLanguage* language;
// Most recent tree published by the async parse task; 0 until a parse has finished.
TSTree* tree;
// Guards `tree`; created in tree_sitter_begin_buffer when a language is set and
// freed in tree_sitter_end_buffer.
System_Mutex tree_mutex;
};
// Switch read by default_tick: when true it calls tree_sitter_code_index_update_tick
// instead of code_index_update_tick.
b8 use_tree_sitter_code_indexing = true;
// Forward declaration so code that appears before the definition can call it.
function void tree_sitter_code_index_update_tick(Application_Links *app);
#endif //FCODER_TREE_SITTER_H

View File

@ -27,9 +27,9 @@ commands = {
.out = "*compilation*",
.footer_panel = true,
.save_dirty_files = true,
.win = "code\\bin\\build.bat",
.linux = "./code/bin/package.sh",
.mac = "code/bin/package-mac.sh", },
.win = "bash build_new\\scripts\\build.sh",
.linux = "build_new/scripts/build.sh",
.mac = "build_new/scripts/build.sh", },
.run = { .out = "*run*", .footer_panel = false, .save_dirty_files = false,
.win = "build\\4ed.exe",
.linux = "build/4ed",